Spaces:

JaneDing2025
/

IconEval

Sleeping

App Files Files Community

hailey2024 committed on Oct 23

Commit

309ceaa

1 Parent(s): 126eac4

init src files

Browse files

Files changed (6) hide show

README copy.md +14 -0
app.py +33 -0
src/allocation.py +917 -0
src/database.py +146 -0
src/eval_server.py +815 -0
src/instuction_md.py +40 -0

README copy.md ADDED Viewed

	@@ -0,0 +1,14 @@

+---
+title: Regrounding
+emoji: 🏃
+colorFrom: yellow
+colorTo: indigo
+sdk: gradio
+sdk_version: 5.49.1
+app_file: app.py
+pinned: false
+license: cc-by-nc-nd-4.0
+short_description: Icon evaluation interface.
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,33 @@

+"""App for the IconEval interface."""
+import os
+from argparse import Namespace
+from src.eval_server import app_main
+# Absolute directory of this file; the database and image paths below are built from it.
+REPO_ROOT = os.path.dirname(os.path.abspath(__file__))
+DB_PATH = os.path.join(REPO_ROOT, 'eval_round_r1.db')
+# The '{label}' and '{path}' placeholders are left unformatted here and the
+# template is handed to app_main as-is (presumably filled in per image by the
+# server -- TODO confirm).
+IMAGE_TEMPLATE = os.path.join(REPO_ROOT, 'images', '{label}', '{path}')
+# Required setting: os.environ[...] raises KeyError when the variable is unset
+# and int(...) raises ValueError when the value is not an integer.
+REDUNDANCY = int(os.environ['REDUNDANCY'])
+# NOTE: the environment variable is named ALLOW_USERS (not ALLOWED_USERS).
+ALLOWED_USERS_STR = os.environ['ALLOW_USERS']
+# Comma-separated list; whitespace around names is stripped and empty entries are dropped.
+ALLOWED_USERS = [u.strip() for u in ALLOWED_USERS_STR.split(',') if u.strip()]
+# Required credentials (KeyError when unset).
+BASIC_USER = os.environ['BASIC_USER']
+BASIC_PASS = os.environ['BASIC_PASS']
+ROUND_ID = 'r1'
+# Required setting, same failure modes as REDUNDANCY.
+AUTO_ALLO_NUM = int(os.environ['AUTO_ALLO_NUM'])
+# Optional setting: falls back to 10 when AUTO_COMMIT is unset.
+AUTO_COMMIT = int(os.environ.get('AUTO_COMMIT', '10'))
+# Bundle the settings into the attribute-style object passed to app_main.
+args = Namespace(
+    db_path=DB_PATH,
+    round_id=ROUND_ID,
+    redundancy=REDUNDANCY,
+    image_template=IMAGE_TEMPLATE,
+    allowed_users=ALLOWED_USERS,
+    auto_allo_num=AUTO_ALLO_NUM,
+    auto_commit=AUTO_COMMIT,
+    basic_user=BASIC_USER,
+    basic_pass=BASIC_PASS,
+)
+# Runs at import time: merely importing this module calls app_main.
+app_main(args)

src/allocation.py ADDED Viewed

	@@ -0,0 +1,917 @@

+"""Task allocation engine for dynamic workload management."""
+import hashlib
+import json
+from datetime import datetime, timezone
+from typing import Any
+from database import DB
+class AllocationValidator:
+    """Input validator for allocation operations."""
+    @staticmethod
+    def parse_amount_spec(spec: str) -> dict[str, Any]:
+        """Parse amount specification.
+        Examples:
+            'count:100' -> {'type': 'count', 'value': 100}
+            'ratio:0.5' -> {'type': 'ratio', 'value': 0.5}
+            'equal' -> {'type': 'equal'}
+            'ratio:A:0.3,B:0.7' -> {'type': 'ratio_per_user', 'users': {'A': 0.3, 'B': 0.7}}
+            'all' -> {'type': 'all'}
+        """
+        if spec == 'equal':
+            return {'type': 'equal'}
+        if spec == 'all':
+            return {'type': 'all'}
+        if spec.startswith('count:'):
+            count = int(spec[6:])
+            if count <= 0:
+                raise ValueError(f'Count must be positive, got: {count}')
+            return {'type': 'count', 'value': count}
+        if spec.startswith('ratio:'):
+            rest = spec[6:]
+            # Check if per-user ratio
+            if ':' in rest:
+                # Format: ratio:A:0.3,B:0.3,C:0.4
+                user_ratios = {}
+                for part in rest.split(','):
+                    user, ratio_str = part.split(':')
+                    ratio = float(ratio_str)
+                    if ratio < 0 or ratio > 1:
+                        raise ValueError(f'Ratio must be in [0, 1], got: {ratio} for user {user}')
+                    user_ratios[user.strip()] = ratio
+                total = sum(user_ratios.values())
+                if abs(total - 1.0) > 0.001:
+                    raise ValueError(f'User ratios must sum to 1.0, got: {total}')
+                return {'type': 'ratio_per_user', 'users': user_ratios}
+            else:
+                # Single ratio
+                ratio = float(rest)
+                if ratio <= 0 or ratio > 1:
+                    raise ValueError(f'Ratio must be in (0, 1], got: {ratio}')
+                return {'type': 'ratio', 'value': ratio}
+        raise ValueError(f'Invalid amount spec: {spec}')
+    @staticmethod
+    def parse_target(target: str) -> dict[str, Any]:
+        """Parse target specification.
+        Examples:
+            'user:A' -> {'type': 'single', 'user': 'A'}
+            'users:A,B,C' -> {'type': 'multiple', 'users': ['A', 'B', 'C']}
+        """
+        if target.startswith('user:'):
+            user = target[5:].strip()
+            if not user:
+                raise ValueError('User ID cannot be empty')
+            return {'type': 'single', 'user': user}
+        if target.startswith('users:'):
+            users = [u.strip() for u in target[6:].split(',')]
+            users = list(dict.fromkeys(users))  # Remove duplicates, preserve order
+            if not users:
+                raise ValueError('User list cannot be empty')
+            if len(users) != len(target[6:].split(',')):
+                print('Warning: Duplicate users removed from target')
+            return {'type': 'multiple', 'users': users}
+        raise ValueError(f'Invalid target: {target}')
+    @staticmethod
+    def validate_no_self_overlap(from_users: list[str], to_users: list[str]) -> list[str]:
+        """Remove self-overlapping users and return filtered list."""
+        overlap = set(from_users) & set(to_users)
+        if overlap:
+            print(f'Warning: Removing self-overlapping users: {overlap}')
+            return [u for u in to_users if u not in overlap]
+        return to_users
+    @staticmethod
+    def check_users_exist(db: DB, round_id: str, users: list[str]) -> None:
+        """Check if users exist in this round (warn for new users)."""
+        with db._connect() as conn:
+            # Check if assignments table exists
+            cur = conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='assignments'")
+            if not cur.fetchone():
+                return  # New schema not initialized, skip check
+            for user in users:
+                cur = conn.execute(
+                    'SELECT COUNT(*) FROM assignments WHERE round_id = ? AND user_id = ?', (round_id, user)
+                )
+                count = cur.fetchone()[0]
+                if count == 0:
+                    print(f"Info: User '{user}' is new to this round")
+class AllocationEngine:
+    """Task allocation engine for managing workload distribution.
+    Methods:
+    - allocate(from_source, to_target, amount_spec, redundancy, force, dry_run, reason)
+    - release(from_user, amount_spec, dry_run, reason)
+    - rebalance(users, mode_spec, dry_run, reason)
+    """
+    def __init__(
+        self,
+        db: DB,
+        round_id: str,
+        operator: str = 'admin',
+    ):
+        """Initialize allocation engine.
+        Args:
+            db: Database instance
+            round_id: Round ID
+            operator: Operator name for audit logging
+        """
+        self.db = db
+        self.round_id = round_id
+        self.operator = operator
+    def allocate(
+        self,
+        from_source: str,
+        to_target: str,
+        amount_spec: str,
+        redundancy: int,
+        force: bool = False,
+        dry_run: bool = False,
+        reason: str = '',
+    ) -> dict[str, Any]:
+        """Core allocation function.
+        Redundancy design:
+        - Each task needs N different annotators (redundancy parameter)
+        - Each user can only annotate a task ONCE (enforced by PRIMARY KEY)
+        - If a user already has a task, it cannot be assigned to them again
+        - This may reduce the actual number of tasks allocated if users already
+          have some of the source tasks
+        Args:
+            from_source: Source of tasks ('unassigned', 'user:A')
+            to_target: Target users ('user:A', 'users:A,B,C')
+            amount_spec: Amount specification ('count:100', 'ratio:0.5', 'equal')
+            redundancy: Number of different people who should label each task
+            force: Allow transferring in_progress tasks (default: False)
+            dry_run: Preview only, don't execute (default: False)
+            reason: Reason for this allocation (for audit)
+        Returns:
+            {
+                'success': bool,
+                'affected_users': list[str],
+                'affected_count': int,
+                'distribution': dict[str, int],  # user -> count
+                'item_ids_sample': list[str],
+                'error': str  # only if success=False
+            }
+        """
+        # 1. Validate inputs
+        validator = AllocationValidator()
+        amount_parsed = validator.parse_amount_spec(amount_spec)
+        target_parsed = validator.parse_target(to_target)
+        # 2. Get target users
+        target_users = self._get_target_users(target_parsed)
+        if not target_users:
+            return {'success': False, 'error': 'No target users specified', 'affected_count': 0}
+        validator.check_users_exist(self.db, self.round_id, target_users)
+        # 3. Get available source tasks for EACH user (filtered by redundancy at SQL level)
+        user_available_tasks = self._get_available_source_items(from_source, target_users, redundancy, force)
+        # Check if any user has available tasks
+        total_available = sum(len(tasks) for tasks in user_available_tasks.values())
+        if total_available == 0:
+            return {
+                'success': False,
+                'error': f'No available tasks from source {from_source} for target users '
+                '(tasks may already be assigned or at redundancy limit)',
+                'affected_count': 0,
+            }
+        # 4. Calculate distribution considering each user's available tasks
+        distribution = self._calculate_distribution_per_user(user_available_tasks, amount_parsed)
+        # 6. Execute or preview
+        if dry_run:
+            result = self._preview_allocation(distribution)
+        else:
+            result = self._execute_allocation(from_source, to_target, amount_spec, distribution, force, reason)
+        return result
+    def release(self, from_user: str, amount_spec: str, dry_run: bool = False, reason: str = '') -> dict[str, Any]:
+        """Release tasks from a user back to unassigned pool.
+        Args:
+            from_user: User ID to release tasks from
+            amount_spec: Amount to release ('count:50', 'ratio:0.3', 'all')
+            dry_run: Preview only
+            reason: Reason for release
+        Returns:
+            Result dictionary with success status
+        """
+        validator = AllocationValidator()
+        amount_parsed = validator.parse_amount_spec(amount_spec)
+        # Get user's pending tasks
+        with self.db._connect() as conn:
+            cur = conn.execute(
+                """
+                SELECT item_id
+                FROM assignments
+                WHERE round_id = ? AND user_id = ? AND status = 'pending'
+                ORDER BY assigned_at
+                """,
+                (self.round_id, from_user),
+            )
+            pending_items = [row[0] for row in cur.fetchall()]
+        if not pending_items:
+            return {'success': False, 'error': 'No pending tasks to release'}
+        # Calculate how many to release
+        if amount_parsed['type'] == 'count':
+            to_release = pending_items[: min(amount_parsed['value'], len(pending_items))]
+        elif amount_parsed['type'] == 'ratio':
+            count = int(len(pending_items) * amount_parsed['value'])
+            to_release = pending_items[:count]
+        elif amount_parsed['type'] == 'all':
+            to_release = pending_items
+        else:
+            return {'success': False, 'error': f'Invalid amount_spec for release: {amount_spec}'}
+        # Preview release
+        if dry_run:
+            return {
+                'success': True,
+                'dry_run': True,
+                'affected_users': [from_user],
+                'affected_count': len(to_release),
+                'item_ids_sample': to_release[:10],
+            }
+        # Execute release
+        now = datetime.now(timezone.utc).isoformat()
+        with self.db._connect() as conn, self.db._lock:
+            placeholders = ','.join('?' * len(to_release))
+            conn.execute(
+                f"""
+                DELETE FROM assignments
+                WHERE round_id = ? AND user_id = ? AND item_id IN ({placeholders})
+                """,
+                (self.round_id, from_user, *to_release),
+            )
+            # Record log
+            conn.execute(
+                """
+                INSERT INTO allocation_history
+                (round_id, operation, operator, from_source, to_target, amount_spec,
+                 affected_users, affected_count, item_ids_sample, item_ids_hash,
+                 dry_run, force, reason, created_at)
+                VALUES (?, 'release', ?, ?, 'pool', ?, ?, ?, ?, ?, 0, 0, ?, ?)
+                """,
+                (
+                    self.round_id,
+                    self.operator,
+                    f'user:{from_user}',
+                    amount_spec,
+                    json.dumps([from_user]),
+                    len(to_release),
+                    json.dumps(to_release[:10]),
+                    self._compute_item_hash(to_release),
+                    reason,
+                    now,
+                ),
+            )
+        return {
+            'success': True,
+            'affected_users': [from_user],
+            'affected_count': len(to_release),
+            'item_ids_sample': to_release[:10],
+        }
+    def rebalance(self, users: list[str], mode_spec: str, dry_run: bool = False, reason: str = '') -> dict[str, Any]:
+        """Rebalance pending tasks among selected users.
+        Args:
+            users: List of user IDs to rebalance among
+            mode_spec: Rebalance mode ('equal', 'ratio:A:0.3,B:0.7')
+            dry_run: Preview only
+            reason: Reason for rebalance
+        Returns:
+            Result dictionary with success status
+        """
+        # 1. Collect all unique pending tasks from these users (avoid duplicates for redundancy > 1)
+        with self.db._connect() as conn:
+            placeholders_users = ','.join('?' * len(users))
+            cur = conn.execute(
+                f"""
+                SELECT DISTINCT item_id
+                FROM assignments
+                WHERE round_id = ? AND user_id IN ({placeholders_users}) AND status = 'pending'
+                """,
+                (self.round_id, *users),
+            )
+            all_pending = [row[0] for row in cur.fetchall()]
+        if not all_pending:
+            return {'success': False, 'error': 'No pending tasks to rebalance'}
+        # 2. Check for redundancy > 1 tasks and collect their current assignment counts
+        task_redundancy_info = {}
+        with self.db._connect() as conn:
+            for item_id in all_pending:
+                # Get redundancy config
+                cur = conn.execute(
+                    """
+                    SELECT redundancy_required
+                    FROM task_config
+                    WHERE round_id = ? AND item_id = ?
+                    """,
+                    (self.round_id, item_id),
+                )
+                row = cur.fetchone()
+                redundancy_required = row[0] if row else 1
+                # Count current assignments among rebalance users
+                placeholders = ','.join('?' * len(users))
+                cur = conn.execute(
+                    f"""
+                    SELECT COUNT(*)
+                    FROM assignments
+                    WHERE round_id = ? AND item_id = ? AND user_id IN ({placeholders}) AND status = 'pending'
+                    """,
+                    (self.round_id, item_id, *users),
+                )
+                current_count = cur.fetchone()[0]
+                task_redundancy_info[item_id] = {
+                    'redundancy_required': redundancy_required,
+                    'current_count': current_count,
+                }
+        # 3. Parse distribution mode
+        validator = AllocationValidator()
+        amount_parsed = validator.parse_amount_spec(mode_spec)
+        # 4. Warn if there are redundancy > 1 tasks
+        redundancy_gt1_tasks = [
+            item_id for item_id, info in task_redundancy_info.items() if info['redundancy_required'] > 1
+        ]
+        if redundancy_gt1_tasks:
+            print(
+                f'Warning: {len(redundancy_gt1_tasks)} tasks have redundancy > 1. '
+                'Rebalance will redistribute unique tasks (may change redundancy counts). '
+                f'Consider using allocate/release for fine control. Sample: {redundancy_gt1_tasks[:3]}'
+            )
+        # 5. Build per-user available pools from all_pending (exclude items the user already has)
+        #    We do NOT filter by redundancy limit here because we are redistributing
+        #    the same set of tasks within the same user group; distinct-user counts
+        #    will remain within limits. We only prevent giving a user a task they
+        #    already have (any status).
+        user_available_tasks: dict[str, list[str]] = {}
+        with self.db._connect() as conn:
+            for user in users:
+                if not all_pending:
+                    user_available_tasks[user] = []
+                    continue
+                # exclude items that the user already has
+                placeholders = ','.join('?' * len(all_pending))
+                cur = conn.execute(
+                    f"""
+                    SELECT x.item_id
+                    FROM (
+                        SELECT ? AS round_id, ? AS user_id
+                    ) p
+                    JOIN (
+                        SELECT item_id FROM (
+                            VALUES {','.join(['(?)'] * len(all_pending))}
+                        ) AS v(item_id)
+                    ) AS x ON 1=1
+                    WHERE NOT EXISTS (
+                        SELECT 1
+                        FROM assignments a
+                        WHERE a.round_id = p.round_id
+                          AND a.item_id = x.item_id
+                          AND a.user_id = p.user_id
+                    )
+                    """,
+                    (self.round_id, user, *all_pending),
+                )
+                user_available_tasks[user] = [row[0] for row in cur.fetchall()]
+        # 6. Calculate new distribution using per-user pools
+        distribution = self._calculate_distribution_per_user(user_available_tasks, amount_parsed)
+        # Preview rebalance
+        if dry_run:
+            return {
+                'success': True,
+                'dry_run': True,
+                'affected_users': users,
+                'affected_count': len(all_pending),
+                'distribution': {u: len(items) for u, items in distribution.items()},
+                'item_ids_sample': all_pending[:10],
+            }
+        # 7. Execute rebalance (delete old assignments, insert new ones)
+        now = datetime.now(timezone.utc).isoformat()
+        with self.db._connect() as conn, self.db._lock:
+            conn.execute('BEGIN IMMEDIATE')
+            try:
+                # Delete all pending tasks from these users
+                placeholders_users = ','.join('?' * len(users))
+                conn.execute(
+                    f"""
+                    DELETE FROM assignments
+                    WHERE round_id = ? AND user_id IN ({placeholders_users}) AND status = 'pending'
+                    """,
+                    (self.round_id, *users),
+                )
+                # Insert new assignments
+                for user, item_ids in distribution.items():
+                    for item_id in item_ids:
+                        # Check if this user already has this task (avoid PRIMARY KEY violation)
+                        # Each user can only have ONE assignment per task
+                        cur = conn.execute(
+                            """
+                            SELECT 1
+                            FROM assignments
+                            WHERE round_id = ? AND item_id = ? AND user_id = ?
+                            """,
+                            (self.round_id, item_id, user),
+                        )
+                        if cur.fetchone():
+                            # User already has this task, skip
+                            continue
+                        # Compute next slot based on how many DISTINCT users already have this task
+                        cur = conn.execute(
+                            """
+                            SELECT COUNT(DISTINCT user_id)
+                            FROM assignments
+                            WHERE round_id = ? AND item_id = ?
+                            """,
+                            (self.round_id, item_id),
+                        )
+                        next_slot = cur.fetchone()[0]
+                        # Insert new assignment
+                        conn.execute(
+                            """
+                            INSERT INTO assignments
+                            (round_id, item_id, user_id, status, assigned_at, redundancy_slot)
+                            VALUES (?, ?, ?, 'pending', ?, ?)
+                            """,
+                            (self.round_id, item_id, user, now, next_slot),
+                        )
+                # Record log
+                conn.execute(
+                    """
+                    INSERT INTO allocation_history
+                    (round_id, operation, operator, from_source, to_target, amount_spec,
+                     affected_users, affected_count, item_ids_sample, item_ids_hash,
+                     dry_run, force, reason, created_at)
+                    VALUES (?, 'rebalance', ?, ?, ?, ?, ?, ?, ?, ?, 0, 0, ?, ?)
+                    """,
+                    (
+                        self.round_id,
+                        self.operator,
+                        f'users:{",".join(users)}',
+                        f'users:{",".join(users)}',
+                        mode_spec,
+                        json.dumps(users),
+                        len(all_pending),
+                        json.dumps(all_pending[:10]),
+                        self._compute_item_hash(all_pending),
+                        reason,
+                        now,
+                    ),
+                )
+                conn.execute('COMMIT')
+                return {
+                    'success': True,
+                    'affected_users': users,
+                    'affected_count': len(all_pending),
+                    'distribution': {u: len(items) for u, items in distribution.items()},
+                }
+            except Exception as e:
+                conn.execute('ROLLBACK')
+                raise e
+    # ========== Internal Methods ==========
+    def _get_available_source_items(
+        self, from_source: str, target_users: list[str], redundancy: int, force: bool
+    ) -> dict[str, list[str]]:
+        """Get available source tasks for EACH target user separately.
+        This method returns a dictionary mapping each user to their available tasks.
+        Different users have different available tasks because they
+        already have different existing assignments.
+        For each user, filters at SQL level to return tasks that:
+        1. Come from the specified source (unassigned pool or another user)
+        2. Are not already assigned to THIS SPECIFIC USER
+        3. Have not reached their redundancy limit
+        Args:
+            from_source: Source specification ('unassigned' or 'user:UserID')
+            target_users: List of users who will receive these tasks
+            redundancy: Number of different annotators required per task
+            force: If True, allow transferring in_progress tasks (only for user source)
+        Returns:
+            Dictionary mapping user_id to list of available item_ids
+            Example: {'Alice': ['task1', 'task2'], 'Bob': ['task1', 'task3'], ...}
+        """
+        user_available_tasks = {}
+        with self.db._connect() as conn:
+            # Query available tasks for EACH user separately
+            for user in target_users:
+                if from_source == 'unassigned':
+                    # Get tasks from unassigned pool that this user doesn't have
+                    cur = conn.execute(
+                        """
+                        SELECT t.item_id
+                        FROM tasks t
+                        LEFT JOIN task_config tc
+                          ON t.round_id = tc.round_id AND t.item_id = tc.item_id
+                        WHERE t.round_id = ?
+                          -- This specific user doesn't have this task
+                          AND NOT EXISTS (
+                              SELECT 1
+                              FROM assignments a
+                              WHERE a.round_id = t.round_id
+                                AND a.item_id = t.item_id
+                                AND a.user_id = ?
+                          )
+                          -- Check redundancy limit (current assignments < required)
+                          AND (
+                              SELECT COUNT(DISTINCT a2.user_id)
+                              FROM assignments a2
+                              WHERE a2.round_id = t.round_id AND a2.item_id = t.item_id
+                          ) < COALESCE(tc.redundancy_required, ?)
+                          -- Check redundancy completion
+                          AND COALESCE(tc.redundancy_completed, 0) < COALESCE(tc.redundancy_required, ?)
+                        ORDER BY t.order_key
+                        """,
+                        (self.round_id, user, redundancy, redundancy),
+                    )
+                elif from_source.startswith('user:'):
+                    source_user_id = from_source[5:]
+                    if force:
+                        status_filter = "('pending', 'in_progress')"
+                    else:
+                        status_filter = "('pending')"
+                    cur = conn.execute(
+                        f"""
+                        SELECT DISTINCT a1.item_id
+                        FROM assignments a1
+                        LEFT JOIN task_config tc
+                          ON a1.round_id = tc.round_id AND a1.item_id = a1.item_id
+                        WHERE a1.round_id = ?
+                          AND a1.user_id = ?
+                          AND a1.status IN {status_filter}
+                          -- This specific target user doesn't have this task
+                          AND NOT EXISTS (
+                              SELECT 1
+                              FROM assignments a2
+                              WHERE a2.round_id = a1.round_id
+                                AND a2.item_id = a1.item_id
+                                AND a2.user_id = ?
+                          )
+                          -- Check redundancy limit
+                          AND (
+                              SELECT COUNT(DISTINCT a3.user_id)
+                              FROM assignments a3
+                              WHERE a3.round_id = a1.round_id AND a3.item_id = a1.item_id
+                          ) < COALESCE(tc.redundancy_required, ?)
+                          -- Check redundancy completion
+                          AND COALESCE(tc.redundancy_completed, 0) < COALESCE(tc.redundancy_required, ?)
+                        ORDER BY a1.assigned_at
+                        """,
+                        (self.round_id, source_user_id, user, redundancy, redundancy),
+                    )
+                else:
+                    raise ValueError(f'Invalid from_source: {from_source}')
+                user_available_tasks[user] = [row[0] for row in cur.fetchall()]
+        return user_available_tasks
+    def _get_target_users(self, target_parsed: dict) -> list[str]:
+        """Get target user list."""
+        if target_parsed['type'] == 'single':
+            return [target_parsed['user']]
+        else:
+            return target_parsed['users']
+    def _calculate_distribution_per_user(
+        self, user_available_tasks: dict[str, list[str]], amount_parsed: dict
+    ) -> dict[str, list[str]]:
+        """Calculate task distribution for each user based on their available tasks.
+        Key insight: Each user has a different pool of available tasks because they
+        have different existing assignments. This method respects that and assigns
+        tasks from each user's individual available pool.
+        Args:
+            user_available_tasks: Dict mapping user_id to list of available task IDs
+                                  Example: {'Alice': ['task1', 'task2', 'task3'],
+                                           'Bob': ['task1', 'task4']}
+            amount_parsed: Parsed amount specification
+        Returns:
+            Dict mapping user_id to list of assigned task IDs
+            Example: {'Alice': ['task1', 'task2'], 'Bob': ['task1']}
+        """
+        target_users = list(user_available_tasks.keys())
+        distribution = {}
+        if amount_parsed['type'] == 'count':
+            # Each user gets up to N tasks from their available pool
+            count = amount_parsed['value']
+            for user in target_users:
+                available = user_available_tasks[user]
+                distribution[user] = available[: min(count, len(available))]
+        elif amount_parsed['type'] == 'ratio':
+            # Each user gets X% of their available tasks
+            ratio = amount_parsed['value']
+            for user in target_users:
+                available = user_available_tasks[user]
+                count = int(len(available) * ratio)
+                distribution[user] = available[:count]
+        elif amount_parsed['type'] == 'equal':
+            # Try to give each user roughly equal number of tasks
+            # Strategy: Calculate target count as average, but respect each user's limit
+            total_available = sum(len(tasks) for tasks in user_available_tasks.values())
+            target_per_user = total_available // len(target_users)
+            for user in target_users:
+                available = user_available_tasks[user]
+                distribution[user] = available[: min(target_per_user, len(available))]
+        elif amount_parsed['type'] == 'ratio_per_user':
+            # User-specific ratios: calculate from total available pool
+            user_ratios = amount_parsed['users']
+            total_available = sum(len(tasks) for tasks in user_available_tasks.values())
+            # Sort by user_id for deterministic behavior
+            sorted_users = sorted(user_ratios.items(), key=lambda x: x[0])
+            for user, ratio in sorted_users:
+                if user not in user_available_tasks:
+                    distribution[user] = []
+                    continue
+                available = user_available_tasks[user]
+                target_count = int(total_available * ratio)
+                # Take up to target_count, but limited by what's available for this user
+                distribution[user] = available[: min(target_count, len(available))]
+        elif amount_parsed['type'] == 'all':
+            # Give each user all their available tasks
+            for user in target_users:
+                distribution[user] = user_available_tasks[user]
+        else:
+            raise ValueError(f'Unknown amount type: {amount_parsed["type"]}')
+        return distribution
+    def _calculate_distribution(
+        self, item_ids: list[str], target_users: list[str], amount_parsed: dict
+    ) -> dict[str, list[str]]:
+        """Calculate task distribution for each user (old method, kept for compatibility).
+        Returns:
+            {'user_A': ['item_1', 'item_2'], 'user_B': ['item_3'], ...}
+        """
+        n_items = len(item_ids)
+        if amount_parsed['type'] == 'count':
+            count = min(amount_parsed['value'], n_items)
+            selected_items = item_ids[:count]
+            return self._distribute_equal(selected_items, target_users)
+        elif amount_parsed['type'] == 'ratio':
+            ratio = amount_parsed['value']
+            count = int(n_items * ratio)
+            selected_items = item_ids[:count]
+            return self._distribute_equal(selected_items, target_users)
+        elif amount_parsed['type'] in ('equal', 'all'):
+            return self._distribute_equal(item_ids, target_users)
+        elif amount_parsed['type'] == 'ratio_per_user':
+            user_ratios = amount_parsed['users']
+            distribution = {}
+            start_idx = 0
+            # Sort by user_id for deterministic behavior
+            sorted_users = sorted(user_ratios.items(), key=lambda x: x[0])
+            for i, (user, ratio) in enumerate(sorted_users):
+                if i == len(sorted_users) - 1:
+                    # Last user gets all remaining (avoid floating point errors)
+                    distribution[user] = item_ids[start_idx:]
+                else:
+                    count = int(n_items * ratio)
+                    distribution[user] = item_ids[start_idx : start_idx + count]
+                    start_idx += count
+            return distribution
+        raise ValueError(f'Unknown amount type: {amount_parsed["type"]}')
+    def _distribute_equal(self, item_ids: list[str], users: list[str]) -> dict[str, list[str]]:
+        """Distribute tasks equally among users (deterministic remainder allocation)."""
+        n_items = len(item_ids)
+        n_users = len(users)
+        base_count = n_items // n_users
+        remainder = n_items % n_users
+        # Sort by user_id for deterministic behavior
+        sorted_users = sorted(users)
+        distribution = {}
+        start_idx = 0
+        for i, user in enumerate(sorted_users):
+            # First 'remainder' users get one extra item
+            count = base_count + (1 if i < remainder else 0)
+            distribution[user] = item_ids[start_idx : start_idx + count]
+            start_idx += count
+        return distribution
+    def _compute_item_hash(self, item_ids: list[str]) -> str:
+        """Compute hash of item_id list for audit."""
+        content = ','.join(sorted(item_ids))
+        return hashlib.sha256(content.encode()).hexdigest()[:16]
+    def _preview_allocation(self, distribution: dict[str, list[str]]) -> dict[str, Any]:
+        """Dry-run preview."""
+        total_allocated = sum(len(items) for items in distribution.values())
+        # Sample: first 2 items from each user
+        sample_ids = []
+        for items in distribution.values():
+            sample_ids.extend(items[:2])
+        sample_ids = sample_ids[:10]
+        return {
+            'success': True,
+            'dry_run': True,
+            'affected_users': list(distribution.keys()),
+            'affected_count': total_allocated,
+            'distribution': {u: len(items) for u, items in distribution.items()},
+            'item_ids_sample': sample_ids,
+            'item_ids_hash': self._compute_item_hash([iid for items in distribution.values() for iid in items]),
+        }
+    def _execute_allocation(
+        self,
+        from_source: str,
+        to_target: str,
+        amount_spec: str,
+        distribution: dict[str, list[str]],
+        force: bool,
+        reason: str,
+    ) -> dict[str, Any]:
+        """Execute actual allocation."""
+        now = datetime.now(timezone.utc).isoformat()
+        all_assigned_items = [iid for items in distribution.values() for iid in items]
+        with self.db._connect() as conn, self.db._lock:
+            conn.execute('BEGIN IMMEDIATE')
+            try:
+                # 1. If transferring from a user, delete their assignments (no skipped state)
+                if from_source.startswith('user:'):
+                    source_user = from_source[5:]
+                    placeholders = ','.join('?' * len(all_assigned_items))
+                    conn.execute(
+                        f"""
+                        DELETE FROM assignments
+                        WHERE round_id = ? AND user_id = ? AND item_id IN ({placeholders})
+                        """,
+                        (self.round_id, source_user, *all_assigned_items),
+                    )
+                # 2. Create assignments for target users
+                for user, item_ids in distribution.items():
+                    for item_id in item_ids:
+                        # Check if this user already has this task (avoid PRIMARY KEY violation)
+                        # Each user can only have ONE assignment per task
+                        cur = conn.execute(
+                            """
+                            SELECT 1
+                            FROM assignments
+                            WHERE round_id = ? AND item_id = ? AND user_id = ?
+                            """,
+                            (self.round_id, item_id, user),
+                        )
+                        if cur.fetchone():
+                            # User already has this task, skip
+                            continue
+                        # Compute next slot based on how many DISTINCT users already have this task
+                        cur = conn.execute(
+                            """
+                            SELECT COUNT(DISTINCT user_id)
+                            FROM assignments
+                            WHERE round_id = ? AND item_id = ?
+                            """,
+                            (self.round_id, item_id),
+                        )
+                        next_slot = cur.fetchone()[0]
+                        conn.execute(
+                            """
+                            INSERT INTO assignments
+                            (round_id, item_id, user_id, status, assigned_at, redundancy_slot)
+                            VALUES (?, ?, ?, 'pending', ?, ?)
+                            """,
+                            (self.round_id, item_id, user, now, next_slot),
+                        )
+                # 3. Record log
+                conn.execute(
+                    """
+                    INSERT INTO allocation_history
+                    (round_id, operation, operator, from_source, to_target, amount_spec,
+                     affected_users, affected_count, item_ids_sample, item_ids_hash,
+                     dry_run, force, reason, created_at)
+                    VALUES (?, 'allocate', ?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?, ?)
+                    """,
+                    (
+                        self.round_id,
+                        self.operator,
+                        from_source,
+                        to_target,
+                        amount_spec,
+                        json.dumps(list(distribution.keys())),
+                        len(all_assigned_items),
+                        json.dumps(all_assigned_items[:10]),
+                        self._compute_item_hash(all_assigned_items),
+                        1 if force else 0,
+                        reason,
+                        now,
+                    ),
+                )
+                conn.execute('COMMIT')
+                return {
+                    'success': True,
+                    'affected_users': list(distribution.keys()),
+                    'affected_count': len(all_assigned_items),
+                    'distribution': {u: len(items) for u, items in distribution.items()},
+                    'item_ids_sample': all_assigned_items[:10],
+                }
+            except Exception as e:
+                conn.execute('ROLLBACK')
+                raise e

src/database.py ADDED Viewed

	@@ -0,0 +1,146 @@

+"""Database class for the evaluation."""
+import json
+import os
+import sqlite3
+import subprocess
+import threading
+from contextlib import contextmanager
+from datetime import datetime, timezone
+from typing import Optional
+# Tables that DB.__init__ requires to exist already when it runs in verify-only mode
+# (i.e. no schema file was applied).
+REQUIRED_TABLES = ('tasks', 'assignments', 'task_config', 'answers', 'allocation_history')
+def _get_time_now() -> str:
+    return datetime.now(timezone.utc).isoformat()
+class DB:
+    """Database for the evaluation."""
+    def __init__(self, db_path: str, schema_path: Optional[str] = None, verify_only: bool = True):
+        """Initialize the database."""
+        os.makedirs(os.path.dirname(db_path) or '.', exist_ok=True)
+        self.db_path = db_path
+        self._lock = threading.Lock()
+        with self._connect() as conn:
+            if schema_path is not None and os.path.exists(schema_path):
+                with open(schema_path, encoding='utf-8') as f:
+                    schema_sql = f.read()
+                conn.executescript(schema_sql)
+            elif verify_only:
+                missing = []
+                for t in REQUIRED_TABLES:
+                    cur = conn.execute('SELECT name FROM sqlite_master WHERE type="table" AND name=?', (t,))
+                    if cur.fetchone() is None:
+                        missing.append(t)
+                if missing:
+                    raise FileNotFoundError(f'Missing required tables: {", ".join(missing)}')
+            else:
+                raise FileNotFoundError(f'SQL schema file not found at: {schema_path or "None"}')
+    @contextmanager
+    def _connect(self):
+        conn = sqlite3.connect(self.db_path, timeout=30, isolation_level=None)  # autocommit
+        conn.execute('PRAGMA foreign_keys=ON;')
+        try:
+            yield conn
+        finally:
+            conn.close()
+    # ========== Answer Recording ==========
+    def record_answer(
+        self,
+        round_id: str,
+        user_id: str,
+        item_id: str,
+        label: str,
+        image_path: str,
+        score: int,
+        words_not_present: list[str],
+    ) -> None:
+        """Record an answer into the database."""
+        now = _get_time_now()
+        with self._connect() as conn, self._lock:
+            conn.execute(
+                'INSERT OR REPLACE INTO answers(round_id, user_id, item_id, label, image_path, '
+                'score, words_not_present, answered_at) '
+                'VALUES(?,?,?,?,?,?,?,?)',
+                (
+                    round_id,
+                    user_id,
+                    item_id,
+                    label,
+                    image_path,
+                    score,
+                    json.dumps(words_not_present, ensure_ascii=False),
+                    now,
+                ),
+            )
+    def get_answered_item_ids(self, round_id: str, user_id: str) -> set[str]:
+        """Get all the answered item ids of a user in a round."""
+        with self._connect() as conn:
+            cur = conn.execute(
+                'SELECT item_id FROM answers WHERE round_id=? AND user_id=?',
+                (round_id, user_id),
+            )
+            return {r[0] for r in cur.fetchall()}  # fetchall returns a list of tuples
+    def get_answer(self, round_id: str, user_id: str, item_id: str) -> Optional[tuple[int, list[str]]]:
+        """Get an answer of a user in a round for a specific item.
+        Returns:
+            tuple[int, list[str]]: score (int) and list of words not present in the image.
+            None if the answer does not exist.
+        """
+        with self._connect() as conn:
+            cur = conn.execute(
+                'SELECT score, words_not_present FROM answers WHERE round_id=? AND user_id=? AND item_id=?',
+                (round_id, user_id, item_id),
+            )
+            row = cur.fetchone()
+            if not row:
+                return None
+            score, words_not_present = row
+            return int(score), json.loads(words_not_present)
+    # ========== Lease Management (for in_progress tasks) ==========
+    def cleanup_expired_leases(self, round_id: str) -> int:
+        """Auto-recover expired in_progress tasks to pending status.
+        Returns the number of tasks recovered.
+        """
+        now = _get_time_now()
+        with self._connect() as conn, self._lock:
+            cur = conn.execute(
+                """
+                UPDATE assignments
+                SET status = 'pending', lease_until = NULL, started_at = NULL
+                WHERE round_id = ?
+                AND status = 'in_progress'
+                AND lease_until < ?
+                """,
+                (round_id, now),
+            )
+            return cur.rowcount or 0
+    # ========== Auto Commit (For HF Spaces) ==========
+    def commit_and_push_db(self) -> None:
+        """Commit and push the database to the repository."""
+        if not os.getenv('SPACE_ID'):
+            return
+        try:
+            ts = _get_time_now()
+            subprocess.run(['git', 'add', self.db_path], check=True)
+            subprocess.run(['git', 'commit', '-m', f'update db {ts}'], check=True)
+            subprocess.run(['git', 'push', 'origin', 'main'], check=True)
+        except Exception as e:
+            raise RuntimeError(f'Failed to commit and push the database: {e}')

src/eval_server.py ADDED Viewed

	@@ -0,0 +1,815 @@

+"""Evaluation server."""
+import json
+from datetime import datetime, timedelta, timezone
+from typing import Any
+import gradio as gr
+from allocation import AllocationEngine
+from database import DB
+from instuction_md import (
+    important_notes_instruction,
+    login_instruction,
+    nav_instruction,
+    q1_instruction,
+    q2_instruction,
+)
+# Type alias for the 8-element tuple that most handlers return (no user_state slot).
+# The trailing comment on each entry names the value carried in that position;
+# _error_return below builds a tuple of exactly this shape.
+ReturnTuple = tuple[
+    dict[str, Any],  # img
+    dict[str, Any],  # label_md
+    dict[str, Any],  # words_checkbox
+    dict[str, Any],  # done_md
+    dict[str, Any],  # current_idx_md
+    dict[str, Any],  # progress_md
+    str | None,  # current_item_state
+    dict[str, Any],  # score
+]
+# start_or_resume returns 9 elements (includes user_state)
+StartReturnTuple = tuple[
+    dict[str, Any],  # img
+    dict[str, Any],  # label_md
+    dict[str, Any],  # words_checkbox
+    dict[str, Any],  # done_md
+    dict[str, Any],  # current_idx_md
+    dict[str, Any],  # progress_md
+    dict[str, Any],  # user_state
+    str | None,  # current_item_state
+    str | None,  # score
+]
+def _error_return(message: str, keep_current_item: bool = True, current_item_id: str | None = None) -> ReturnTuple:
+    """Create a standard error return tuple with notification.
+    Args:
+        message: Error message to display
+        keep_current_item: If True, preserve current_item_id; if False, set to None
+        current_item_id: The current item ID (only used if keep_current_item is True)
+    Returns:
+        Standard 8-element return tuple with error message
+    """
+    # Show warning notification (3 seconds)
+    gr.Warning(message, duration=3)
+    return (
+        gr.update(),
+        gr.update(),
+        gr.update(),
+        gr.update(value=message),
+        gr.update(),
+        gr.update(),
+        current_item_id if keep_current_item else None,
+        gr.update(),
+    )
+def format_image_path(template: str, label: str, path: str) -> str:
+    """Replace the template with the label and path."""
+    return template.format(label=label, path=path)
+def app_main(args) -> None:
+    """Run the evaluation server."""
+    # Set custom temp directory to avoid permission issues with shared /tmp/gradio
+    # user_temp_dir = os.path.join(tempfile.gettempdir(), f'gradio_{os.getenv("USER", "user")}')
+    # os.makedirs(user_temp_dir, exist_ok=True)
+    # os.environ['GRADIO_TEMP_DIR'] = user_temp_dir
+    db_path = args.db_path
+    db = DB(db_path)
+    # Get auto-allocation setting
+    auto_allo_num = args.auto_allo_num
+    allowed_users = args.allowed_users
+    with gr.Blocks(title='Icon Evaluation', theme=gr.themes.Base()) as demo:
+        user_state = gr.State({'user_id': None})
+        current_item_state = gr.State(None)  # current item_id
+        # Instructions page (shown first)
+        with gr.Column(visible=True) as instructions_page:
+            # gr.Markdown(task_instructions)
+            gr.Markdown('# Instructions')
+            gr.Markdown(
+                'Welcome to the Icon Evaluation Task! Please read the instructions first time you start the task.'
+            )
+            with gr.Walkthrough(selected=0) as walkthrough:
+                with gr.Step('Login', id=0):
+                    gr.Markdown(login_instruction)
+                    gr.HTML(
+                        """<img src="https://image2url.com/images/1760534494987-2ee98f6a-8ec5-40fc-b48b-026fc57b8b00.png">"""
+                    )
+                    btn = gr.Button('Next Step')
+                    btn.click(lambda: gr.Walkthrough(selected=1), outputs=walkthrough)
+                with gr.Step('Question 1', id=1):
+                    gr.Markdown(q1_instruction)
+                    gr.HTML(
+                        """<img src="https://image2url.com/images/1760534393840-1f609fc4-66a9-4033-9bca-6b6517ab2e4c.png">"""
+                    )
+                    btn = gr.Button('Next Step')
+                    btn.click(lambda: gr.Walkthrough(selected=2), outputs=walkthrough)
+                with gr.Step('Question 2', id=2):
+                    gr.Markdown(q2_instruction)
+                    gr.HTML(
+                        """<img src="https://image2url.com/images/1760534411276-0aa6e148-935d-4901-97c6-2697ae26f52e.png">"""
+                    )
+                    btn = gr.Button('Next Step')
+                    btn.click(lambda: gr.Walkthrough(selected=3), outputs=walkthrough)
+                with gr.Step('Important Notes', id=3):
+                    gr.Markdown(important_notes_instruction)
+                    gr.HTML(
+                        """<img src="https://image2url.com/images/1760534430736-43665629-d687-4fa9-8d13-8e577fbee25d.png">"""
+                    )
+                    gr.Markdown(nav_instruction)
+                    gr.HTML(
+                        """<img src="https://image2url.com/images/1760534445418-88e9259a-76f8-47ca-80df-7efe06c546c8.png">"""
+                    )
+            start_task_btn = gr.Button('🚀 Start Evaluation Task', variant='primary', size='lg')
+        # Main evaluation interface (hidden initially)
+        with gr.Column(visible=False) as main_page:
+            with gr.Row():
+                user_id_inp = gr.Textbox(label='User ID', placeholder='e.g.: alice_01', scale=2)
+                start_btn = gr.Button('Start/Resume', variant='primary', scale=1)
+            # Progress indicators (At: current index, Done: completed count)
+            with gr.Row():
+                current_idx_md = gr.Markdown('**At:** -/-', visible=False)
+                progress_md = gr.Markdown('**Done:** -/-', visible=False)
+            # Image display (disable download function)
+            img = gr.Image(label='Image', type='filepath', height=256, show_download_button=False)
+            # Collapsible task guide
+            with gr.Accordion('Task Instructions (Click to expand)', open=False):
+                gr.Markdown(
+                    '## Question 1: Relevance Score (1-5)\n'
+                    'Rate how relevant the icon is to the given label on a scale of 1 to 5:\n'
+                    '- **5**: Strongly related (highly consistent with the label, clear match)\n'
+                    '- **4**: Moderately related (clear partial connection, overall closer to related)\n'
+                    '- **3**: Neutral/uncertain (ambiguous, could be seen as either related or unrelated)\n'
+                    '- **2**: Weakly related (some minor or indirect connection, overall closer to unrelated)\n'
+                    '- **1**: Completely unrelated (no clear connection)\n'
+                    '## Question 2: Keyword Presence\n'
+                    'Check the 10 keywords and indicate which ones are **NOT** visible in the icon image:\n'
+                    '- Select **specific keywords** that are missing from the image, OR\n'
+                    '- Select **"ALL WORDS PRESENT"** if all 10 keywords are visible in the image\n'
+                    'You must select at least one option (either specific missing keywords or "ALL WORDS PRESENT")'
+                )
+            # Relevance score
+            label_md = gr.Markdown(visible=False)
+            score = gr.Radio(
+                choices=['1', '2', '3', '4', '5'],
+                label='1-5 points: the relevance of the image to the label',
+                interactive=True,
+            )
+            # 10 words checkbox
+            words_checkbox = gr.CheckboxGroup(
+                choices=[],
+                label='10 words: Select the words that are NOT presented in the image or "ALL WORDS PRESENT"',
+                interactive=True,
+                visible=False,
+            )
+            # Action buttons
+            with gr.Row():
+                submit_btn = gr.Button('Submit and Next', variant='primary')
+            # Navigation controls
+            with gr.Row():
+                jump_idx = gr.Number(label='Jump to the index (1-indexed)', precision=0)
+                jump_btn = gr.Button('Jump')
+                prev_btn = gr.Button('Prev')
+                next_btn = gr.Button('Next')
+            # Status messages
+            done_md = gr.Markdown(visible=False)
+        # Button to show main page
+        start_task_btn.click(
+            fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
+            outputs=[instructions_page, main_page],
+            queue=False,
+        )
+        def get_user_items_list(user_id: str) -> list[dict[str, Any]]:
+            """Get ordered list of items assigned to user."""
+            with db._connect() as conn:
+                cur = conn.execute(
+                    """
+                    SELECT t.item_id, t.label, t.path, t.words, t.order_key, a.status
+                    FROM assignments a
+                    JOIN tasks t ON a.round_id = t.round_id AND a.item_id = t.item_id
+                    WHERE a.round_id = ? AND a.user_id = ?
+                    ORDER BY t.order_key
+                    """,
+                    (args.round_id, user_id),
+                )
+                rows = cur.fetchall()
+            items_list = []
+            for row in rows:
+                item_id, label, path, words_json, order_key, status = row
+                items_list.append(
+                    {
+                        'item_id': item_id,
+                        'label': label,
+                        'path': path,
+                        'words': json.loads(words_json),
+                        'order_key': order_key,
+                        'status': status,
+                    }
+                )
+            return items_list
+        def get_next_item(user_id: str) -> dict[str, Any] | None:
+            """Get the next pending or in_progress item for user to resume."""
+            items_list = get_user_items_list(user_id)
+            for item in items_list:
+                if item['status'] in ('pending', 'in_progress'):
+                    return item
+            return None
+        def start_or_resume(user_id: str, state: dict[str, Any]) -> StartReturnTuple:
+            """Start or resume the evaluation.
+            Return: img, label_md, words_checkbox, done_md, current_idx_md, progress_md,
+                    state, current_item_state, score
+            """
+            user_id = user_id.strip()
+            if allowed_users and user_id not in allowed_users:
+                gr.Warning('User ID not allowed', duration=3)
+                return (
+                    gr.update(),
+                    gr.update(),
+                    gr.update(),
+                    gr.update(visible=True, value='User ID not allowed'),
+                    gr.update(),
+                    gr.update(),
+                    state,
+                    None,
+                    None,
+                )
+            if not user_id or user_id == '':
+                gr.Warning('Please input the user ID', duration=3)
+                return (
+                    gr.update(),
+                    gr.update(),
+                    gr.update(),
+                    gr.update(visible=True, value='Please input the user ID.'),
+                    gr.update(),
+                    gr.update(),
+                    state,
+                    None,
+                    None,
+                )
+            state = {'user_id': user_id}
+            db.cleanup_expired_leases(args.round_id)
+            items_list = get_user_items_list(user_id)
+            # Auto-allocate for new users
+            if not items_list and auto_allo_num > 0:
+                engine = AllocationEngine(db, args.round_id, operator='auto_system')
+                result = engine.allocate(
+                    from_source='unassigned',
+                    to_target=f'user:{user_id}',
+                    amount_spec=f'count:{auto_allo_num}',
+                    redundancy=args.redundancy,
+                    force=False,
+                    dry_run=False,
+                    reason='Auto-allocation for new user on first login',
+                )
+                if result['success']:
+                    allocated_count = result['affected_count']
+                    gr.Info(f'Welcome! You have {allocated_count} tasks to evaluate. Enjoy!', duration=5)
+                    items_list = get_user_items_list(user_id)
+                else:
+                    error_msg = result.get('error', 'Failed to automatically allocate tasks')
+                    if 'No tasks available' in error_msg:
+                        gr.Warning('No tasks available. Stay tuned!', duration=5)
+                    else:
+                        gr.Warning(
+                            f'Failed to automatically allocate tasks: {error_msg}. '
+                            'Please contact the admin([email protected]).',
+                            duration=8,
+                        )
+            if not items_list:
+                gr.Warning('No tasks assigned to this user. Please contact admin.', duration=5)
+                return (
+                    gr.update(visible=False),
+                    gr.update(),
+                    gr.update(),
+                    gr.update(visible=True, value='No tasks assigned to this user. Please contact admin.'),
+                    gr.update(),
+                    gr.update(),
+                    state,
+                    None,
+                    None,
+                )
+            total = len(items_list)
+            completed_items = [it for it in items_list if it['status'] == 'completed']
+            done_count = len(completed_items)
+            nxt = get_next_item(user_id)
+            if nxt is None:  # all items are done
+                # Load the last completed item and show completion message
+                gr.Info('Congratulations! All tasks completed. Thank you for participating! ☺️', duration=10)
+                last_item = items_list[-1] if items_list else None
+                if last_item:
+                    # Get the 8-element result from helper and insert user_state
+                    (
+                        img_up,
+                        label_up,
+                        words_up_cb,
+                        done_up,
+                        idx_up,
+                        prog_up,
+                        item_id_val,
+                        score_up,
+                    ) = _load_item_helper(user_id, last_item, items_list, len(items_list))
+                    # Return 9 elements with user_state inserted at position 7
+                    return (
+                        img_up,
+                        label_up,
+                        words_up_cb,
+                        done_up,
+                        idx_up,
+                        prog_up,
+                        state,
+                        item_id_val,
+                        score_up,
+                    )
+                else:
+                    return (
+                        gr.update(visible=False),
+                        gr.update(),
+                        gr.update(),
+                        gr.update(visible=True, value='All tasks completed. Thank you for participating!'),
+                        gr.update(),
+                        gr.update(visible=True, value=f'**Done:** {done_count}/{total}'),
+                        state,
+                        None,
+                        None,
+                    )
+            # Load the next pending item
+            item_id = nxt['item_id']
+            # Update status to in_progress and acquire lease
+            now_dt = datetime.now(timezone.utc)
+            lease_until = (now_dt + timedelta(seconds=600)).isoformat()
+            with db._connect() as conn, db._lock:
+                conn.execute(
+                    """
+                    UPDATE assignments
+                    SET status = 'in_progress', started_at = ?, lease_until = ?
+                    WHERE round_id = ? AND item_id = ? AND user_id = ?
+                    """,
+                    (now_dt.isoformat(), lease_until, args.round_id, item_id, user_id),
+                )
+            img_path = format_image_path(args.image_template, nxt['label'], nxt['path'])
+            current_idx = next((i + 1 for i, it in enumerate(items_list) if it['item_id'] == item_id), 1)
+            label_md_value = f'## Label: <span style="color: #00AA00; font-weight: bold;">{nxt["label"]}</span>'
+            return (
+                gr.update(value=img_path, visible=True),
+                gr.update(visible=True, value=label_md_value),
+                gr.update(visible=True, choices=nxt['words'] + ['ALL WORDS PRESENT'], value=[]),
+                gr.update(visible=False),
+                gr.update(visible=True, value=f'**At:** {current_idx}/{total}'),
+                gr.update(visible=True, value=f'**Done:** {done_count}/{total}'),
+                state,
+                item_id,
+                gr.update(value=None),
+            )
+        def submit(
+            user_state_val: dict[str, Any],
+            current_item_id: str | None,
+            score_val: str | None,
+            words_not_present: list[str],
+        ) -> ReturnTuple:
+            """Submit the answer and move to the next item."""
+            if not user_state_val or not user_state_val.get('user_id'):
+                return _error_return('Please start/continue', current_item_id=current_item_id)
+            if not current_item_id:
+                return _error_return('No current item', current_item_id=current_item_id)
+            # check if user selected 'ALL WORDS PRESENT', no other words should be selected
+            if 'ALL WORDS PRESENT' in words_not_present and len(words_not_present) > 1:
+                return _error_return(
+                    'Please select either ALL WORDS PRESENT or specific words', current_item_id=current_item_id
+                )
+            # check 2 questions are answered
+            if score_val is None or len(words_not_present) == 0:
+                return _error_return(
+                    'Please answer the 2 questions before submitting', current_item_id=current_item_id
+                )
+            user_id = user_state_val['user_id']
+            # Get item info
+            with db._connect() as conn:
+                cur = conn.execute(
+                    'SELECT label, path FROM tasks WHERE round_id = ? AND item_id = ?',
+                    (args.round_id, current_item_id),
+                )
+                row = cur.fetchone()
+                if not row:
+                    return _error_return('Item not found in database', current_item_id=current_item_id)
+                label, path = row
+            img_path = format_image_path(args.image_template, label, path)
+            # Filter out 'ALL WORDS PRESENT' from the selected words - if selected, means no words are missing
+            filtered_words = [w for w in (words_not_present or []) if w != 'ALL WORDS PRESENT']
+            # Record answer (filtered_words is the list of words not in the image, empty if 'ALL PRESENT' was selected)
+            db.record_answer(args.round_id, user_id, current_item_id, label, img_path, int(score_val), filtered_words)
+            # Update assignment status to completed
+            now = datetime.now(timezone.utc).isoformat()
+            with db._connect() as conn, db._lock:
+                conn.execute(
+                    """
+                    UPDATE assignments
+                    SET status = 'completed', completed_at = ?, lease_until = NULL
+                    WHERE round_id = ? AND item_id = ? AND user_id = ?
+                    """,
+                    (now, args.round_id, current_item_id, user_id),
+                )
+                # update redundancy_completed
+                conn.execute(
+                    """
+                    UPDATE task_config
+                    SET redundancy_completed = (
+                        SELECT COUNT(*) FROM assignments
+                        WHERE round_id = ? AND item_id = ? AND status = 'completed'
+                    )
+                    WHERE round_id = ? AND item_id = ?
+                    """,
+                    (args.round_id, current_item_id, args.round_id, current_item_id),
+                )
+            items_list = get_user_items_list(user_id)
+            total = len(items_list)
+            completed_items = [it for it in items_list if it['status'] == 'completed']
+            done_count = len(completed_items)
+            # Find current index and go to the next item in order
+            current_idx_in_list = next(
+                (i for i, item in enumerate(items_list) if item['item_id'] == current_item_id), None
+            )
+            # db auto commit
+            if (
+                args.auto_commit > 0
+                and current_idx_in_list is not None
+                and (current_idx_in_list + 1) % args.auto_commit == 0
+            ):
+                try:
+                    db.commit_and_push_db()
+                except Exception as e:
+                    print(f'Failed to commit and push the database: {e}')
+            if current_idx_in_list is None or current_idx_in_list >= len(items_list) - 1:
+                # This was the last item - show completion message but stay on current item
+                gr.Info('Congratulations! All tasks completed. Thank you for participating! ☺️', duration=10)
+                # Reload the current (last) item to show it with answers
+                last_item = items_list[-1] if items_list else None
+                if last_item:
+                    return _load_item_helper(user_id, last_item, items_list, len(items_list))
+                else:
+                    # This should not happen (items_list empty after completion check)
+                    gr.Info('All tasks completed. Thank you for participating!', duration=10)
+                    return (
+                        gr.update(visible=False),
+                        gr.update(),
+                        gr.update(),
+                        gr.update(visible=True, value='All tasks completed. Thank you for participating!'),
+                        gr.update(),
+                        gr.update(visible=True, value=f'**Done:** {done_count}/{total}'),
+                        current_item_id,
+                        gr.update(),
+                    )
+            # Go to the next item in sequence
+            next_item = items_list[current_idx_in_list + 1]
+            return _load_item_helper(user_id, next_item, items_list, current_idx_in_list + 2)
+        def _load_item_helper(
+            user_id: str, item: dict[str, Any], items_list: list[dict[str, Any]], current_idx: int
+        ) -> tuple:
+            """Load an item and return Gradio updates."""
+            item_id = item['item_id']
+            total = len(items_list)
+            completed_items = [it for it in items_list if it['status'] == 'completed']
+            done_count = len(completed_items)
+            # Handle different status: completed vs pending/in_progress
+            if item['status'] == 'completed':
+                # Completed item: no lease needed, just load existing answer
+                existing = db.get_answer(args.round_id, user_id, item_id)
+                if existing:
+                    s, words_not_present_list = existing
+                    s_val = str(int(s))
+                    # If no words are missing (empty list), show 'ALL WORDS PRESENT' as selected
+                    words_cb_val = words_not_present_list if words_not_present_list else ['ALL WORDS PRESENT']
+                else:
+                    # This should not happen: completed item without answer
+                    raise ValueError(f'Item {item_id} is marked as completed but has no answer in database')
+            else:
+                # Pending or in_progress: acquire lease
+                if item['status'] == 'pending':
+                    # Update to in_progress
+                    now_dt = datetime.now(timezone.utc)
+                    lease_until = (now_dt + timedelta(seconds=600)).isoformat()
+                    with db._connect() as conn, db._lock:
+                        conn.execute(
+                            """
+                            UPDATE assignments
+                            SET status = 'in_progress', started_at = ?, lease_until = ?
+                            WHERE round_id = ? AND item_id = ? AND user_id = ?
+                            """,
+                            (now_dt.isoformat(), lease_until, args.round_id, item_id, user_id),
+                        )
+                elif item['status'] == 'in_progress':
+                    # Renew lease
+                    now_dt = datetime.now(timezone.utc)
+                    lease_until = (now_dt + timedelta(seconds=600)).isoformat()
+                    with db._connect() as conn, db._lock:
+                        conn.execute(
+                            """
+                            UPDATE assignments
+                            SET lease_until = ?
+                            WHERE round_id = ? AND item_id = ? AND user_id = ?
+                            """,
+                            (lease_until, args.round_id, item_id, user_id),
+                        )
+                s_val, words_cb_val = None, []
+            img_path = format_image_path(args.image_template, item['label'], item['path'])
+            label_md_value = f'## Label: <span style="color: #00AA00; font-weight: bold;">{item["label"]}</span>'
+            return (
+                gr.update(value=img_path, visible=True),
+                gr.update(visible=True, value=label_md_value),
+                gr.update(visible=True, choices=item['words'] + ['ALL WORDS PRESENT'], value=words_cb_val),
+                gr.update(visible=False),
+                gr.update(visible=True, value=f'**At:** {current_idx}/{total}'),
+                gr.update(visible=True, value=f'**Done:** {done_count}/{total}'),
+                item_id,
+                gr.update(value=s_val),
+            )
+        def _find_item_by_index(user_state_val: dict[str, Any], index1: int) -> str | None:
+            """Get the item id by index in a user's assigned items."""
+            if not user_state_val or not user_state_val.get('user_id'):
+                return None
+            user_id = user_state_val['user_id']
+            items_list = get_user_items_list(user_id)
+            if index1 < 1 or index1 > len(items_list):
+                return None
+            return items_list[index1 - 1]['item_id']
+        def _load_item(user_state_val: dict[str, Any], item_id: str) -> ReturnTuple:
+            """Load and display an item."""
+            if not user_state_val or not user_state_val.get('user_id'):
+                return _error_return('Please start/continue', keep_current_item=False)
+            user_id = user_state_val['user_id']
+            items_list = get_user_items_list(user_id)
+            # Find the item
+            item = next((it for it in items_list if it['item_id'] == item_id), None)
+            if not item:
+                return _error_return('Item not found in your assignments', keep_current_item=False)
+            current_idx = next((i + 1 for i, it in enumerate(items_list) if it['item_id'] == item_id), 1)
+            return _load_item_helper(user_id, item, items_list, current_idx)
+        def jump_to(
+            user_state_val: dict[str, Any], index_number: float | None, current_item_id: str | None
+        ) -> ReturnTuple:
+            """Jump to the item by index (only completed items)."""
+            if not user_state_val or not user_state_val.get('user_id'):
+                return _error_return('Please start/continue', current_item_id=current_item_id)
+            if index_number is None:
+                return _error_return('Please input the index', current_item_id=current_item_id)
+            target_index = int(index_number)
+            target_item_id = _find_item_by_index(user_state_val, target_index)
+            if not target_item_id:
+                return _error_return('Index out of range', current_item_id=current_item_id)
+            # Check jump constraints
+            user_id = user_state_val['user_id']
+            items_list = get_user_items_list(user_id)
+            target_item = next((it for it in items_list if it['item_id'] == target_item_id), None)
+            if not target_item:
+                return _error_return('Target item not found', current_item_id=current_item_id)
+            # Find the first non-completed item index
+            first_pending_idx = next(
+                (i for i, it in enumerate(items_list) if it['status'] != 'completed'), len(items_list)
+            )
+            # Allow jump to: completed items OR the first non-completed item (but not beyond)
+            if target_item['status'] != 'completed' and target_index - 1 > first_pending_idx:
+                return _error_return(
+                    'Can only jump to completed items or the first pending item', current_item_id=current_item_id
+                )
+            return _load_item(user_state_val, target_item_id)
+        def handle_prev(user_state_val: dict[str, Any], current_item_id: str | None) -> ReturnTuple:
+            """Handle Prev button: navigate to previous item (completed items only)."""
+            if not user_state_val or not user_state_val.get('user_id'):
+                return _error_return('Please start/continue', current_item_id=current_item_id)
+            user_id = user_state_val['user_id']
+            items_list = get_user_items_list(user_id)
+            if not items_list:
+                return _error_return('No items assigned', keep_current_item=False)
+            if current_item_id is None:
+                return _error_return('No current item', current_item_id=current_item_id)
+            current_idx = next((i for i, it in enumerate(items_list) if it['item_id'] == current_item_id), None)
+            if current_idx is None:
+                return _error_return('Current item not found in your assignments', current_item_id=current_item_id)
+            if current_idx == 0:
+                return _error_return('Already at the first item', current_item_id=current_item_id)
+            target_idx = current_idx - 1
+            target_item = items_list[target_idx]
+            # Only allow navigating to completed items
+            if target_item['status'] != 'completed':
+                return _error_return(
+                    'Can only navigate to completed items using Prev', current_item_id=current_item_id
+                )
+            return _load_item(user_state_val, target_item['item_id'])
+        def handle_next(
+            user_state_val: dict[str, Any],
+            current_item_id: str | None,
+        ) -> ReturnTuple:
+            """Navigate to next item.
+            Rules:
+            - If current item is NOT completed, require submission first
+            - If current item is completed, allow free navigation
+            """
+            if not user_state_val or not user_state_val.get('user_id'):
+                return _error_return('Please start/continue', current_item_id=current_item_id)
+            user_id = user_state_val['user_id']
+            items_list = get_user_items_list(user_id)
+            if not items_list:
+                return _error_return('No items assigned', keep_current_item=False)
+            if current_item_id is None:
+                return _error_return('No current item', current_item_id=current_item_id)
+            current_idx = next((i for i, it in enumerate(items_list) if it['item_id'] == current_item_id), None)
+            if current_idx is None:
+                return _error_return('Current item not found in your assignments', current_item_id=current_item_id)
+            if current_idx >= len(items_list) - 1:
+                return _error_return('Already at the last item', current_item_id=current_item_id)
+            current_item = items_list[current_idx]
+            current_is_completed = current_item['status'] == 'completed'
+            # If current item is NOT completed, require submission
+            if not current_is_completed:
+                return _error_return(
+                    'Please submit your answer before moving to the next item', current_item_id=current_item_id
+                )
+            # Move to next item
+            target_idx = current_idx + 1
+            target_item = items_list[target_idx]
+            return _load_item(user_state_val, target_item['item_id'])
+        start_btn.click(
+            fn=start_or_resume,
+            inputs=[user_id_inp, user_state],
+            outputs=[
+                img,
+                label_md,
+                words_checkbox,
+                done_md,
+                current_idx_md,
+                progress_md,
+                user_state,
+                current_item_state,
+                score,
+            ],
+            queue=True,
+        )
+        submit_btn.click(
+            fn=submit,
+            inputs=[user_state, current_item_state, score, words_checkbox],
+            outputs=[
+                img,
+                label_md,
+                words_checkbox,
+                done_md,
+                current_idx_md,
+                progress_md,
+                current_item_state,
+                score,
+            ],
+            queue=True,
+        )
+        jump_btn.click(
+            fn=jump_to,
+            inputs=[user_state, jump_idx, current_item_state],
+            outputs=[
+                img,
+                label_md,
+                words_checkbox,
+                done_md,
+                current_idx_md,
+                progress_md,
+                current_item_state,
+                score,
+            ],
+            queue=True,
+        )
+        prev_btn.click(
+            fn=handle_prev,
+            inputs=[user_state, current_item_state],
+            outputs=[
+                img,
+                label_md,
+                words_checkbox,
+                done_md,
+                current_idx_md,
+                progress_md,
+                current_item_state,
+                score,
+            ],
+            queue=True,
+        )
+        next_btn.click(
+            fn=handle_next,
+            inputs=[user_state, current_item_state, score, words_checkbox],
+            outputs=[
+                img,
+                label_md,
+                words_checkbox,
+                done_md,
+                current_idx_md,
+                progress_md,
+                current_item_state,
+                score,
+            ],
+            queue=True,
+        )
+    basic_user = args.basic_user
+    basic_pass = args.basic_pass
+    auth_tuple = (basic_user, basic_pass)
+    # Extract image directory from template to add to allowed_paths
+    # e.g., "/path/to/img/{label}/{path}" -> "/path/to/img"
+    image_dir = args.image_template.split('{')[0].rstrip('/')
+    if image_dir:
+        allowed_paths = [image_dir]
+    else:
+        allowed_paths = None
+    print(f'Allowed paths: {allowed_paths}')
+    demo.queue(max_size=256).launch(
+        share=True,
+        auth=auth_tuple,
+        allowed_paths=allowed_paths,
+    )

src/instuction_md.py ADDED Viewed

	@@ -0,0 +1,40 @@

+"""Task instruction for icon evaluation."""
+# Each constant below is a Markdown snippet for one section of the annotator instructions.
+# Login section: how to enter a user ID and resume earlier progress.
+login_instruction = """
+## Login
+Every time you start evaluating, input your user ID (e.g.: janeding) in the input box, and click the "Start/Resume" button. This will load your previous evaluation progress if you have logged in before.
+"""
+# Question 1: the meaning of each relevance score from 1 to 5.
+q1_instruction = """
+## Question 1: Relevance Score (1-5)
+Rate how relevant the icon is to the given label on a scale of 1 to 5:
+- **5**: Strongly related (highly consistent with the label, clear match)
+- **4**: Moderately related (clear partial connection, overall closer to related)
+- **3**: Neutral/uncertain (ambiguous, could be seen as either related or unrelated)
+- **2**: Weakly related (some minor or indirect connection, overall closer to unrelated)
+- **1**: Completely unrelated (no clear connection)
+"""
+# Question 2: select the keywords that are missing, or the "ALL WORDS PRESENT" option.
+q2_instruction = """
+## Question 2: Keyword Presence
+Check the 10 keywords and indicate which ones are **NOT** visible in the icon image:
+- Select **specific keywords** that are **missing** from the image, OR
+- Select **"ALL WORDS PRESENT"** if all 10 keywords are visible in the image
+You must select at least one option (either specific missing keywords or "ALL WORDS PRESENT")
+"""
+# General reminders about submitting, resuming and editing answers.
+important_notes_instruction = """
+## Notes
+- Always **SUBMIT** your answer.
+- You can resume from where you left off by logging in with the same User ID.
+- You can modify your previous answers by navigating back.
+"""
+# Navigation section: what the Submit and Next, Next, Prev and Jump to controls do.
+nav_instruction = """
+## Navigation
+- **Submit and Next**: Submit your answer and move to the next icon
+- **Next**: Navigate forward (up to your latest progress)
+- **Prev**: Navigate backward to review previous answers
+- **Jump to**: Jump to a specific index (1-indexed, up to your latest progress)
+"""