hailey2024 commited on
Commit
309ceaa
·
1 Parent(s): 126eac4

init src files

Browse files
Files changed (6) hide show
  1. README copy.md +14 -0
  2. app.py +33 -0
  3. src/allocation.py +917 -0
  4. src/database.py +146 -0
  5. src/eval_server.py +815 -0
  6. src/instuction_md.py +40 -0
README copy.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Regrounding
3
+ emoji: 🏃
4
+ colorFrom: yellow
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 5.49.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: cc-by-nc-nd-4.0
11
+ short_description: Icon evaluation interface.
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """App for the IconEval interface."""
2
+
3
+ import os
4
+ from argparse import Namespace
5
+
6
+ from src.eval_server import app_main
7
+
8
+ REPO_ROOT = os.path.dirname(os.path.abspath(__file__))
9
+
10
+ DB_PATH = os.path.join(REPO_ROOT, 'eval_round_r1.db')
11
+ IMAGE_TEMPLATE = os.path.join(REPO_ROOT, 'images', '{label}', '{path}')
12
+ REDUNDANCY = int(os.environ['REDUNDANCY'])
13
+ ALLOWED_USERS_STR = os.environ['ALLOW_USERS']
14
+ ALLOWED_USERS = [u.strip() for u in ALLOWED_USERS_STR.split(',') if u.strip()]
15
+
16
+ BASIC_USER = os.environ['BASIC_USER']
17
+ BASIC_PASS = os.environ['BASIC_PASS']
18
+ ROUND_ID = 'r1'
19
+ AUTO_ALLO_NUM = int(os.environ['AUTO_ALLO_NUM'])
20
+ AUTO_COMMIT = int(os.environ.get('AUTO_COMMIT', '10'))
21
+
22
+ args = Namespace(
23
+ db_path=DB_PATH,
24
+ round_id=ROUND_ID,
25
+ redundancy=REDUNDANCY,
26
+ image_template=IMAGE_TEMPLATE,
27
+ allowed_users=ALLOWED_USERS,
28
+ auto_allo_num=AUTO_ALLO_NUM,
29
+ auto_commit=AUTO_COMMIT,
30
+ basic_user=BASIC_USER,
31
+ basic_pass=BASIC_PASS,
32
+ )
33
+ app_main(args)
src/allocation.py ADDED
@@ -0,0 +1,917 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task allocation engine for dynamic workload management."""
2
+
3
+ import hashlib
4
+ import json
5
+ from datetime import datetime, timezone
6
+ from typing import Any
7
+
8
+ from database import DB
9
+
10
+
11
+ class AllocationValidator:
12
+ """Input validator for allocation operations."""
13
+
14
+ @staticmethod
15
+ def parse_amount_spec(spec: str) -> dict[str, Any]:
16
+ """Parse amount specification.
17
+
18
+ Examples:
19
+ 'count:100' -> {'type': 'count', 'value': 100}
20
+ 'ratio:0.5' -> {'type': 'ratio', 'value': 0.5}
21
+ 'equal' -> {'type': 'equal'}
22
+ 'ratio:A:0.3,B:0.7' -> {'type': 'ratio_per_user', 'users': {'A': 0.3, 'B': 0.7}}
23
+ 'all' -> {'type': 'all'}
24
+
25
+ """
26
+ if spec == 'equal':
27
+ return {'type': 'equal'}
28
+
29
+ if spec == 'all':
30
+ return {'type': 'all'}
31
+
32
+ if spec.startswith('count:'):
33
+ count = int(spec[6:])
34
+ if count <= 0:
35
+ raise ValueError(f'Count must be positive, got: {count}')
36
+ return {'type': 'count', 'value': count}
37
+
38
+ if spec.startswith('ratio:'):
39
+ rest = spec[6:]
40
+ # Check if per-user ratio
41
+ if ':' in rest:
42
+ # Format: ratio:A:0.3,B:0.3,C:0.4
43
+ user_ratios = {}
44
+ for part in rest.split(','):
45
+ user, ratio_str = part.split(':')
46
+ ratio = float(ratio_str)
47
+ if ratio < 0 or ratio > 1:
48
+ raise ValueError(f'Ratio must be in [0, 1], got: {ratio} for user {user}')
49
+ user_ratios[user.strip()] = ratio
50
+
51
+ total = sum(user_ratios.values())
52
+ if abs(total - 1.0) > 0.001:
53
+ raise ValueError(f'User ratios must sum to 1.0, got: {total}')
54
+
55
+ return {'type': 'ratio_per_user', 'users': user_ratios}
56
+ else:
57
+ # Single ratio
58
+ ratio = float(rest)
59
+ if ratio <= 0 or ratio > 1:
60
+ raise ValueError(f'Ratio must be in (0, 1], got: {ratio}')
61
+ return {'type': 'ratio', 'value': ratio}
62
+
63
+ raise ValueError(f'Invalid amount spec: {spec}')
64
+
65
+ @staticmethod
66
+ def parse_target(target: str) -> dict[str, Any]:
67
+ """Parse target specification.
68
+
69
+ Examples:
70
+ 'user:A' -> {'type': 'single', 'user': 'A'}
71
+ 'users:A,B,C' -> {'type': 'multiple', 'users': ['A', 'B', 'C']}
72
+
73
+ """
74
+ if target.startswith('user:'):
75
+ user = target[5:].strip()
76
+ if not user:
77
+ raise ValueError('User ID cannot be empty')
78
+ return {'type': 'single', 'user': user}
79
+
80
+ if target.startswith('users:'):
81
+ users = [u.strip() for u in target[6:].split(',')]
82
+ users = list(dict.fromkeys(users)) # Remove duplicates, preserve order
83
+ if not users:
84
+ raise ValueError('User list cannot be empty')
85
+ if len(users) != len(target[6:].split(',')):
86
+ print('Warning: Duplicate users removed from target')
87
+ return {'type': 'multiple', 'users': users}
88
+
89
+ raise ValueError(f'Invalid target: {target}')
90
+
91
+ @staticmethod
92
+ def validate_no_self_overlap(from_users: list[str], to_users: list[str]) -> list[str]:
93
+ """Remove self-overlapping users and return filtered list."""
94
+ overlap = set(from_users) & set(to_users)
95
+ if overlap:
96
+ print(f'Warning: Removing self-overlapping users: {overlap}')
97
+ return [u for u in to_users if u not in overlap]
98
+ return to_users
99
+
100
+ @staticmethod
101
+ def check_users_exist(db: DB, round_id: str, users: list[str]) -> None:
102
+ """Check if users exist in this round (warn for new users)."""
103
+ with db._connect() as conn:
104
+ # Check if assignments table exists
105
+ cur = conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='assignments'")
106
+ if not cur.fetchone():
107
+ return # New schema not initialized, skip check
108
+
109
+ for user in users:
110
+ cur = conn.execute(
111
+ 'SELECT COUNT(*) FROM assignments WHERE round_id = ? AND user_id = ?', (round_id, user)
112
+ )
113
+ count = cur.fetchone()[0]
114
+ if count == 0:
115
+ print(f"Info: User '{user}' is new to this round")
116
+
117
+
118
+ class AllocationEngine:
119
+ """Task allocation engine for managing workload distribution.
120
+
121
+ Methods:
122
+ - allocate(from_source, to_target, amount_spec, redundancy, force, dry_run, reason)
123
+ - release(from_user, amount_spec, dry_run, reason)
124
+ - rebalance(users, mode_spec, dry_run, reason)
125
+
126
+ """
127
+
128
+ def __init__(
129
+ self,
130
+ db: DB,
131
+ round_id: str,
132
+ operator: str = 'admin',
133
+ ):
134
+ """Initialize allocation engine.
135
+
136
+ Args:
137
+ db: Database instance
138
+ round_id: Round ID
139
+ operator: Operator name for audit logging
140
+
141
+ """
142
+ self.db = db
143
+ self.round_id = round_id
144
+ self.operator = operator
145
+
146
+ def allocate(
147
+ self,
148
+ from_source: str,
149
+ to_target: str,
150
+ amount_spec: str,
151
+ redundancy: int,
152
+ force: bool = False,
153
+ dry_run: bool = False,
154
+ reason: str = '',
155
+ ) -> dict[str, Any]:
156
+ """Core allocation function.
157
+
158
+ Redundancy design:
159
+ - Each task needs N different annotators (redundancy parameter)
160
+ - Each user can only annotate a task ONCE (enforced by PRIMARY KEY)
161
+ - If a user already has a task, it cannot be assigned to them again
162
+ - This may reduce the actual number of tasks allocated if users already
163
+ have some of the source tasks
164
+
165
+ Args:
166
+ from_source: Source of tasks ('unassigned', 'user:A')
167
+ to_target: Target users ('user:A', 'users:A,B,C')
168
+ amount_spec: Amount specification ('count:100', 'ratio:0.5', 'equal')
169
+ redundancy: Number of different people who should label each task
170
+ force: Allow transferring in_progress tasks (default: False)
171
+ dry_run: Preview only, don't execute (default: False)
172
+ reason: Reason for this allocation (for audit)
173
+
174
+ Returns:
175
+ {
176
+ 'success': bool,
177
+ 'affected_users': list[str],
178
+ 'affected_count': int,
179
+ 'distribution': dict[str, int], # user -> count
180
+ 'item_ids_sample': list[str],
181
+ 'error': str # only if success=False
182
+ }
183
+
184
+ """
185
+ # 1. Validate inputs
186
+ validator = AllocationValidator()
187
+ amount_parsed = validator.parse_amount_spec(amount_spec)
188
+ target_parsed = validator.parse_target(to_target)
189
+
190
+ # 2. Get target users
191
+ target_users = self._get_target_users(target_parsed)
192
+ if not target_users:
193
+ return {'success': False, 'error': 'No target users specified', 'affected_count': 0}
194
+
195
+ validator.check_users_exist(self.db, self.round_id, target_users)
196
+
197
+ # 3. Get available source tasks for EACH user (filtered by redundancy at SQL level)
198
+ user_available_tasks = self._get_available_source_items(from_source, target_users, redundancy, force)
199
+
200
+ # Check if any user has available tasks
201
+ total_available = sum(len(tasks) for tasks in user_available_tasks.values())
202
+ if total_available == 0:
203
+ return {
204
+ 'success': False,
205
+ 'error': f'No available tasks from source {from_source} for target users '
206
+ '(tasks may already be assigned or at redundancy limit)',
207
+ 'affected_count': 0,
208
+ }
209
+
210
+ # 4. Calculate distribution considering each user's available tasks
211
+ distribution = self._calculate_distribution_per_user(user_available_tasks, amount_parsed)
212
+
213
+ # 6. Execute or preview
214
+ if dry_run:
215
+ result = self._preview_allocation(distribution)
216
+ else:
217
+ result = self._execute_allocation(from_source, to_target, amount_spec, distribution, force, reason)
218
+
219
+ return result
220
+
221
+ def release(self, from_user: str, amount_spec: str, dry_run: bool = False, reason: str = '') -> dict[str, Any]:
222
+ """Release tasks from a user back to unassigned pool.
223
+
224
+ Args:
225
+ from_user: User ID to release tasks from
226
+ amount_spec: Amount to release ('count:50', 'ratio:0.3', 'all')
227
+ dry_run: Preview only
228
+ reason: Reason for release
229
+
230
+ Returns:
231
+ Result dictionary with success status
232
+
233
+ """
234
+ validator = AllocationValidator()
235
+ amount_parsed = validator.parse_amount_spec(amount_spec)
236
+
237
+ # Get user's pending tasks
238
+ with self.db._connect() as conn:
239
+ cur = conn.execute(
240
+ """
241
+ SELECT item_id
242
+ FROM assignments
243
+ WHERE round_id = ? AND user_id = ? AND status = 'pending'
244
+ ORDER BY assigned_at
245
+ """,
246
+ (self.round_id, from_user),
247
+ )
248
+ pending_items = [row[0] for row in cur.fetchall()]
249
+
250
+ if not pending_items:
251
+ return {'success': False, 'error': 'No pending tasks to release'}
252
+
253
+ # Calculate how many to release
254
+ if amount_parsed['type'] == 'count':
255
+ to_release = pending_items[: min(amount_parsed['value'], len(pending_items))]
256
+ elif amount_parsed['type'] == 'ratio':
257
+ count = int(len(pending_items) * amount_parsed['value'])
258
+ to_release = pending_items[:count]
259
+ elif amount_parsed['type'] == 'all':
260
+ to_release = pending_items
261
+ else:
262
+ return {'success': False, 'error': f'Invalid amount_spec for release: {amount_spec}'}
263
+
264
+ # Preview release
265
+ if dry_run:
266
+ return {
267
+ 'success': True,
268
+ 'dry_run': True,
269
+ 'affected_users': [from_user],
270
+ 'affected_count': len(to_release),
271
+ 'item_ids_sample': to_release[:10],
272
+ }
273
+
274
+ # Execute release
275
+ now = datetime.now(timezone.utc).isoformat()
276
+ with self.db._connect() as conn, self.db._lock:
277
+ placeholders = ','.join('?' * len(to_release))
278
+ conn.execute(
279
+ f"""
280
+ DELETE FROM assignments
281
+ WHERE round_id = ? AND user_id = ? AND item_id IN ({placeholders})
282
+ """,
283
+ (self.round_id, from_user, *to_release),
284
+ )
285
+
286
+ # Record log
287
+ conn.execute(
288
+ """
289
+ INSERT INTO allocation_history
290
+ (round_id, operation, operator, from_source, to_target, amount_spec,
291
+ affected_users, affected_count, item_ids_sample, item_ids_hash,
292
+ dry_run, force, reason, created_at)
293
+ VALUES (?, 'release', ?, ?, 'pool', ?, ?, ?, ?, ?, 0, 0, ?, ?)
294
+ """,
295
+ (
296
+ self.round_id,
297
+ self.operator,
298
+ f'user:{from_user}',
299
+ amount_spec,
300
+ json.dumps([from_user]),
301
+ len(to_release),
302
+ json.dumps(to_release[:10]),
303
+ self._compute_item_hash(to_release),
304
+ reason,
305
+ now,
306
+ ),
307
+ )
308
+
309
+ return {
310
+ 'success': True,
311
+ 'affected_users': [from_user],
312
+ 'affected_count': len(to_release),
313
+ 'item_ids_sample': to_release[:10],
314
+ }
315
+
316
+ def rebalance(self, users: list[str], mode_spec: str, dry_run: bool = False, reason: str = '') -> dict[str, Any]:
317
+ """Rebalance pending tasks among selected users.
318
+
319
+ Args:
320
+ users: List of user IDs to rebalance among
321
+ mode_spec: Rebalance mode ('equal', 'ratio:A:0.3,B:0.7')
322
+ dry_run: Preview only
323
+ reason: Reason for rebalance
324
+
325
+ Returns:
326
+ Result dictionary with success status
327
+
328
+ """
329
+ # 1. Collect all unique pending tasks from these users (avoid duplicates for redundancy > 1)
330
+ with self.db._connect() as conn:
331
+ placeholders_users = ','.join('?' * len(users))
332
+ cur = conn.execute(
333
+ f"""
334
+ SELECT DISTINCT item_id
335
+ FROM assignments
336
+ WHERE round_id = ? AND user_id IN ({placeholders_users}) AND status = 'pending'
337
+ """,
338
+ (self.round_id, *users),
339
+ )
340
+ all_pending = [row[0] for row in cur.fetchall()]
341
+
342
+ if not all_pending:
343
+ return {'success': False, 'error': 'No pending tasks to rebalance'}
344
+
345
+ # 2. Check for redundancy > 1 tasks and collect their current assignment counts
346
+ task_redundancy_info = {}
347
+ with self.db._connect() as conn:
348
+ for item_id in all_pending:
349
+ # Get redundancy config
350
+ cur = conn.execute(
351
+ """
352
+ SELECT redundancy_required
353
+ FROM task_config
354
+ WHERE round_id = ? AND item_id = ?
355
+ """,
356
+ (self.round_id, item_id),
357
+ )
358
+ row = cur.fetchone()
359
+ redundancy_required = row[0] if row else 1
360
+
361
+ # Count current assignments among rebalance users
362
+ placeholders = ','.join('?' * len(users))
363
+ cur = conn.execute(
364
+ f"""
365
+ SELECT COUNT(*)
366
+ FROM assignments
367
+ WHERE round_id = ? AND item_id = ? AND user_id IN ({placeholders}) AND status = 'pending'
368
+ """,
369
+ (self.round_id, item_id, *users),
370
+ )
371
+ current_count = cur.fetchone()[0]
372
+
373
+ task_redundancy_info[item_id] = {
374
+ 'redundancy_required': redundancy_required,
375
+ 'current_count': current_count,
376
+ }
377
+
378
+ # 3. Parse distribution mode
379
+ validator = AllocationValidator()
380
+ amount_parsed = validator.parse_amount_spec(mode_spec)
381
+
382
+ # 4. Warn if there are redundancy > 1 tasks
383
+ redundancy_gt1_tasks = [
384
+ item_id for item_id, info in task_redundancy_info.items() if info['redundancy_required'] > 1
385
+ ]
386
+ if redundancy_gt1_tasks:
387
+ print(
388
+ f'Warning: {len(redundancy_gt1_tasks)} tasks have redundancy > 1. '
389
+ 'Rebalance will redistribute unique tasks (may change redundancy counts). '
390
+ f'Consider using allocate/release for fine control. Sample: {redundancy_gt1_tasks[:3]}'
391
+ )
392
+
393
+ # 5. Build per-user available pools from all_pending (exclude items the user already has)
394
+ # We do NOT filter by redundancy limit here because we are redistributing
395
+ # the same set of tasks within the same user group; distinct-user counts
396
+ # will remain within limits. We only prevent giving a user a task they
397
+ # already have (any status).
398
+ user_available_tasks: dict[str, list[str]] = {}
399
+ with self.db._connect() as conn:
400
+ for user in users:
401
+ if not all_pending:
402
+ user_available_tasks[user] = []
403
+ continue
404
+ # exclude items that the user already has
405
+ placeholders = ','.join('?' * len(all_pending))
406
+ cur = conn.execute(
407
+ f"""
408
+ SELECT x.item_id
409
+ FROM (
410
+ SELECT ? AS round_id, ? AS user_id
411
+ ) p
412
+ JOIN (
413
+ SELECT item_id FROM (
414
+ VALUES {','.join(['(?)'] * len(all_pending))}
415
+ ) AS v(item_id)
416
+ ) AS x ON 1=1
417
+ WHERE NOT EXISTS (
418
+ SELECT 1
419
+ FROM assignments a
420
+ WHERE a.round_id = p.round_id
421
+ AND a.item_id = x.item_id
422
+ AND a.user_id = p.user_id
423
+ )
424
+ """,
425
+ (self.round_id, user, *all_pending),
426
+ )
427
+ user_available_tasks[user] = [row[0] for row in cur.fetchall()]
428
+
429
+ # 6. Calculate new distribution using per-user pools
430
+ distribution = self._calculate_distribution_per_user(user_available_tasks, amount_parsed)
431
+
432
+ # Preview rebalance
433
+ if dry_run:
434
+ return {
435
+ 'success': True,
436
+ 'dry_run': True,
437
+ 'affected_users': users,
438
+ 'affected_count': len(all_pending),
439
+ 'distribution': {u: len(items) for u, items in distribution.items()},
440
+ 'item_ids_sample': all_pending[:10],
441
+ }
442
+
443
+ # 7. Execute rebalance (delete old assignments, insert new ones)
444
+ now = datetime.now(timezone.utc).isoformat()
445
+ with self.db._connect() as conn, self.db._lock:
446
+ conn.execute('BEGIN IMMEDIATE')
447
+
448
+ try:
449
+ # Delete all pending tasks from these users
450
+ placeholders_users = ','.join('?' * len(users))
451
+ conn.execute(
452
+ f"""
453
+ DELETE FROM assignments
454
+ WHERE round_id = ? AND user_id IN ({placeholders_users}) AND status = 'pending'
455
+ """,
456
+ (self.round_id, *users),
457
+ )
458
+
459
+ # Insert new assignments
460
+ for user, item_ids in distribution.items():
461
+ for item_id in item_ids:
462
+ # Check if this user already has this task (avoid PRIMARY KEY violation)
463
+ # Each user can only have ONE assignment per task
464
+ cur = conn.execute(
465
+ """
466
+ SELECT 1
467
+ FROM assignments
468
+ WHERE round_id = ? AND item_id = ? AND user_id = ?
469
+ """,
470
+ (self.round_id, item_id, user),
471
+ )
472
+ if cur.fetchone():
473
+ # User already has this task, skip
474
+ continue
475
+
476
+ # Compute next slot based on how many DISTINCT users already have this task
477
+ cur = conn.execute(
478
+ """
479
+ SELECT COUNT(DISTINCT user_id)
480
+ FROM assignments
481
+ WHERE round_id = ? AND item_id = ?
482
+ """,
483
+ (self.round_id, item_id),
484
+ )
485
+ next_slot = cur.fetchone()[0]
486
+
487
+ # Insert new assignment
488
+ conn.execute(
489
+ """
490
+ INSERT INTO assignments
491
+ (round_id, item_id, user_id, status, assigned_at, redundancy_slot)
492
+ VALUES (?, ?, ?, 'pending', ?, ?)
493
+ """,
494
+ (self.round_id, item_id, user, now, next_slot),
495
+ )
496
+
497
+ # Record log
498
+ conn.execute(
499
+ """
500
+ INSERT INTO allocation_history
501
+ (round_id, operation, operator, from_source, to_target, amount_spec,
502
+ affected_users, affected_count, item_ids_sample, item_ids_hash,
503
+ dry_run, force, reason, created_at)
504
+ VALUES (?, 'rebalance', ?, ?, ?, ?, ?, ?, ?, ?, 0, 0, ?, ?)
505
+ """,
506
+ (
507
+ self.round_id,
508
+ self.operator,
509
+ f'users:{",".join(users)}',
510
+ f'users:{",".join(users)}',
511
+ mode_spec,
512
+ json.dumps(users),
513
+ len(all_pending),
514
+ json.dumps(all_pending[:10]),
515
+ self._compute_item_hash(all_pending),
516
+ reason,
517
+ now,
518
+ ),
519
+ )
520
+
521
+ conn.execute('COMMIT')
522
+
523
+ return {
524
+ 'success': True,
525
+ 'affected_users': users,
526
+ 'affected_count': len(all_pending),
527
+ 'distribution': {u: len(items) for u, items in distribution.items()},
528
+ }
529
+
530
+ except Exception as e:
531
+ conn.execute('ROLLBACK')
532
+ raise e
533
+
534
+ # ========== Internal Methods ==========
535
+
536
+ def _get_available_source_items(
537
+ self, from_source: str, target_users: list[str], redundancy: int, force: bool
538
+ ) -> dict[str, list[str]]:
539
+ """Get available source tasks for EACH target user separately.
540
+
541
+ This method returns a dictionary mapping each user to their available tasks.
542
+ Different users have different available tasks because they
543
+ already have different existing assignments.
544
+
545
+ For each user, filters at SQL level to return tasks that:
546
+ 1. Come from the specified source (unassigned pool or another user)
547
+ 2. Are not already assigned to THIS SPECIFIC USER
548
+ 3. Have not reached their redundancy limit
549
+
550
+ Args:
551
+ from_source: Source specification ('unassigned' or 'user:UserID')
552
+ target_users: List of users who will receive these tasks
553
+ redundancy: Number of different annotators required per task
554
+ force: If True, allow transferring in_progress tasks (only for user source)
555
+
556
+ Returns:
557
+ Dictionary mapping user_id to list of available item_ids
558
+ Example: {'Alice': ['task1', 'task2'], 'Bob': ['task1', 'task3'], ...}
559
+
560
+ """
561
+ user_available_tasks = {}
562
+
563
+ with self.db._connect() as conn:
564
+ # Query available tasks for EACH user separately
565
+ for user in target_users:
566
+ if from_source == 'unassigned':
567
+ # Get tasks from unassigned pool that this user doesn't have
568
+ cur = conn.execute(
569
+ """
570
+ SELECT t.item_id
571
+ FROM tasks t
572
+ LEFT JOIN task_config tc
573
+ ON t.round_id = tc.round_id AND t.item_id = tc.item_id
574
+ WHERE t.round_id = ?
575
+ -- This specific user doesn't have this task
576
+ AND NOT EXISTS (
577
+ SELECT 1
578
+ FROM assignments a
579
+ WHERE a.round_id = t.round_id
580
+ AND a.item_id = t.item_id
581
+ AND a.user_id = ?
582
+ )
583
+ -- Check redundancy limit (current assignments < required)
584
+ AND (
585
+ SELECT COUNT(DISTINCT a2.user_id)
586
+ FROM assignments a2
587
+ WHERE a2.round_id = t.round_id AND a2.item_id = t.item_id
588
+ ) < COALESCE(tc.redundancy_required, ?)
589
+ -- Check redundancy completion
590
+ AND COALESCE(tc.redundancy_completed, 0) < COALESCE(tc.redundancy_required, ?)
591
+ ORDER BY t.order_key
592
+ """,
593
+ (self.round_id, user, redundancy, redundancy),
594
+ )
595
+ elif from_source.startswith('user:'):
596
+ source_user_id = from_source[5:]
597
+
598
+ if force:
599
+ status_filter = "('pending', 'in_progress')"
600
+ else:
601
+ status_filter = "('pending')"
602
+
603
+ cur = conn.execute(
604
+ f"""
605
+ SELECT DISTINCT a1.item_id
606
+ FROM assignments a1
607
+ LEFT JOIN task_config tc
608
+ ON a1.round_id = tc.round_id AND a1.item_id = a1.item_id
609
+ WHERE a1.round_id = ?
610
+ AND a1.user_id = ?
611
+ AND a1.status IN {status_filter}
612
+ -- This specific target user doesn't have this task
613
+ AND NOT EXISTS (
614
+ SELECT 1
615
+ FROM assignments a2
616
+ WHERE a2.round_id = a1.round_id
617
+ AND a2.item_id = a1.item_id
618
+ AND a2.user_id = ?
619
+ )
620
+ -- Check redundancy limit
621
+ AND (
622
+ SELECT COUNT(DISTINCT a3.user_id)
623
+ FROM assignments a3
624
+ WHERE a3.round_id = a1.round_id AND a3.item_id = a1.item_id
625
+ ) < COALESCE(tc.redundancy_required, ?)
626
+ -- Check redundancy completion
627
+ AND COALESCE(tc.redundancy_completed, 0) < COALESCE(tc.redundancy_required, ?)
628
+ ORDER BY a1.assigned_at
629
+ """,
630
+ (self.round_id, source_user_id, user, redundancy, redundancy),
631
+ )
632
+ else:
633
+ raise ValueError(f'Invalid from_source: {from_source}')
634
+
635
+ user_available_tasks[user] = [row[0] for row in cur.fetchall()]
636
+
637
+ return user_available_tasks
638
+
639
+ def _get_target_users(self, target_parsed: dict) -> list[str]:
640
+ """Get target user list."""
641
+ if target_parsed['type'] == 'single':
642
+ return [target_parsed['user']]
643
+ else:
644
+ return target_parsed['users']
645
+
646
+ def _calculate_distribution_per_user(
647
+ self, user_available_tasks: dict[str, list[str]], amount_parsed: dict
648
+ ) -> dict[str, list[str]]:
649
+ """Calculate task distribution for each user based on their available tasks.
650
+
651
+ Key insight: Each user has a different pool of available tasks because they
652
+ have different existing assignments. This method respects that and assigns
653
+ tasks from each user's individual available pool.
654
+
655
+ Args:
656
+ user_available_tasks: Dict mapping user_id to list of available task IDs
657
+ Example: {'Alice': ['task1', 'task2', 'task3'],
658
+ 'Bob': ['task1', 'task4']}
659
+ amount_parsed: Parsed amount specification
660
+
661
+ Returns:
662
+ Dict mapping user_id to list of assigned task IDs
663
+ Example: {'Alice': ['task1', 'task2'], 'Bob': ['task1']}
664
+
665
+ """
666
+ target_users = list(user_available_tasks.keys())
667
+ distribution = {}
668
+
669
+ if amount_parsed['type'] == 'count':
670
+ # Each user gets up to N tasks from their available pool
671
+ count = amount_parsed['value']
672
+ for user in target_users:
673
+ available = user_available_tasks[user]
674
+ distribution[user] = available[: min(count, len(available))]
675
+
676
+ elif amount_parsed['type'] == 'ratio':
677
+ # Each user gets X% of their available tasks
678
+ ratio = amount_parsed['value']
679
+ for user in target_users:
680
+ available = user_available_tasks[user]
681
+ count = int(len(available) * ratio)
682
+ distribution[user] = available[:count]
683
+
684
+ elif amount_parsed['type'] == 'equal':
685
+ # Try to give each user roughly equal number of tasks
686
+ # Strategy: Calculate target count as average, but respect each user's limit
687
+ total_available = sum(len(tasks) for tasks in user_available_tasks.values())
688
+ target_per_user = total_available // len(target_users)
689
+
690
+ for user in target_users:
691
+ available = user_available_tasks[user]
692
+ distribution[user] = available[: min(target_per_user, len(available))]
693
+
694
+ elif amount_parsed['type'] == 'ratio_per_user':
695
+ # User-specific ratios: calculate from total available pool
696
+ user_ratios = amount_parsed['users']
697
+ total_available = sum(len(tasks) for tasks in user_available_tasks.values())
698
+
699
+ # Sort by user_id for deterministic behavior
700
+ sorted_users = sorted(user_ratios.items(), key=lambda x: x[0])
701
+
702
+ for user, ratio in sorted_users:
703
+ if user not in user_available_tasks:
704
+ distribution[user] = []
705
+ continue
706
+
707
+ available = user_available_tasks[user]
708
+ target_count = int(total_available * ratio)
709
+ # Take up to target_count, but limited by what's available for this user
710
+ distribution[user] = available[: min(target_count, len(available))]
711
+
712
+ elif amount_parsed['type'] == 'all':
713
+ # Give each user all their available tasks
714
+ for user in target_users:
715
+ distribution[user] = user_available_tasks[user]
716
+
717
+ else:
718
+ raise ValueError(f'Unknown amount type: {amount_parsed["type"]}')
719
+
720
+ return distribution
721
+
722
+ def _calculate_distribution(
723
+ self, item_ids: list[str], target_users: list[str], amount_parsed: dict
724
+ ) -> dict[str, list[str]]:
725
+ """Calculate task distribution for each user (old method, kept for compatibility).
726
+
727
+ Returns:
728
+ {'user_A': ['item_1', 'item_2'], 'user_B': ['item_3'], ...}
729
+
730
+ """
731
+ n_items = len(item_ids)
732
+
733
+ if amount_parsed['type'] == 'count':
734
+ count = min(amount_parsed['value'], n_items)
735
+ selected_items = item_ids[:count]
736
+ return self._distribute_equal(selected_items, target_users)
737
+
738
+ elif amount_parsed['type'] == 'ratio':
739
+ ratio = amount_parsed['value']
740
+ count = int(n_items * ratio)
741
+ selected_items = item_ids[:count]
742
+ return self._distribute_equal(selected_items, target_users)
743
+
744
+ elif amount_parsed['type'] in ('equal', 'all'):
745
+ return self._distribute_equal(item_ids, target_users)
746
+
747
+ elif amount_parsed['type'] == 'ratio_per_user':
748
+ user_ratios = amount_parsed['users']
749
+ distribution = {}
750
+ start_idx = 0
751
+
752
+ # Sort by user_id for deterministic behavior
753
+ sorted_users = sorted(user_ratios.items(), key=lambda x: x[0])
754
+
755
+ for i, (user, ratio) in enumerate(sorted_users):
756
+ if i == len(sorted_users) - 1:
757
+ # Last user gets all remaining (avoid floating point errors)
758
+ distribution[user] = item_ids[start_idx:]
759
+ else:
760
+ count = int(n_items * ratio)
761
+ distribution[user] = item_ids[start_idx : start_idx + count]
762
+ start_idx += count
763
+
764
+ return distribution
765
+
766
+ raise ValueError(f'Unknown amount type: {amount_parsed["type"]}')
767
+
768
+ def _distribute_equal(self, item_ids: list[str], users: list[str]) -> dict[str, list[str]]:
769
+ """Distribute tasks equally among users (deterministic remainder allocation)."""
770
+ n_items = len(item_ids)
771
+ n_users = len(users)
772
+ base_count = n_items // n_users
773
+ remainder = n_items % n_users
774
+
775
+ # Sort by user_id for deterministic behavior
776
+ sorted_users = sorted(users)
777
+
778
+ distribution = {}
779
+ start_idx = 0
780
+
781
+ for i, user in enumerate(sorted_users):
782
+ # First 'remainder' users get one extra item
783
+ count = base_count + (1 if i < remainder else 0)
784
+ distribution[user] = item_ids[start_idx : start_idx + count]
785
+ start_idx += count
786
+
787
+ return distribution
788
+
789
+ def _compute_item_hash(self, item_ids: list[str]) -> str:
790
+ """Compute hash of item_id list for audit."""
791
+ content = ','.join(sorted(item_ids))
792
+ return hashlib.sha256(content.encode()).hexdigest()[:16]
793
+
794
+ def _preview_allocation(self, distribution: dict[str, list[str]]) -> dict[str, Any]:
795
+ """Dry-run preview."""
796
+ total_allocated = sum(len(items) for items in distribution.values())
797
+
798
+ # Sample: first 2 items from each user
799
+ sample_ids = []
800
+ for items in distribution.values():
801
+ sample_ids.extend(items[:2])
802
+ sample_ids = sample_ids[:10]
803
+
804
+ return {
805
+ 'success': True,
806
+ 'dry_run': True,
807
+ 'affected_users': list(distribution.keys()),
808
+ 'affected_count': total_allocated,
809
+ 'distribution': {u: len(items) for u, items in distribution.items()},
810
+ 'item_ids_sample': sample_ids,
811
+ 'item_ids_hash': self._compute_item_hash([iid for items in distribution.values() for iid in items]),
812
+ }
813
+
814
+ def _execute_allocation(
815
+ self,
816
+ from_source: str,
817
+ to_target: str,
818
+ amount_spec: str,
819
+ distribution: dict[str, list[str]],
820
+ force: bool,
821
+ reason: str,
822
+ ) -> dict[str, Any]:
823
+ """Execute actual allocation."""
824
+ now = datetime.now(timezone.utc).isoformat()
825
+ all_assigned_items = [iid for items in distribution.values() for iid in items]
826
+
827
+ with self.db._connect() as conn, self.db._lock:
828
+ conn.execute('BEGIN IMMEDIATE')
829
+
830
+ try:
831
+ # 1. If transferring from a user, delete their assignments (no skipped state)
832
+ if from_source.startswith('user:'):
833
+ source_user = from_source[5:]
834
+ placeholders = ','.join('?' * len(all_assigned_items))
835
+ conn.execute(
836
+ f"""
837
+ DELETE FROM assignments
838
+ WHERE round_id = ? AND user_id = ? AND item_id IN ({placeholders})
839
+ """,
840
+ (self.round_id, source_user, *all_assigned_items),
841
+ )
842
+
843
+ # 2. Create assignments for target users
844
+ for user, item_ids in distribution.items():
845
+ for item_id in item_ids:
846
+ # Check if this user already has this task (avoid PRIMARY KEY violation)
847
+ # Each user can only have ONE assignment per task
848
+ cur = conn.execute(
849
+ """
850
+ SELECT 1
851
+ FROM assignments
852
+ WHERE round_id = ? AND item_id = ? AND user_id = ?
853
+ """,
854
+ (self.round_id, item_id, user),
855
+ )
856
+ if cur.fetchone():
857
+ # User already has this task, skip
858
+ continue
859
+
860
+ # Compute next slot based on how many DISTINCT users already have this task
861
+ cur = conn.execute(
862
+ """
863
+ SELECT COUNT(DISTINCT user_id)
864
+ FROM assignments
865
+ WHERE round_id = ? AND item_id = ?
866
+ """,
867
+ (self.round_id, item_id),
868
+ )
869
+ next_slot = cur.fetchone()[0]
870
+
871
+ conn.execute(
872
+ """
873
+ INSERT INTO assignments
874
+ (round_id, item_id, user_id, status, assigned_at, redundancy_slot)
875
+ VALUES (?, ?, ?, 'pending', ?, ?)
876
+ """,
877
+ (self.round_id, item_id, user, now, next_slot),
878
+ )
879
+
880
+ # 3. Record log
881
+ conn.execute(
882
+ """
883
+ INSERT INTO allocation_history
884
+ (round_id, operation, operator, from_source, to_target, amount_spec,
885
+ affected_users, affected_count, item_ids_sample, item_ids_hash,
886
+ dry_run, force, reason, created_at)
887
+ VALUES (?, 'allocate', ?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?, ?)
888
+ """,
889
+ (
890
+ self.round_id,
891
+ self.operator,
892
+ from_source,
893
+ to_target,
894
+ amount_spec,
895
+ json.dumps(list(distribution.keys())),
896
+ len(all_assigned_items),
897
+ json.dumps(all_assigned_items[:10]),
898
+ self._compute_item_hash(all_assigned_items),
899
+ 1 if force else 0,
900
+ reason,
901
+ now,
902
+ ),
903
+ )
904
+
905
+ conn.execute('COMMIT')
906
+
907
+ return {
908
+ 'success': True,
909
+ 'affected_users': list(distribution.keys()),
910
+ 'affected_count': len(all_assigned_items),
911
+ 'distribution': {u: len(items) for u, items in distribution.items()},
912
+ 'item_ids_sample': all_assigned_items[:10],
913
+ }
914
+
915
+ except Exception as e:
916
+ conn.execute('ROLLBACK')
917
+ raise e
src/database.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Database class for the evaluation."""
2
+
3
+ import json
4
+ import os
5
+ import sqlite3
6
+ import subprocess
7
+ import threading
8
+ from contextlib import contextmanager
9
+ from datetime import datetime, timezone
10
+ from typing import Optional
11
+
12
+ REQUIRED_TABLES = ('tasks', 'assignments', 'task_config', 'answers', 'allocation_history')
13
+
14
+
15
+ def _get_time_now() -> str:
16
+ return datetime.now(timezone.utc).isoformat()
17
+
18
+
19
+ class DB:
20
+ """Database for the evaluation."""
21
+
22
+ def __init__(self, db_path: str, schema_path: Optional[str] = None, verify_only: bool = True):
23
+ """Initialize the database."""
24
+ os.makedirs(os.path.dirname(db_path) or '.', exist_ok=True)
25
+ self.db_path = db_path
26
+ self._lock = threading.Lock()
27
+
28
+ with self._connect() as conn:
29
+ if schema_path is not None and os.path.exists(schema_path):
30
+ with open(schema_path, encoding='utf-8') as f:
31
+ schema_sql = f.read()
32
+ conn.executescript(schema_sql)
33
+ elif verify_only:
34
+ missing = []
35
+ for t in REQUIRED_TABLES:
36
+ cur = conn.execute('SELECT name FROM sqlite_master WHERE type="table" AND name=?', (t,))
37
+ if cur.fetchone() is None:
38
+ missing.append(t)
39
+ if missing:
40
+ raise FileNotFoundError(f'Missing required tables: {", ".join(missing)}')
41
+ else:
42
+ raise FileNotFoundError(f'SQL schema file not found at: {schema_path or "None"}')
43
+
44
+ @contextmanager
45
+ def _connect(self):
46
+ conn = sqlite3.connect(self.db_path, timeout=30, isolation_level=None) # autocommit
47
+ conn.execute('PRAGMA foreign_keys=ON;')
48
+ try:
49
+ yield conn
50
+ finally:
51
+ conn.close()
52
+
53
+ # ========== Answer Recording ==========
54
+
55
+ def record_answer(
56
+ self,
57
+ round_id: str,
58
+ user_id: str,
59
+ item_id: str,
60
+ label: str,
61
+ image_path: str,
62
+ score: int,
63
+ words_not_present: list[str],
64
+ ) -> None:
65
+ """Record an answer into the database."""
66
+ now = _get_time_now()
67
+ with self._connect() as conn, self._lock:
68
+ conn.execute(
69
+ 'INSERT OR REPLACE INTO answers(round_id, user_id, item_id, label, image_path, '
70
+ 'score, words_not_present, answered_at) '
71
+ 'VALUES(?,?,?,?,?,?,?,?)',
72
+ (
73
+ round_id,
74
+ user_id,
75
+ item_id,
76
+ label,
77
+ image_path,
78
+ score,
79
+ json.dumps(words_not_present, ensure_ascii=False),
80
+ now,
81
+ ),
82
+ )
83
+
84
+ def get_answered_item_ids(self, round_id: str, user_id: str) -> set[str]:
85
+ """Get all the answered item ids of a user in a round."""
86
+ with self._connect() as conn:
87
+ cur = conn.execute(
88
+ 'SELECT item_id FROM answers WHERE round_id=? AND user_id=?',
89
+ (round_id, user_id),
90
+ )
91
+ return {r[0] for r in cur.fetchall()} # fetchall returns a list of tuples
92
+
93
+ def get_answer(self, round_id: str, user_id: str, item_id: str) -> Optional[tuple[int, list[str]]]:
94
+ """Get an answer of a user in a round for a specific item.
95
+
96
+ Returns:
97
+ tuple[int, list[str]]: score (int) and list of words not present in the image.
98
+ None if the answer does not exist.
99
+
100
+ """
101
+ with self._connect() as conn:
102
+ cur = conn.execute(
103
+ 'SELECT score, words_not_present FROM answers WHERE round_id=? AND user_id=? AND item_id=?',
104
+ (round_id, user_id, item_id),
105
+ )
106
+ row = cur.fetchone()
107
+ if not row:
108
+ return None
109
+
110
+ score, words_not_present = row
111
+ return int(score), json.loads(words_not_present)
112
+
113
+ # ========== Lease Management (for in_progress tasks) ==========
114
+
115
+ def cleanup_expired_leases(self, round_id: str) -> int:
116
+ """Auto-recover expired in_progress tasks to pending status.
117
+
118
+ Returns the number of tasks recovered.
119
+ """
120
+ now = _get_time_now()
121
+ with self._connect() as conn, self._lock:
122
+ cur = conn.execute(
123
+ """
124
+ UPDATE assignments
125
+ SET status = 'pending', lease_until = NULL, started_at = NULL
126
+ WHERE round_id = ?
127
+ AND status = 'in_progress'
128
+ AND lease_until < ?
129
+ """,
130
+ (round_id, now),
131
+ )
132
+ return cur.rowcount or 0
133
+
134
+ # ========== Auto Commit (For HF Spaces) ==========
135
+ def commit_and_push_db(self) -> None:
136
+ """Commit and push the database to the repository."""
137
+ if not os.getenv('SPACE_ID'):
138
+ return
139
+
140
+ try:
141
+ ts = _get_time_now()
142
+ subprocess.run(['git', 'add', self.db_path], check=True)
143
+ subprocess.run(['git', 'commit', '-m', f'update db {ts}'], check=True)
144
+ subprocess.run(['git', 'push', 'origin', 'main'], check=True)
145
+ except Exception as e:
146
+ raise RuntimeError(f'Failed to commit and push the database: {e}')
src/eval_server.py ADDED
@@ -0,0 +1,815 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Evaluation server."""
2
+
3
+ import json
4
+ from datetime import datetime, timedelta, timezone
5
+ from typing import Any
6
+
7
+ import gradio as gr
8
+ from allocation import AllocationEngine
9
+ from database import DB
10
+ from instuction_md import (
11
+ important_notes_instruction,
12
+ login_instruction,
13
+ nav_instruction,
14
+ q1_instruction,
15
+ q2_instruction,
16
+ )
17
+
18
+ # Type alias for return tuples
19
+ # Most functions return 8 elements (without user_state)
20
+ ReturnTuple = tuple[
21
+ dict[str, Any], # img
22
+ dict[str, Any], # label_md
23
+ dict[str, Any], # words_checkbox
24
+ dict[str, Any], # done_md
25
+ dict[str, Any], # current_idx_md
26
+ dict[str, Any], # progress_md
27
+ str | None, # current_item_state
28
+ dict[str, Any], # score
29
+ ]
30
+
31
+ # start_or_resume returns 9 elements (includes user_state)
32
+ StartReturnTuple = tuple[
33
+ dict[str, Any], # img
34
+ dict[str, Any], # label_md
35
+ dict[str, Any], # words_checkbox
36
+ dict[str, Any], # done_md
37
+ dict[str, Any], # current_idx_md
38
+ dict[str, Any], # progress_md
39
+ dict[str, Any], # user_state
40
+ str | None, # current_item_state
41
+ str | None, # score
42
+ ]
43
+
44
+
45
+ def _error_return(message: str, keep_current_item: bool = True, current_item_id: str | None = None) -> ReturnTuple:
46
+ """Create a standard error return tuple with notification.
47
+
48
+ Args:
49
+ message: Error message to display
50
+ keep_current_item: If True, preserve current_item_id; if False, set to None
51
+ current_item_id: The current item ID (only used if keep_current_item is True)
52
+
53
+ Returns:
54
+ Standard 8-element return tuple with error message
55
+
56
+ """
57
+ # Show warning notification (3 seconds)
58
+ gr.Warning(message, duration=3)
59
+
60
+ return (
61
+ gr.update(),
62
+ gr.update(),
63
+ gr.update(),
64
+ gr.update(value=message),
65
+ gr.update(),
66
+ gr.update(),
67
+ current_item_id if keep_current_item else None,
68
+ gr.update(),
69
+ )
70
+
71
+
72
+ def format_image_path(template: str, label: str, path: str) -> str:
73
+ """Replace the template with the label and path."""
74
+ return template.format(label=label, path=path)
75
+
76
+
77
+ def app_main(args) -> None:
78
+ """Run the evaluation server."""
79
+ # Set custom temp directory to avoid permission issues with shared /tmp/gradio
80
+ # user_temp_dir = os.path.join(tempfile.gettempdir(), f'gradio_{os.getenv("USER", "user")}')
81
+ # os.makedirs(user_temp_dir, exist_ok=True)
82
+ # os.environ['GRADIO_TEMP_DIR'] = user_temp_dir
83
+
84
+ db_path = args.db_path
85
+ db = DB(db_path)
86
+
87
+ # Get auto-allocation setting
88
+ auto_allo_num = args.auto_allo_num
89
+
90
+ allowed_users = args.allowed_users
91
+
92
+ with gr.Blocks(title='Icon Evaluation', theme=gr.themes.Base()) as demo:
93
+ user_state = gr.State({'user_id': None})
94
+ current_item_state = gr.State(None) # current item_id
95
+
96
+ # Instructions page (shown first)
97
+ with gr.Column(visible=True) as instructions_page:
98
+ # gr.Markdown(task_instructions)
99
+ gr.Markdown('# Instructions')
100
+ gr.Markdown(
101
+ 'Welcome to the Icon Evaluation Task! Please read the instructions first time you start the task.'
102
+ )
103
+ with gr.Walkthrough(selected=0) as walkthrough:
104
+ with gr.Step('Login', id=0):
105
+ gr.Markdown(login_instruction)
106
+ gr.HTML(
107
+ """<img src="https://image2url.com/images/1760534494987-2ee98f6a-8ec5-40fc-b48b-026fc57b8b00.png">"""
108
+ )
109
+ btn = gr.Button('Next Step')
110
+ btn.click(lambda: gr.Walkthrough(selected=1), outputs=walkthrough)
111
+ with gr.Step('Question 1', id=1):
112
+ gr.Markdown(q1_instruction)
113
+ gr.HTML(
114
+ """<img src="https://image2url.com/images/1760534393840-1f609fc4-66a9-4033-9bca-6b6517ab2e4c.png">"""
115
+ )
116
+ btn = gr.Button('Next Step')
117
+ btn.click(lambda: gr.Walkthrough(selected=2), outputs=walkthrough)
118
+ with gr.Step('Question 2', id=2):
119
+ gr.Markdown(q2_instruction)
120
+ gr.HTML(
121
+ """<img src="https://image2url.com/images/1760534411276-0aa6e148-935d-4901-97c6-2697ae26f52e.png">"""
122
+ )
123
+ btn = gr.Button('Next Step')
124
+ btn.click(lambda: gr.Walkthrough(selected=3), outputs=walkthrough)
125
+ with gr.Step('Important Notes', id=3):
126
+ gr.Markdown(important_notes_instruction)
127
+ gr.HTML(
128
+ """<img src="https://image2url.com/images/1760534430736-43665629-d687-4fa9-8d13-8e577fbee25d.png">"""
129
+ )
130
+ gr.Markdown(nav_instruction)
131
+ gr.HTML(
132
+ """<img src="https://image2url.com/images/1760534445418-88e9259a-76f8-47ca-80df-7efe06c546c8.png">"""
133
+ )
134
+
135
+ start_task_btn = gr.Button('🚀 Start Evaluation Task', variant='primary', size='lg')
136
+
137
+ # Main evaluation interface (hidden initially)
138
+ with gr.Column(visible=False) as main_page:
139
+ with gr.Row():
140
+ user_id_inp = gr.Textbox(label='User ID', placeholder='e.g.: alice_01', scale=2)
141
+ start_btn = gr.Button('Start/Resume', variant='primary', scale=1)
142
+
143
+ # Progress indicators (At: current index, Done: completed count)
144
+ with gr.Row():
145
+ current_idx_md = gr.Markdown('**At:** -/-', visible=False)
146
+ progress_md = gr.Markdown('**Done:** -/-', visible=False)
147
+
148
+ # Image display (disable download function)
149
+ img = gr.Image(label='Image', type='filepath', height=256, show_download_button=False)
150
+
151
+ # Collapsible task guide
152
+ with gr.Accordion('Task Instructions (Click to expand)', open=False):
153
+ gr.Markdown(
154
+ '## Question 1: Relevance Score (1-5)\n'
155
+ 'Rate how relevant the icon is to the given label on a scale of 1 to 5:\n'
156
+ '- **5**: Strongly related (highly consistent with the label, clear match)\n'
157
+ '- **4**: Moderately related (clear partial connection, overall closer to related)\n'
158
+ '- **3**: Neutral/uncertain (ambiguous, could be seen as either related or unrelated)\n'
159
+ '- **2**: Weakly related (some minor or indirect connection, overall closer to unrelated)\n'
160
+ '- **1**: Completely unrelated (no clear connection)\n'
161
+ '## Question 2: Keyword Presence\n'
162
+ 'Check the 10 keywords and indicate which ones are **NOT** visible in the icon image:\n'
163
+ '- Select **specific keywords** that are missing from the image, OR\n'
164
+ '- Select **"ALL WORDS PRESENT"** if all 10 keywords are visible in the image\n'
165
+ 'You must select at least one option (either specific missing keywords or "ALL WORDS PRESENT")'
166
+ )
167
+
168
+ # Relevance score
169
+ label_md = gr.Markdown(visible=False)
170
+ score = gr.Radio(
171
+ choices=['1', '2', '3', '4', '5'],
172
+ label='1-5 points: the relevance of the image to the label',
173
+ interactive=True,
174
+ )
175
+
176
+ # 10 words checkbox
177
+ words_checkbox = gr.CheckboxGroup(
178
+ choices=[],
179
+ label='10 words: Select the words that are NOT presented in the image or "ALL WORDS PRESENT"',
180
+ interactive=True,
181
+ visible=False,
182
+ )
183
+
184
+ # Action buttons
185
+ with gr.Row():
186
+ submit_btn = gr.Button('Submit and Next', variant='primary')
187
+
188
+ # Navigation controls
189
+ with gr.Row():
190
+ jump_idx = gr.Number(label='Jump to the index (1-indexed)', precision=0)
191
+ jump_btn = gr.Button('Jump')
192
+ prev_btn = gr.Button('Prev')
193
+ next_btn = gr.Button('Next')
194
+
195
+ # Status messages
196
+ done_md = gr.Markdown(visible=False)
197
+
198
+ # Button to show main page
199
+ start_task_btn.click(
200
+ fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
201
+ outputs=[instructions_page, main_page],
202
+ queue=False,
203
+ )
204
+
205
+ def get_user_items_list(user_id: str) -> list[dict[str, Any]]:
206
+ """Get ordered list of items assigned to user."""
207
+ with db._connect() as conn:
208
+ cur = conn.execute(
209
+ """
210
+ SELECT t.item_id, t.label, t.path, t.words, t.order_key, a.status
211
+ FROM assignments a
212
+ JOIN tasks t ON a.round_id = t.round_id AND a.item_id = t.item_id
213
+ WHERE a.round_id = ? AND a.user_id = ?
214
+ ORDER BY t.order_key
215
+ """,
216
+ (args.round_id, user_id),
217
+ )
218
+ rows = cur.fetchall()
219
+
220
+ items_list = []
221
+ for row in rows:
222
+ item_id, label, path, words_json, order_key, status = row
223
+ items_list.append(
224
+ {
225
+ 'item_id': item_id,
226
+ 'label': label,
227
+ 'path': path,
228
+ 'words': json.loads(words_json),
229
+ 'order_key': order_key,
230
+ 'status': status,
231
+ }
232
+ )
233
+ return items_list
234
+
235
+ def get_next_item(user_id: str) -> dict[str, Any] | None:
236
+ """Get the next pending or in_progress item for user to resume."""
237
+ items_list = get_user_items_list(user_id)
238
+ for item in items_list:
239
+ if item['status'] in ('pending', 'in_progress'):
240
+ return item
241
+ return None
242
+
243
+ def start_or_resume(user_id: str, state: dict[str, Any]) -> StartReturnTuple:
244
+ """Start or resume the evaluation.
245
+
246
+ Return: img, label_md, words_checkbox, done_md, current_idx_md, progress_md,
247
+ state, current_item_state, score
248
+ """
249
+ user_id = user_id.strip()
250
+ if allowed_users and user_id not in allowed_users:
251
+ gr.Warning('User ID not allowed', duration=3)
252
+ return (
253
+ gr.update(),
254
+ gr.update(),
255
+ gr.update(),
256
+ gr.update(visible=True, value='User ID not allowed'),
257
+ gr.update(),
258
+ gr.update(),
259
+ state,
260
+ None,
261
+ None,
262
+ )
263
+
264
+ if not user_id or user_id == '':
265
+ gr.Warning('Please input the user ID', duration=3)
266
+ return (
267
+ gr.update(),
268
+ gr.update(),
269
+ gr.update(),
270
+ gr.update(visible=True, value='Please input the user ID.'),
271
+ gr.update(),
272
+ gr.update(),
273
+ state,
274
+ None,
275
+ None,
276
+ )
277
+
278
+ state = {'user_id': user_id}
279
+ db.cleanup_expired_leases(args.round_id)
280
+ items_list = get_user_items_list(user_id)
281
+
282
+ # Auto-allocate for new users
283
+ if not items_list and auto_allo_num > 0:
284
+ engine = AllocationEngine(db, args.round_id, operator='auto_system')
285
+ result = engine.allocate(
286
+ from_source='unassigned',
287
+ to_target=f'user:{user_id}',
288
+ amount_spec=f'count:{auto_allo_num}',
289
+ redundancy=args.redundancy,
290
+ force=False,
291
+ dry_run=False,
292
+ reason='Auto-allocation for new user on first login',
293
+ )
294
+
295
+ if result['success']:
296
+ allocated_count = result['affected_count']
297
+ gr.Info(f'Welcome! You have {allocated_count} tasks to evaluate. Enjoy!', duration=5)
298
+ items_list = get_user_items_list(user_id)
299
+ else:
300
+ error_msg = result.get('error', 'Failed to automatically allocate tasks')
301
+ if 'No tasks available' in error_msg:
302
+ gr.Warning('No tasks available. Stay tuned!', duration=5)
303
+ else:
304
+ gr.Warning(
305
+ f'Failed to automatically allocate tasks: {error_msg}. '
306
+ 'Please contact the admin([email protected]).',
307
+ duration=8,
308
+ )
309
+
310
+ if not items_list:
311
+ gr.Warning('No tasks assigned to this user. Please contact admin.', duration=5)
312
+ return (
313
+ gr.update(visible=False),
314
+ gr.update(),
315
+ gr.update(),
316
+ gr.update(visible=True, value='No tasks assigned to this user. Please contact admin.'),
317
+ gr.update(),
318
+ gr.update(),
319
+ state,
320
+ None,
321
+ None,
322
+ )
323
+
324
+ total = len(items_list)
325
+ completed_items = [it for it in items_list if it['status'] == 'completed']
326
+ done_count = len(completed_items)
327
+
328
+ nxt = get_next_item(user_id)
329
+ if nxt is None: # all items are done
330
+ # Load the last completed item and show completion message
331
+ gr.Info('Congratulations! All tasks completed. Thank you for participating! ☺️', duration=10)
332
+ last_item = items_list[-1] if items_list else None
333
+ if last_item:
334
+ # Get the 8-element result from helper and insert user_state
335
+ (
336
+ img_up,
337
+ label_up,
338
+ words_up_cb,
339
+ done_up,
340
+ idx_up,
341
+ prog_up,
342
+ item_id_val,
343
+ score_up,
344
+ ) = _load_item_helper(user_id, last_item, items_list, len(items_list))
345
+ # Return 9 elements with user_state inserted at position 7
346
+ return (
347
+ img_up,
348
+ label_up,
349
+ words_up_cb,
350
+ done_up,
351
+ idx_up,
352
+ prog_up,
353
+ state,
354
+ item_id_val,
355
+ score_up,
356
+ )
357
+ else:
358
+ return (
359
+ gr.update(visible=False),
360
+ gr.update(),
361
+ gr.update(),
362
+ gr.update(visible=True, value='All tasks completed. Thank you for participating!'),
363
+ gr.update(),
364
+ gr.update(visible=True, value=f'**Done:** {done_count}/{total}'),
365
+ state,
366
+ None,
367
+ None,
368
+ )
369
+
370
+ # Load the next pending item
371
+ item_id = nxt['item_id']
372
+
373
+ # Update status to in_progress and acquire lease
374
+ now_dt = datetime.now(timezone.utc)
375
+ lease_until = (now_dt + timedelta(seconds=600)).isoformat()
376
+ with db._connect() as conn, db._lock:
377
+ conn.execute(
378
+ """
379
+ UPDATE assignments
380
+ SET status = 'in_progress', started_at = ?, lease_until = ?
381
+ WHERE round_id = ? AND item_id = ? AND user_id = ?
382
+ """,
383
+ (now_dt.isoformat(), lease_until, args.round_id, item_id, user_id),
384
+ )
385
+
386
+ img_path = format_image_path(args.image_template, nxt['label'], nxt['path'])
387
+ current_idx = next((i + 1 for i, it in enumerate(items_list) if it['item_id'] == item_id), 1)
388
+ label_md_value = f'## Label: <span style="color: #00AA00; font-weight: bold;">{nxt["label"]}</span>'
389
+
390
+ return (
391
+ gr.update(value=img_path, visible=True),
392
+ gr.update(visible=True, value=label_md_value),
393
+ gr.update(visible=True, choices=nxt['words'] + ['ALL WORDS PRESENT'], value=[]),
394
+ gr.update(visible=False),
395
+ gr.update(visible=True, value=f'**At:** {current_idx}/{total}'),
396
+ gr.update(visible=True, value=f'**Done:** {done_count}/{total}'),
397
+ state,
398
+ item_id,
399
+ gr.update(value=None),
400
+ )
401
+
402
+ def submit(
403
+ user_state_val: dict[str, Any],
404
+ current_item_id: str | None,
405
+ score_val: str | None,
406
+ words_not_present: list[str],
407
+ ) -> ReturnTuple:
408
+ """Submit the answer and move to the next item."""
409
+ if not user_state_val or not user_state_val.get('user_id'):
410
+ return _error_return('Please start/continue', current_item_id=current_item_id)
411
+ if not current_item_id:
412
+ return _error_return('No current item', current_item_id=current_item_id)
413
+
414
+ # check if user selected 'ALL WORDS PRESENT', no other words should be selected
415
+ if 'ALL WORDS PRESENT' in words_not_present and len(words_not_present) > 1:
416
+ return _error_return(
417
+ 'Please select either ALL WORDS PRESENT or specific words', current_item_id=current_item_id
418
+ )
419
+ # check 2 questions are answered
420
+ if score_val is None or len(words_not_present) == 0:
421
+ return _error_return(
422
+ 'Please answer the 2 questions before submitting', current_item_id=current_item_id
423
+ )
424
+
425
+ user_id = user_state_val['user_id']
426
+
427
+ # Get item info
428
+ with db._connect() as conn:
429
+ cur = conn.execute(
430
+ 'SELECT label, path FROM tasks WHERE round_id = ? AND item_id = ?',
431
+ (args.round_id, current_item_id),
432
+ )
433
+ row = cur.fetchone()
434
+ if not row:
435
+ return _error_return('Item not found in database', current_item_id=current_item_id)
436
+ label, path = row
437
+
438
+ img_path = format_image_path(args.image_template, label, path)
439
+
440
+ # Filter out 'ALL WORDS PRESENT' from the selected words - if selected, means no words are missing
441
+ filtered_words = [w for w in (words_not_present or []) if w != 'ALL WORDS PRESENT']
442
+
443
+ # Record answer (filtered_words is the list of words not in the image, empty if 'ALL PRESENT' was selected)
444
+ db.record_answer(args.round_id, user_id, current_item_id, label, img_path, int(score_val), filtered_words)
445
+
446
+ # Update assignment status to completed
447
+ now = datetime.now(timezone.utc).isoformat()
448
+ with db._connect() as conn, db._lock:
449
+ conn.execute(
450
+ """
451
+ UPDATE assignments
452
+ SET status = 'completed', completed_at = ?, lease_until = NULL
453
+ WHERE round_id = ? AND item_id = ? AND user_id = ?
454
+ """,
455
+ (now, args.round_id, current_item_id, user_id),
456
+ )
457
+
458
+ # update redundancy_completed
459
+ conn.execute(
460
+ """
461
+ UPDATE task_config
462
+ SET redundancy_completed = (
463
+ SELECT COUNT(*) FROM assignments
464
+ WHERE round_id = ? AND item_id = ? AND status = 'completed'
465
+ )
466
+ WHERE round_id = ? AND item_id = ?
467
+ """,
468
+ (args.round_id, current_item_id, args.round_id, current_item_id),
469
+ )
470
+
471
+ items_list = get_user_items_list(user_id)
472
+ total = len(items_list)
473
+ completed_items = [it for it in items_list if it['status'] == 'completed']
474
+ done_count = len(completed_items)
475
+
476
+ # Find current index and go to the next item in order
477
+ current_idx_in_list = next(
478
+ (i for i, item in enumerate(items_list) if item['item_id'] == current_item_id), None
479
+ )
480
+
481
+ # db auto commit
482
+ if (
483
+ args.auto_commit > 0
484
+ and current_idx_in_list is not None
485
+ and (current_idx_in_list + 1) % args.auto_commit == 0
486
+ ):
487
+ try:
488
+ db.commit_and_push_db()
489
+ except Exception as e:
490
+ print(f'Failed to commit and push the database: {e}')
491
+
492
+ if current_idx_in_list is None or current_idx_in_list >= len(items_list) - 1:
493
+ # This was the last item - show completion message but stay on current item
494
+ gr.Info('Congratulations! All tasks completed. Thank you for participating! ☺️', duration=10)
495
+ # Reload the current (last) item to show it with answers
496
+ last_item = items_list[-1] if items_list else None
497
+ if last_item:
498
+ return _load_item_helper(user_id, last_item, items_list, len(items_list))
499
+ else:
500
+ # This should not happen (items_list empty after completion check)
501
+ gr.Info('All tasks completed. Thank you for participating!', duration=10)
502
+ return (
503
+ gr.update(visible=False),
504
+ gr.update(),
505
+ gr.update(),
506
+ gr.update(visible=True, value='All tasks completed. Thank you for participating!'),
507
+ gr.update(),
508
+ gr.update(visible=True, value=f'**Done:** {done_count}/{total}'),
509
+ current_item_id,
510
+ gr.update(),
511
+ )
512
+
513
+ # Go to the next item in sequence
514
+ next_item = items_list[current_idx_in_list + 1]
515
+ return _load_item_helper(user_id, next_item, items_list, current_idx_in_list + 2)
516
+
517
+ def _load_item_helper(
518
+ user_id: str, item: dict[str, Any], items_list: list[dict[str, Any]], current_idx: int
519
+ ) -> tuple:
520
+ """Load an item and return Gradio updates."""
521
+ item_id = item['item_id']
522
+ total = len(items_list)
523
+ completed_items = [it for it in items_list if it['status'] == 'completed']
524
+ done_count = len(completed_items)
525
+
526
+ # Handle different status: completed vs pending/in_progress
527
+ if item['status'] == 'completed':
528
+ # Completed item: no lease needed, just load existing answer
529
+ existing = db.get_answer(args.round_id, user_id, item_id)
530
+ if existing:
531
+ s, words_not_present_list = existing
532
+ s_val = str(int(s))
533
+ # If no words are missing (empty list), show 'ALL WORDS PRESENT' as selected
534
+ words_cb_val = words_not_present_list if words_not_present_list else ['ALL WORDS PRESENT']
535
+ else:
536
+ # This should not happen: completed item without answer
537
+ raise ValueError(f'Item {item_id} is marked as completed but has no answer in database')
538
+ else:
539
+ # Pending or in_progress: acquire lease
540
+ if item['status'] == 'pending':
541
+ # Update to in_progress
542
+ now_dt = datetime.now(timezone.utc)
543
+ lease_until = (now_dt + timedelta(seconds=600)).isoformat()
544
+ with db._connect() as conn, db._lock:
545
+ conn.execute(
546
+ """
547
+ UPDATE assignments
548
+ SET status = 'in_progress', started_at = ?, lease_until = ?
549
+ WHERE round_id = ? AND item_id = ? AND user_id = ?
550
+ """,
551
+ (now_dt.isoformat(), lease_until, args.round_id, item_id, user_id),
552
+ )
553
+ elif item['status'] == 'in_progress':
554
+ # Renew lease
555
+ now_dt = datetime.now(timezone.utc)
556
+ lease_until = (now_dt + timedelta(seconds=600)).isoformat()
557
+ with db._connect() as conn, db._lock:
558
+ conn.execute(
559
+ """
560
+ UPDATE assignments
561
+ SET lease_until = ?
562
+ WHERE round_id = ? AND item_id = ? AND user_id = ?
563
+ """,
564
+ (lease_until, args.round_id, item_id, user_id),
565
+ )
566
+
567
+ s_val, words_cb_val = None, []
568
+
569
+ img_path = format_image_path(args.image_template, item['label'], item['path'])
570
+ label_md_value = f'## Label: <span style="color: #00AA00; font-weight: bold;">{item["label"]}</span>'
571
+
572
+ return (
573
+ gr.update(value=img_path, visible=True),
574
+ gr.update(visible=True, value=label_md_value),
575
+ gr.update(visible=True, choices=item['words'] + ['ALL WORDS PRESENT'], value=words_cb_val),
576
+ gr.update(visible=False),
577
+ gr.update(visible=True, value=f'**At:** {current_idx}/{total}'),
578
+ gr.update(visible=True, value=f'**Done:** {done_count}/{total}'),
579
+ item_id,
580
+ gr.update(value=s_val),
581
+ )
582
+
583
+ def _find_item_by_index(user_state_val: dict[str, Any], index1: int) -> str | None:
584
+ """Get the item id by index in a user's assigned items."""
585
+ if not user_state_val or not user_state_val.get('user_id'):
586
+ return None
587
+ user_id = user_state_val['user_id']
588
+ items_list = get_user_items_list(user_id)
589
+ if index1 < 1 or index1 > len(items_list):
590
+ return None
591
+ return items_list[index1 - 1]['item_id']
592
+
593
+ def _load_item(user_state_val: dict[str, Any], item_id: str) -> ReturnTuple:
594
+ """Load and display an item."""
595
+ if not user_state_val or not user_state_val.get('user_id'):
596
+ return _error_return('Please start/continue', keep_current_item=False)
597
+
598
+ user_id = user_state_val['user_id']
599
+ items_list = get_user_items_list(user_id)
600
+
601
+ # Find the item
602
+ item = next((it for it in items_list if it['item_id'] == item_id), None)
603
+ if not item:
604
+ return _error_return('Item not found in your assignments', keep_current_item=False)
605
+
606
+ current_idx = next((i + 1 for i, it in enumerate(items_list) if it['item_id'] == item_id), 1)
607
+ return _load_item_helper(user_id, item, items_list, current_idx)
608
+
609
+ def jump_to(
610
+ user_state_val: dict[str, Any], index_number: float | None, current_item_id: str | None
611
+ ) -> ReturnTuple:
612
+ """Jump to the item by index (only completed items)."""
613
+ if not user_state_val or not user_state_val.get('user_id'):
614
+ return _error_return('Please start/continue', current_item_id=current_item_id)
615
+ if index_number is None:
616
+ return _error_return('Please input the index', current_item_id=current_item_id)
617
+
618
+ target_index = int(index_number)
619
+ target_item_id = _find_item_by_index(user_state_val, target_index)
620
+ if not target_item_id:
621
+ return _error_return('Index out of range', current_item_id=current_item_id)
622
+
623
+ # Check jump constraints
624
+ user_id = user_state_val['user_id']
625
+ items_list = get_user_items_list(user_id)
626
+ target_item = next((it for it in items_list if it['item_id'] == target_item_id), None)
627
+
628
+ if not target_item:
629
+ return _error_return('Target item not found', current_item_id=current_item_id)
630
+
631
+ # Find the first non-completed item index
632
+ first_pending_idx = next(
633
+ (i for i, it in enumerate(items_list) if it['status'] != 'completed'), len(items_list)
634
+ )
635
+
636
+ # Allow jump to: completed items OR the first non-completed item (but not beyond)
637
+ if target_item['status'] != 'completed' and target_index - 1 > first_pending_idx:
638
+ return _error_return(
639
+ 'Can only jump to completed items or the first pending item', current_item_id=current_item_id
640
+ )
641
+
642
+ return _load_item(user_state_val, target_item_id)
643
+
644
+ def handle_prev(user_state_val: dict[str, Any], current_item_id: str | None) -> ReturnTuple:
645
+ """Handle Prev button: navigate to previous item (completed items only)."""
646
+ if not user_state_val or not user_state_val.get('user_id'):
647
+ return _error_return('Please start/continue', current_item_id=current_item_id)
648
+
649
+ user_id = user_state_val['user_id']
650
+ items_list = get_user_items_list(user_id)
651
+ if not items_list:
652
+ return _error_return('No items assigned', keep_current_item=False)
653
+
654
+ if current_item_id is None:
655
+ return _error_return('No current item', current_item_id=current_item_id)
656
+
657
+ current_idx = next((i for i, it in enumerate(items_list) if it['item_id'] == current_item_id), None)
658
+ if current_idx is None:
659
+ return _error_return('Current item not found in your assignments', current_item_id=current_item_id)
660
+
661
+ if current_idx == 0:
662
+ return _error_return('Already at the first item', current_item_id=current_item_id)
663
+
664
+ target_idx = current_idx - 1
665
+ target_item = items_list[target_idx]
666
+
667
+ # Only allow navigating to completed items
668
+ if target_item['status'] != 'completed':
669
+ return _error_return(
670
+ 'Can only navigate to completed items using Prev', current_item_id=current_item_id
671
+ )
672
+
673
+ return _load_item(user_state_val, target_item['item_id'])
674
+
675
+ def handle_next(
676
+ user_state_val: dict[str, Any],
677
+ current_item_id: str | None,
678
+ ) -> ReturnTuple:
679
+ """Navigate to next item.
680
+
681
+ Rules:
682
+ - If current item is NOT completed, require submission first
683
+ - If current item is completed, allow free navigation
684
+ """
685
+ if not user_state_val or not user_state_val.get('user_id'):
686
+ return _error_return('Please start/continue', current_item_id=current_item_id)
687
+
688
+ user_id = user_state_val['user_id']
689
+ items_list = get_user_items_list(user_id)
690
+ if not items_list:
691
+ return _error_return('No items assigned', keep_current_item=False)
692
+
693
+ if current_item_id is None:
694
+ return _error_return('No current item', current_item_id=current_item_id)
695
+
696
+ current_idx = next((i for i, it in enumerate(items_list) if it['item_id'] == current_item_id), None)
697
+ if current_idx is None:
698
+ return _error_return('Current item not found in your assignments', current_item_id=current_item_id)
699
+
700
+ if current_idx >= len(items_list) - 1:
701
+ return _error_return('Already at the last item', current_item_id=current_item_id)
702
+
703
+ current_item = items_list[current_idx]
704
+ current_is_completed = current_item['status'] == 'completed'
705
+
706
+ # If current item is NOT completed, require submission
707
+ if not current_is_completed:
708
+ return _error_return(
709
+ 'Please submit your answer before moving to the next item', current_item_id=current_item_id
710
+ )
711
+
712
+ # Move to next item
713
+ target_idx = current_idx + 1
714
+ target_item = items_list[target_idx]
715
+
716
+ return _load_item(user_state_val, target_item['item_id'])
717
+
718
+ start_btn.click(
719
+ fn=start_or_resume,
720
+ inputs=[user_id_inp, user_state],
721
+ outputs=[
722
+ img,
723
+ label_md,
724
+ words_checkbox,
725
+ done_md,
726
+ current_idx_md,
727
+ progress_md,
728
+ user_state,
729
+ current_item_state,
730
+ score,
731
+ ],
732
+ queue=True,
733
+ )
734
+ submit_btn.click(
735
+ fn=submit,
736
+ inputs=[user_state, current_item_state, score, words_checkbox],
737
+ outputs=[
738
+ img,
739
+ label_md,
740
+ words_checkbox,
741
+ done_md,
742
+ current_idx_md,
743
+ progress_md,
744
+ current_item_state,
745
+ score,
746
+ ],
747
+ queue=True,
748
+ )
749
+
750
+ jump_btn.click(
751
+ fn=jump_to,
752
+ inputs=[user_state, jump_idx, current_item_state],
753
+ outputs=[
754
+ img,
755
+ label_md,
756
+ words_checkbox,
757
+ done_md,
758
+ current_idx_md,
759
+ progress_md,
760
+ current_item_state,
761
+ score,
762
+ ],
763
+ queue=True,
764
+ )
765
+
766
+ prev_btn.click(
767
+ fn=handle_prev,
768
+ inputs=[user_state, current_item_state],
769
+ outputs=[
770
+ img,
771
+ label_md,
772
+ words_checkbox,
773
+ done_md,
774
+ current_idx_md,
775
+ progress_md,
776
+ current_item_state,
777
+ score,
778
+ ],
779
+ queue=True,
780
+ )
781
+
782
+ next_btn.click(
783
+ fn=handle_next,
784
+ inputs=[user_state, current_item_state, score, words_checkbox],
785
+ outputs=[
786
+ img,
787
+ label_md,
788
+ words_checkbox,
789
+ done_md,
790
+ current_idx_md,
791
+ progress_md,
792
+ current_item_state,
793
+ score,
794
+ ],
795
+ queue=True,
796
+ )
797
+
798
+ basic_user = args.basic_user
799
+ basic_pass = args.basic_pass
800
+ auth_tuple = (basic_user, basic_pass)
801
+
802
+ # Extract image directory from template to add to allowed_paths
803
+ # e.g., "/path/to/img/{label}/{path}" -> "/path/to/img"
804
+ image_dir = args.image_template.split('{')[0].rstrip('/')
805
+ if image_dir:
806
+ allowed_paths = [image_dir]
807
+ else:
808
+ allowed_paths = None
809
+ print(f'Allowed paths: {allowed_paths}')
810
+
811
+ demo.queue(max_size=256).launch(
812
+ share=True,
813
+ auth=auth_tuple,
814
+ allowed_paths=allowed_paths,
815
+ )
src/instuction_md.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task instruction for icon evaluation."""
2
+
3
+ login_instruction = """
4
+ ## Login
5
+ Every time you start evaluating, input your user ID (e.g.: janeding) in the input box, and click the "Start/Resume" button. This will load your previous evaluation progress if you have logged in before.
6
+ """
7
+
8
+ q1_instruction = """
9
+ ## Question 1: Relevance Score (1-5)
10
+ Rate how relevant the icon is to the given label on a scale of 1 to 5:
11
+ - **5**: Strongly related (highly consistent with the label, clear match)
12
+ - **4**: Moderately related (clear partial connection, overall closer to related)
13
+ - **3**: Neutral/uncertain (ambiguous, could be seen as either related or unrelated)
14
+ - **2**: Weakly related (some minor or indirect connection, overall closer to unrelated)
15
+ - **1**: Completely unrelated (no clear connection)
16
+ """
17
+
18
+ q2_instruction = """
19
+ ## Question 2: Keyword Presence
20
+ Check the 10 keywords and indicate which ones are **NOT** visible in the icon image:
21
+ - Select **specific keywords** that are **missing** from the image, OR
22
+ - Select **"ALL WORDS PRESENT"** if all 10 keywords are visible in the image
23
+
24
+ You must select at least one option (either specific missing keywords or "ALL WORDS PRESENT")
25
+ """
26
+
27
+ important_notes_instruction = """
28
+ ## Notes
29
+ - Always **SUBMIT** your answer.
30
+ - You can resume from where you left off by logging in with the same User ID.
31
+ - You can modify your previous answers by navigating back.
32
+ """
33
+
34
+ nav_instruction = """
35
+ ## Navigation
36
+ - **Submit and Next**: Submit your answer and move to the next icon
37
+ - **Next**: Navigate forward (up to your latest progress)
38
+ - **Prev**: Navigate backward to review previous answers
39
+ - **Jump to**: Jump to a specific index (1-indexed, up to your latest progress)
40
+ """