[
  {
    "agent_name": "OrbyAgent-ActIO-72b",
    "study_id": "b5fc5be7-54cc-4fc1-a9ee-73447b9c3eae",
    "benchmark": "WebArena",
    "score": 34.7,
    "std_err": 0.25,
    "benchmark_specific": "No",
    "benchmark_tuned": "No",
    "followed_evaluation_protocol": "Yes",
    "reproducible": "Yes",
    "comments": "Use original WebArena eval protocol and task definitions",
    "original_or_reproduced": "Original",
    "date_time": "2025-02-21 15:05:12"
  }
]