meghsn's picture
orby-agent-v0 (#8)
3011461 verified
raw
history blame
517 Bytes
[
{
"agent_name": "OrbyAgent-ActIO-72b",
"study_id": "b5fc5be7-54cc-4fc1-a9ee-73447b9c3eae",
"benchmark": "WebArena",
"score": 34.7,
"std_err": 0.25,
"benchmark_specific": "No",
"benchmark_tuned": "No",
"followed_evaluation_protocol": "Yes",
"reproducible": "Yes",
"comments": "Use original WebArena eval protocol and task definitions",
"original_or_reproduced": "Original",
"date_time": "2025-02-21 15:05:12"
}
]