import html
import re


def create_gradio_anchor_id(text: str, validation: bool) -> str:
    """Build the anchor ID used to link to a leaderboard header.

    The text is lower-cased, whitespace runs become single hyphens, and any
    character that is neither a word character nor a hyphen is dropped. The
    result is wrapped as ``h-<slug>-leaderboard``; when ``validation`` is
    truthy, ``-1`` is appended.

    The format is meant to mirror the IDs produced by
    ``gr.Markdown(header_links=True)``.

    Examples:
        "Paper Finder Validation", validation=False
            -> "h-paper-finder-validation-leaderboard"
        "Paper Finder Validation", validation=True
            -> "h-paper-finder-validation-leaderboard-1"

    Args:
        text: Human-readable header text.
        validation: Truthy to get the ``-1``-suffixed variant of the ID
            (presumably the validation-split header; confirm with callers).

    Returns:
        The anchor ID string.
    """
    slug = text.lower()
    slug = re.sub(r'\s+', '-', slug)  # Replace whitespace runs with hyphens
    slug = re.sub(r'[^\w-]', '', slug)  # Remove non-word characters
    if validation:
        return f"h-{slug}-leaderboard-1"
    return f"h-{slug}-leaderboard"


# NOTE(review): the text ends its first paragraph with "We report:" but no
# list follows; markup or list items may be missing. Left byte-identical.
TITLE = """
AstaBench provides an aggregated view of agent performance and efficiency across all benchmarks in all four categories. We report:
This view is designed for quick comparison of general-purpose scientific agents. For more details on how we calculate scores and cost, please see the About Page.
"""

SCATTER_DISCLAIMER = """ **Note:** Agents without cost data are displayed to the right of the vertical divider line. """

PARETO_DISCLAIMER = """ Agents names that are green are Pareto optimal, meaning they achieve the best performance for their cost. """

# NOTE(review): everything after "and more." reads like a submission/legal
# notice rather than a category description and is fused on without a space;
# it may belong to a separate constant. Left byte-identical; confirm the
# intended copy before changing.
LIT_DESCRIPTION = """ The **Literature Understanding** category evaluates how well agents comprehend and interact with scientific literature—testing their ability to find research papers, assess citation quality, extract information from text, and more.The Allen Institute for Artificial Intelligence (Ai2) maintains this repository for agent evaluation submissions to AstaBench. To keep AstaBench fair and auditable, all evaluation logs and associated submission files will be made publicly available. This includes your benchmark inputs, model output responses, and other data and information related to your submission as needed to verify the results.
Your submissions to AstaBench will be posted, scored, and ranked on the leaderboard at https://huggingface.co/spaces/allenai/asta-bench-leaderboard. You agree you have the rights to the materials you submit and that you will not share any personal, sensitive, proprietary, or confidential information.
"""


# The three formatters below currently produce identical output (the message
# followed by a newline). They are kept as separate entry points so callers
# can keep distinguishing severities.
def format_error(msg) -> str:
    """Return ``msg`` rendered as an error line (message plus newline)."""
    return f"{msg}\n"


def format_warning(msg) -> str:
    """Return ``msg`` rendered as a warning line (message plus newline)."""
    return f"{msg}\n"


def format_log(msg) -> str:
    """Return ``msg`` rendered as a log line (message plus newline)."""
    return f"{msg}\n"


def hyperlink(link_url: str, text: str = "🔗") -> str:
    """Return an HTML anchor pointing at ``link_url`` with ``text`` as label.

    Args:
        link_url: Target URL. If it is empty or not a string, no link can be
            built and the label alone is returned.
        text: Link label; inserted as-is so callers may pass markup or an
            emoji.

    Returns:
        ``str(text)`` when ``link_url`` is unusable, otherwise an ``<a>``
        element that opens the URL in a new tab.
    """
    if not isinstance(link_url, str) or not link_url:
        return str(text)  # Or simply "" if link_url is bad
    # Escape the URL so quotes or ampersands in it cannot break out of the
    # href attribute; rel=noopener keeps the new tab from reaching the opener.
    safe_url = html.escape(link_url, quote=True)
    return f'<a target="_blank" rel="noopener noreferrer" href="{safe_url}">{text}</a>'


def hf_uri_to_web_url(uri: str) -> str:
    """Convert a Hugging Face dataset URI into its public web URL.

    Turns ``hf://datasets/{namespace}/{repo}/{path...}`` into
    ``https://huggingface.co/datasets/{namespace}/{repo}/tree/main/{path...}``.

    Args:
        uri: URI beginning with ``hf://datasets/``.

    Returns:
        The corresponding ``https://huggingface.co`` URL on the ``main`` tree.

    Raises:
        ValueError: If the prefix is missing, or if fewer than three
            ``/``-separated components follow it.
    """
    prefix = "hf://datasets/"
    if not uri.startswith(prefix):
        raise ValueError("URI must start with 'hf://datasets/'")

    # maxsplit=2 keeps any further slashes inside the trailing path component.
    parts = uri[len(prefix):].split("/", 2)
    if len(parts) < 3:
        raise ValueError("Expected format: hf://datasets/{namespace}/{repo}/{path...}")

    namespace, repo, path = parts
    return f"https://huggingface.co/datasets/{namespace}/{repo}/tree/main/{path}"


# Custom stylesheet for the Gradio app. The string is kept byte-identical to
# the original, including its line breaks.
css = """ /* CSS Color Variables using Gradio theme */ :root { --color-primary-green: var(--primary-900); /* #0FCB8C */ --color-primary-pink: var(--secondary-900); /* #f0529c */ --color-neutral-light: var(--neutral-200); /* #C9C9C3 */ --color-background-light: var(--neutral-50); /* #FAF2E9 */ --color-background-dark: var(--neutral-900); /* #032629 */ --color-text-light: var(--neutral-50); /* #FAF2E9 */ } /* This makes space for the huggingface header bar which must shown on HF spaces. */ /* FIXME Media queries don't seem to survive rendering. */ /* @media (min-width: 768px) { ... 
} */ gradio-app { padding-top: 65px; } /* Global Styles */ h2 { overflow: hidden; } #intro-paragraph { font-size: 18px; max-width: 90%; padding-left: 35px; margin-top: 20px; } #intro-paragraph p, #intro-paragraph li { font-size: 16px; line-height: 1.8; } #intro-paragraph ul { margin-top: 20px; margin-bottom: 20px; } #diagram-image { height: 100%; } #diagram-image img { width: 100%; height: 100%; object-fit: cover; } #intro-category-paragraph { font-size: 18px; max-width: 90%; margin-top: 20px; } #intro-category-paragraph p, #intro-category-paragraph li { font-size: 16px; line-height: 1.8; } #intro-category-paragraph ul { margin-top: 20px; margin-bottom: 20px; } #about-content { font-size: 18px; max-width: 60%; padding-left: 25px; } #category-intro { font-size: 18px; max-width: 60%; } #logo-image { margin: 0; margin-bottom: 30px; justify-content: flex-start; max-width: 250px; height: auto; } #page-content-wrapper{ padding-left: 25px; } .table-component{ height: auto !important; max-height: none !important; } .table-wrap { max-height: none !important; height: auto !important; overflow-y: visible !important; } /* --- New Rules for Table Density --- */ table.gr-table th, table.gr-table td { padding: 4px 4px !important; width: 1%; white-space: nowrap; } table.svelte-1e98i6s td { vertical-align: top !important; } table.gr-table { font-size: 14px !important; } .html-container { padding-top: 0 !important; } #scatter-disclaimer { overflow: visible !important; } #pareto-disclaimer { color: #f0529c !important; } thead.svelte-1e98i6s th { background: white !important; } .dark thead.svelte-1e98i6s th { background: #091a1a !important; } .cell-wrap.svelte-v1pjjd { font-family: 'Manrope'; } nav.svelte-ti537g.svelte-ti537g { justify-content: flex-start; } .nav-holder { padding-left: 20px !important; } #legend-markdown span { margin-right: 15px !important; } #leaderboard-accordion .label-wrap { font-size: 1.4rem !important; z-index: 10 !important; position: relative !important; } 
.dark #leaderboard-accordion .label-wrap { color: #0FCB8C !important; } .dark block.svelte-1svsvh2 { background: #032629 !important; } .padding.svelte-phx28p { padding: 0 !important; } .sub-nav-bar-container { display: flex !important; flex-wrap: wrap !important; align-items: center !important; gap: 10px !important; } .dark .primary-link-button { color: var(--color-primary-green); } .primary-link-button { background: none; border: none; padding: 0; margin: 0; font-family: inherit; font-size: 16px; color: var(--color-primary-pink); text-decoration: none; cursor: pointer; white-space: nowrap; } .primary-link-button:hover { text-decoration: underline; } .sub-nav-label { font-weight: bold; font-size: 16px; display: flex; align-items: center; } .wrap-header-df th span{ white-space: normal !important; word-break: normal !important; overflow-wrap: break-word !important; line-height: 1.2 !important; vertical-align: top !important; font-size: 12px !important; font-family: 'Manrope'; } .wrap-header-df th { height: auto !important; } .wrap-header-df .cell-wrap img { width: 16px; height: 16px; vertical-align: middle; } #legend-markdown img { width: 16px; height: 16px; vertical-align: middle; } /*------ Global tooltip styles ------*/ .tooltip-icon { display: inline-block; cursor: help; position: relative; } .tooltip-icon::after { content: attr(data-tooltip); position: absolute; bottom: 125%; background-color: #105257; color: #fff; padding: 10px; border-radius: 4px; font-size: 12px; opacity: 0; transition: opacity 0.2s; white-space: pre-line; width: max-content; text-align: left; pointer-events: none; max-width: 300px; left: 50%; transform: translateX(-50%); z-index: 1000; } @media (max-width: 768px) { .tooltip-icon::after { max-width: 250px; } } .tooltip-icon:hover::after { opacity: 1; } /*------ Openness label tooltip styles ------*/ .styler, #openness-label-html, #agent-tooling-label-html { overflow: visible !important; } /*------ Table cell tooltip styles ------*/ 
.wrap.default.full, span.wrap[tabindex="0"][role="button"][data-editable="false"] { overflow: visible !important; } .cell-tooltip-icon::after { height: fit-content; top: 125%; } /*------ Table column description tooltip styles ------*/ #legend-markdown, #leaderboard-accordion { overflow: visible !important; } /* --- inside table tooltips --- */ .native-tooltip-icon { cursor: help; text-decoration: underline dotted 1px; } /* Main Nav bar styling */ .nav-holder nav { display: grid !important; grid-template-columns: auto auto auto auto auto 1fr auto auto !important; gap: 10px 20px !important; /* Vertical and horizontal spacing */ width: 100% !important; align-items: center; } .nav-holder nav a[href*="about"] { grid-row: 1 !important; grid-column: 7 !important; } .nav-holder nav a[href*="submit"] { grid-row: 1 !important; grid-column: 8 !important; white-space: nowrap !important; } /* Divider line between header and category nav */ .nav-holder nav::after { content: ''; /* Required for pseudo-elements to appear */ background-color: #C9C9C3; height: 1px; grid-row: 2 !important; grid-column: 1 / -1 !important; } /* Horizontal scrolling for navigation */ .nav-holder nav { overflow-x: auto; scrollbar-width: none; -ms-overflow-style: none; } .nav-holder nav::-webkit-scrollbar { display: none; } /* Category navigation buttons in row 3 */ .nav-holder nav a[href*="literature-understanding"], .nav-holder nav a[href*="code-execution"], .nav-holder nav a[href*="data-analysis"], .nav-holder nav a[href*="discovery"] { grid-row: 3 !important; justify-self: center !important; width: fit-content !important; white-space: nowrap; flex-shrink: 0; } .nav-holder nav a[href*="literature-understanding"] { grid-column: 1 !important; } .nav-holder nav a[href*="code-execution"] { grid-column: 2 !important; } .nav-holder nav a[href*="data-analysis"] { grid-column: 3 !important; } .nav-holder nav a[href*="discovery"] { grid-column: 4 !important; } /* Navigation hover styles */ .nav-holder nav 
a[href*="about"]:hover, .nav-holder nav a[href*="submit"]:hover, .nav-holder nav a[href*="literature-understanding"]:hover, .nav-holder nav a[href*="code-execution"]:hover, .nav-holder nav a[href*="data-analysis"]:hover, .nav-holder nav a[href*="discovery"]:hover { background-color: #FDF9F4; } .dark .nav-holder nav a[href*="about"]:hover, .dark .nav-holder nav a[href*="submit"]:hover, .dark .nav-holder nav a[href*="literature-understanding"]:hover, .dark .nav-holder nav a[href*="code-execution"]:hover, .dark .nav-holder nav a[href*="data-analysis"]:hover, .dark .nav-holder nav a[href*="discovery"]:hover { background-color: #1C3A3C; } .benchmark-main-subtitle{ color: var(--color-primary-green); overflow: hidden; padding-top: 120px; } .benchmark-title{ color: var(--color-primary-pink); margin-top: 50px; font-size: 20px; } .dark .benchmark-title{ color: var(--color-primary-green); } .benchmark-description { margin: 20px 0; max-width: 800px; } /*------ Submission Page CSS ------*/ #submission-modal .modal-container, #success-modal .modal-container { height: auto; max-width: 600px; } #submission-modal-content, #success-modal .submission-modal-content { padding: 20px; background-color: inherit; border-radius: 8px; text-align: center; } #submission-modal-content p, #success-modal .submission-modal-content p { font-size: 16px; } #legal-modal-content { padding: 30px; background-color: inherit; border-radius: 8px; text-align: left; font-size: 14px; } #legal-modal-content h2 { text-align: center; } #legal-modal-content button { width: fit-content; } .spinner-container { display: flex; flex-direction: column; align-items: center; justify-content: center; padding: 30px; } .spinner { width: 50px; height: 50px; border: 5px solid #dee2e6; border-top: 5px solid #007bff; border-radius: 50%; animation: spin 1s linear infinite; margin-bottom: 20px; } @keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } } #submission-page-container { max-width: 800px; 
margin: 0 auto; } #submission-file-label { padding: 10px; } #submission-button { max-width: fit-content; font-size: 14px; } .custom-form-group { border: 1px solid #000 !important; border-radius: 4px !important; padding: 24px !important; overflow: visible !important; } #openness-label-html, #agent-tooling-label-html, #agent-info-label-html, #submitter-info-label-html, #username-label-html, #email-label-html, #role-label-html { padding-left: 12px; } .form-label { margin: 4px 0px 0px 6px; } .form-label-fieldset { padding-top: 10px !important; } #agent-tooling-label-html { padding-top: 6px; } .custom-form-group, .styler { background: none; } #feedback-button { display: inline-block; background-color: #345d60; color: white; border: none; border-radius: 4px; padding: 15px 20px; font-size: 16px; cursor: pointer; transition: all 0.3s ease; text-decoration: none; } #feedback-button:hover { background-color: #5d888b; transform: translateY(-2px); box-shadow: 0 6px 12px rgba(0,0,0,0.3); } .dark #main-header h2 { color: #0fcb8c; } #main-header h2 { color: #f0529c; } /* --- New HTML-Based Tooltip Styles --- */ .tooltip-icon-legend { position: relative; cursor: help; display: inline-block; } /* The HTML pop-up card tooltips.*/ .tooltip-card { /* Hiding mechanism */ opacity: 0; visibility: hidden; transition: opacity 0.2s; pointer-events: none; /* Card appearance */ position: fixed; z-index: 1000; background-color: #083c40; color: #e5e7eb; border-radius: 12px; padding: 15px; width: max-content; max-width: 400px; text-align: left; } .tooltip-card.visible { opacity: 1; visibility: visible; } .tooltip-card h3 { font-size: 18px; color: #fff; margin-top: 0; margin-bottom: 12px; } .tooltip-card .tooltip-description { margin-bottom: 20px; line-height: 1.3; } .tooltip-card .tooltip-items-container { display: flex; flex-direction: column; gap: 10px; } .tooltip-card .tooltip-legend-item { display: flex; align-items: flex-start; gap: 10px; } .tooltip-card .tooltip-legend-item img { width: 
20px; height: 20px; margin-top: 2px; } .tooltip-card .tooltip-legend-item div { display: flex; flex-direction: column; } .tooltip-card .tooltip-legend-item strong { font-weight: 600; color: #fff; } .tooltip-card .tooltip-legend-item span { font-size: 13px; line-height: 1.3; } .tooltip-sub-list { list-style-type: '• '; padding-left: 18px; font-size: 13px; line-height: 1.3; display: flex; flex-direction: column; } .table-legend-item { display: flex; align-items: center; white-space: nowrap; margin-top: 8px; flex-wrap: wrap; } /* About Page CSS */ #about-page-content-wrapper { margin-left: auto; margin-right: auto; max-width: 800px; padding: 0 24px; display: flex; flex-direction: column; gap: 40px; margin-top: 40px; opacity: 85%; margin-bottom: 60px; } .link-buttons-container { display: flex; flex-wrap: wrap; /* Allows buttons to stack on very narrow screens */ gap: 16px; margin-top: 16px; } .link-button { display: flex; justify-content: space-between; align-items: center; flex-grow: 1; background-color: #083c40; padding: 16px 20px; font-weight: 600; border-radius: 12px; text-decoration: none; transition: background-color 0.2s ease-in-out; } .link-button:hover { background-color: #0a4c52; } .external-link-icon { font-size: 20px; line-height: 1; margin-left: 12px; } #leaderboard-accordion table { width: auto !important; margin-right: auto !important; } .info-list { padding-left: 20px; } /* Smooth scrolling for the entire page */ html { scroll-behavior: smooth; } /* Home Page Styling */ .diagram-placeholder { width: 100%; height: 100%; min-height: 250px; display: flex; align-items: center; justify-content: center; background-color: #FAF2E9; color: #F0529C; border-radius: 8px; font-size: 14px; text-align: center; } /* 2. 
Responsive behavior for smaller screens */ @media (max-width: 900px) { #intro-row { flex-direction: column; } } /* Plot legend styles */ .plot-legend-container { min-height: 572px; background-color: #fff; padding: 24px 32px; border: 1px solid black; border-radius: 4px; } .dark .plot-legend-container { background: rgba(250, 242, 233, 0.1); border-color: rgb(159, 234, 209); } #plot-legend-logo { margin-bottom: 24px; } #plot-legend-logo img { height: 19px; } .plot-legend-category-heading { font-size: 16px; font-weight: 700; } .plot-legend-item { display: flex; margin-top: 8px; } .plot-legend-item-text .description { color: #888; font-size: 12px; } .plot-legend-item-svg { margin-top: 3px; width: 14px; height: 14px; margin-right: 8px; } .plot-legend-tooling-svg { height: 16px; width: 16px; margin-top: 2px; } #plot-legend-item-pareto-svg { width: 18px; height: 18px; margin-right: 2px; } h3 .header-link-icon { font-size: 12px; vertical-align: text-top; margin-left: 6px; text-decoration: none; } /* Targets all "overall stats" columns in the main leaderboard for each category */ #main-leaderboard td:nth-child(6) .prose, #main-leaderboard td:nth-child(7) .prose { font-weight: 700 !important; } """