"""Validate uploaded prediction CSVs and push them to the submissions dataset repo."""

import json
import tempfile
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import BinaryIO

import gradio as gr

from constants import API, SUBMISSIONS_REPO, REGISTRATION_CODE
from validation import validate_csv_file, validate_username


def upload_submission(
    file_content: str,
    user_state,
    submission_type: str,
    model_name: str,
    model_description: str,
    anonymous: bool = False,
) -> None:
    """Upload one submission record without validation (assumes validation already done).

    The record is serialized to a temporary JSON file, uploaded to
    ``SUBMISSIONS_REPO`` via ``API.upload_file`` and the temporary file is
    removed afterwards, whether or not the upload succeeded.

    Args:
        file_content: Decoded text of the submitted CSV file.
        user_state: Username the submission is stored under (used as the
            top-level folder of the path in the repo).
        submission_type: Dataset label stored in the record's ``dataset`` field.
        model_name: Name of the model; also embedded in the file name.
        model_description: Free-text description stored in the record.
        anonymous: Stored as the record's ``anonymous`` flag.

    Raises:
        Whatever ``API.upload_file`` or the JSON/file writing raises; errors
        are not swallowed here.
    """
    # Read the clock once so the date in the file name can never disagree
    # with the timestamp stored in the record (e.g. around UTC midnight).
    now = datetime.now(timezone.utc)
    timestamp = now.isoformat()
    date = now.date().isoformat()
    submission_id = str(uuid.uuid4())

    # Write to dataset, using a somewhat structured file name.
    # NOTE(review): model_name is interpolated into the repo path unescaped;
    # a "/" in it would create nested folders -- confirm callers restrict it.
    filename = f"{user_state}/{date}_{model_name}_{submission_id}.json"

    # Note: This may be represented as Parquet in the future, so the schema
    # needs to stay consistent.
    record = {
        "submission_id": submission_id,
        "submission_filename": filename,
        "submission_time": timestamp,
        "evaluated": False,
        "user": user_state,
        "model_name": model_name,
        "model_description": model_description,
        "csv_content": file_content,
        "dataset": submission_type,
        "anonymous": anonymous,
    }

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False, encoding="utf-8"
        ) as tmp:
            tmp_path = Path(tmp.name)
            # Note: No indent because indents and large text contents cause this
            # error: https://github.com/huggingface/datasets/issues/3227
            json.dump(record, tmp)
        # The file is closed (and therefore flushed) before it is uploaded.
        API.upload_file(
            path_or_fileobj=str(tmp_path),
            path_in_repo=filename,
            repo_id=SUBMISSIONS_REPO,
            repo_type="dataset",
            commit_message=f"Add submission for {user_state} at {timestamp}",
        )
    finally:
        # delete=False means nobody else cleans this up; do it even when the
        # dump or the upload failed so temp files do not accumulate.
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)


def safe_read_encoding(path_obj: Path) -> str:
    """Return the text of *path_obj*, decoded as UTF-8 or, failing that, latin-1.

    The file is read from disk once; only the decoding is retried. latin-1
    maps every byte to a character, so decoding itself cannot fail -- only
    I/O errors from reading the file propagate.
    """
    raw = path_obj.read_bytes()
    # Try utf-8 otherwise latin-1 (can tweak as needed)
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        return raw.decode("latin-1")


def validate_file_requirements(file: BinaryIO, file_type: str) -> Path:
    """Validate basic file requirements and return Path object.

    Args:
        file: Uploaded file. Either an object exposing the on-disk location
            as ``.name`` or a plain ``str``/``Path`` pointing at the file.
        file_type: Human-readable label used in error messages.

    Returns:
        Path of the uploaded file.

    Raises:
        gr.Error: If no file path is available, the suffix is not ``.csv``,
            or the file cannot be read.
    """
    # Accept a bare path as well as a file object; anything without a usable
    # path falls through to the explicit error below instead of AttributeError.
    if isinstance(file, (str, Path)):
        file_path = file
    else:
        file_path = getattr(file, "name", None)

    if not file_path:
        raise gr.Error(
            f"Uploaded {file_type} file object does not have a valid file path."
        )

    path_obj = Path(file_path)
    if path_obj.suffix.lower() != ".csv":
        raise gr.Error(
            f"{file_type} file must be a CSV file. Please upload a .csv file."
        )

    # Check it can be read
    try:
        safe_read_encoding(path_obj)
    except Exception as e:
        raise gr.Error(
            f"Error reading {file_type} file {path_obj.name}. Could be an encoding issue if using Windows. Full error: {e}"
        ) from e

    return path_obj


def make_submission(
    cv_file: BinaryIO,
    test_file: BinaryIO,
    user_state,
    model_name: str = "",
    model_description: str = "",
    anonymous: bool = False,
    registration_code: str = "",
    # profile: gr.OAuthProfile | None = None,
) -> str:
    """
    Make submissions for both GDPa1 cross-validation and private test set files.
    Both files are required. Validates both files before making any submissions.

    Args:
        cv_file: Uploaded GDPa1 cross-validation CSV.
        test_file: Uploaded private test set CSV.
        user_state: Username; checked with ``validate_username`` and stripped.
        model_name: Required model name (surrounding whitespace is removed).
        model_description: Optional description (surrounding whitespace is removed).
        anonymous: Forwarded to each uploaded record.
        registration_code: Compared case-insensitively, after stripping,
            against ``REGISTRATION_CODE``.

    Returns:
        One confirmation message per uploaded file, separated by blank lines.

    Raises:
        gr.Error: On a missing model name, wrong registration code, missing
            file, or a file that fails the basic file checks. Errors raised
            by ``validate_username`` / ``validate_csv_file`` propagate as-is.
    """
    # if profile:
    #     user_state = profile.name
    validate_username(user_state)
    username = user_state.strip()

    # UI fields may arrive as None; treat that the same as an empty string.
    model_name = (model_name or "").strip()
    model_description = (model_description or "").strip()

    if not model_name:
        raise gr.Error("Please provide a model name.")

    if str(registration_code).strip().upper() != REGISTRATION_CODE:
        raise gr.Error(
            "Invalid registration code. Please register on the Competition Registration page or email antibodycompetition@ginkgobioworks.com."
        )

    if cv_file is None:
        raise gr.Error(
            "Please upload the GDPa1 Cross-Validation CSV file before submitting."
        )
    if test_file is None:
        raise gr.Error("Please upload the Private Test Set CSV file before submitting.")

    # Validate CV file
    cv_path = validate_file_requirements(cv_file, "GDPa1 Cross-Validation")
    cv_content = safe_read_encoding(cv_path)
    validate_csv_file(cv_content, "GDPa1_cross_validation")

    # Validate test file
    test_path = validate_file_requirements(test_file, "Private Test Set")
    test_content = safe_read_encoding(test_path)
    validate_csv_file(test_content, "Heldout Test Set")

    # (content, dataset label stored in the record, name shown to the user)
    submissions = (
        (cv_content, "GDPa1_cross_validation", "Cross-Validation"),
        (test_content, "Heldout Test Set", "Test Set"),
    )

    # If validation passes, make submissions for both files.
    # NOTE: the uploads are independent; if the second one fails, the first
    # has already been committed to the repo.
    messages = []
    for file_content, submission_type, display_name in submissions:
        # Upload submission without re-validating (already done)
        upload_submission(
            file_content=file_content,
            user_state=username,
            submission_type=submission_type,
            model_name=model_name,
            model_description=model_description,
            anonymous=anonymous,
        )
        messages.append(
            f"✅ {display_name}: Your submission has been received! Your results should appear on the leaderboard within a minute. If they don't, please reach out to antibodycompetition@ginkgobioworks.com."
        )

    return "\n\n".join(messages)