from pathlib import Path
import tempfile
from typing import BinaryIO
import json
import gradio as gr
from datetime import datetime, timezone
import uuid
from constants import API, SUBMISSIONS_REPO, REGISTRATION_CODE
from validation import validate_csv_file, validate_username
def upload_submission(
    file_content: str,
    user_state,
    submission_type: str,
    model_name: str,
    model_description: str,
    anonymous: bool = False,
):
    """Upload one submission record without validating it.

    The caller is expected to have validated the inputs already. The record
    is serialised to a temporary JSON file, uploaded to ``SUBMISSIONS_REPO``
    via ``API.upload_file`` and the temporary file is removed afterwards,
    whether or not the upload succeeded.

    Args:
        file_content: Text of the submitted CSV, stored under ``csv_content``.
        user_state: Username; used as the top-level folder in the repo path
            and stored under ``user``.
        submission_type: Dataset label stored under ``dataset``.
        model_name: Model name; part of the repo filename and the record.
        model_description: Free-text description stored in the record.
        anonymous: Flag stored under ``anonymous``.

    Raises:
        Whatever ``API.upload_file`` raises; the error propagates unchanged.
    """
    # Take a single clock reading so the date embedded in the filename and the
    # recorded timestamp can never disagree across a UTC midnight boundary.
    now = datetime.now(timezone.utc)
    timestamp = now.isoformat()
    date = now.date().isoformat()
    submission_id = str(uuid.uuid4())

    # Path inside the dataset repo: one folder per user, structured file name.
    filename = f"{user_state}/{date}_{model_name}_{submission_id}.json"

    # Note: This may be represented as Parquet in the future, so the schema
    # needs to stay consistent.
    record = {
        "submission_id": submission_id,
        "submission_filename": filename,
        "submission_time": timestamp,
        "evaluated": False,
        "user": user_state,
        "model_name": model_name,
        "model_description": model_description,
        "csv_content": file_content,
        "dataset": submission_type,
        "anonymous": anonymous,
    }

    tmp_name = None
    try:
        # delete=False so the file survives closing and can be uploaded by
        # path; it is removed explicitly in the ``finally`` clause below.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False, encoding="utf-8"
        ) as tmp:
            tmp_name = tmp.name
            # Note: No indent because indents and large text contents cause
            # this error: https://github.com/huggingface/datasets/issues/3227
            json.dump(record, tmp)
        # Leaving the ``with`` block closes (and therefore flushes) the file
        # before it is handed to the uploader.
        API.upload_file(
            path_or_fileobj=tmp_name,
            path_in_repo=filename,
            repo_id=SUBMISSIONS_REPO,
            repo_type="dataset",
            commit_message=f"Add submission for {user_state} at {timestamp}",
        )
    finally:
        # Always clean up, so failed uploads do not leak temporary files.
        if tmp_name is not None:
            Path(tmp_name).unlink(missing_ok=True)
def safe_read_encoding(path_obj: Path) -> str:
    """Return the text content of *path_obj*, tolerating common encodings.

    The file is read once as bytes and decoded as UTF-8; a leading UTF-8
    byte-order mark (as written by some Windows tools) is stripped so it does
    not end up glued to the first CSV header. If the bytes are not valid
    UTF-8 they are decoded as latin-1 instead, which accepts any byte
    sequence.

    Args:
        path_obj: Path of the file to read.

    Returns:
        The decoded file content.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    raw = path_obj.read_bytes()
    try:
        # "utf-8-sig" behaves like "utf-8" but drops a leading BOM if present.
        return raw.decode("utf-8-sig")
    except UnicodeDecodeError:
        # latin-1 maps every byte to a code point, so this cannot fail.
        return raw.decode("latin-1")
def validate_file_requirements(file: BinaryIO, file_type: str) -> Path:
    """Validate basic file requirements and return the file's Path.

    Checks, in order, that the uploaded object exposes a non-empty ``name``
    (used as its path on disk), that the path has a ``.csv`` suffix
    (case-insensitive), and that the file can actually be read and decoded.

    Args:
        file: Uploaded file object; its ``name`` attribute is used as the path.
        file_type: Human-readable label used in error messages.

    Returns:
        The path of the uploaded file.

    Raises:
        gr.Error: If the object has no usable path, the suffix is not
            ``.csv``, or the file cannot be read.
    """
    # getattr: an object without a ``name`` attribute should produce the same
    # user-facing error as an empty name, not a raw AttributeError.
    file_path = getattr(file, "name", None)
    if not file_path:
        raise gr.Error(
            f"Uploaded {file_type} file object does not have a valid file path."
        )

    path_obj = Path(file_path)
    if path_obj.suffix.lower() != ".csv":
        raise gr.Error(
            f"{file_type} file must be a CSV file. Please upload a .csv file."
        )

    # Check it can be read; the content itself is discarded here.
    try:
        safe_read_encoding(path_obj)
    except Exception as e:
        # Chain the cause so the original traceback is kept for debugging.
        raise gr.Error(
            f"Error reading {file_type} file {path_obj.name}. Could be an encoding issue if using Windows. Full error: {e}"
        ) from e

    return path_obj
def make_submission(
    cv_file: BinaryIO,
    test_file: BinaryIO,
    user_state,
    model_name: str = "",
    model_description: str = "",
    anonymous: bool = False,
    registration_code: str = "",
):
    """Validate and upload the cross-validation and private test set CSVs.

    Both files are mandatory. Every check (username, model name, registration
    code, file presence, file format, CSV content) runs before the first
    upload, so nothing is uploaded if any check fails.

    Args:
        cv_file: Uploaded GDPa1 cross-validation CSV.
        test_file: Uploaded private test set CSV.
        user_state: Username; checked with ``validate_username`` and stripped.
        model_name: Required model name (surrounding whitespace is ignored).
        model_description: Optional description (whitespace is stripped).
        anonymous: Forwarded unchanged to ``upload_submission``.
        registration_code: Compared case-insensitively, after stripping,
            against ``REGISTRATION_CODE``.

    Returns:
        One confirmation message per uploaded file, joined by blank lines.

    Raises:
        gr.Error: If the model name is empty, the registration code does not
            match, or either file is missing. The validation helpers may
            raise as well.
    """
    validate_username(user_state)

    username = user_state.strip()
    model_name = model_name.strip()
    description = model_description.strip() or ""

    if not model_name:
        raise gr.Error("Please provide a model name.")

    submitted_code = str(registration_code).strip().upper()
    if submitted_code != REGISTRATION_CODE:
        raise gr.Error(
            "Invalid registration code. Please register on the Competition Registration page or email antibodycompetition@ginkgobioworks.com."
        )

    if cv_file is None:
        raise gr.Error(
            "Please upload the GDPa1 Cross-Validation CSV file before submitting."
        )
    if test_file is None:
        raise gr.Error("Please upload the Private Test Set CSV file before submitting.")

    # (uploaded file, label for error messages, dataset name, display name)
    specs = (
        (cv_file, "GDPa1 Cross-Validation", "GDPa1_cross_validation", "Cross-Validation"),
        (test_file, "Private Test Set", "Heldout Test Set", "Test Set"),
    )

    # Phase 1: validate everything before touching the submissions repo.
    validated = []
    for uploaded, label, dataset, display_name in specs:
        csv_path = validate_file_requirements(uploaded, label)
        csv_text = safe_read_encoding(csv_path)
        validate_csv_file(csv_text, dataset)
        validated.append((dataset, display_name, csv_text))

    # Phase 2: upload each file; the content was already validated above.
    messages = []
    for dataset, display_name, csv_text in validated:
        upload_submission(
            file_content=csv_text,
            user_state=username,
            submission_type=dataset,
            model_name=model_name,
            model_description=description,
            anonymous=anonymous,
        )
        messages.append(
            f"✅ {display_name}: Your submission has been received! Your results should appear on the leaderboard within a minute. If they don't, please reach out to antibodycompetition@ginkgobioworks.com."
        )

    return "\n\n".join(messages)