# handler.py
"""Hugging Face inference handler: summarize a code patch with CodeT5."""

from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer

MODEL_ID = "Salesforce/codet5-base-multi-sum"

# Load once at startup so every request reuses the same weights.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)

summarizer = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1,  # CPU
)


def inference(inputs: dict) -> list:
    """Summarize a code patch; Hugging Face calls this for every request.

    Args:
        inputs: Request payload, e.g. ``{"inputs": "<code patch>"}``.

    Returns:
        A single-element list in the HF expected format:
        ``[{"summary_text": "<generated summary>"}]``.
    """
    patch = inputs.get("inputs", "")
    # Guard against a missing/empty payload: skip the expensive beam-search
    # generation and return an empty summary instead of summarizing "".
    if not patch:
        return [{"summary_text": ""}]
    out = summarizer(
        patch,
        max_length=64,      # cap the generated summary length
        truncation=True,    # clip over-long patches to the model's input limit
        num_beams=4,        # beam search for higher-quality summaries
        early_stopping=True,
    )
    # Return in the HF expected format (list of dicts).
    return [{"summary_text": out[0]["generated_text"]}]