import gradio as gr
from PIL import Image
import torch
import os

from inference import load_for_inference, predict

# TAMGA VQA model (Turkish visual question answering)
TAMGA_REPO = "Mueris/TurkishVLMTAMGA"
tamga_model, tamga_tokenizer, tamga_device = load_for_inference(TAMGA_REPO)

from transformers import BlipProcessor, BlipForConditionalGeneration

# Fine-tuned BLIP captioning model; the processor comes from the base BLIP checkpoint
CAPTION_REPO = "Mueris/TurkishVLMTAMGA-CaptioningModel"
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained(CAPTION_REPO)
caption_model.to("cuda" if torch.cuda.is_available() else "cpu")
caption_device = caption_model.device


def answer(model_choice, image, question):
    """Route the request to the TAMGA VQA model or the BLIP captioning model."""
    if image is None:
        return "**Lütfen bir görsel yükleyin.**"  # "Please upload an image."

    pil_image = Image.fromarray(image)

    if model_choice == "TAMGA VQA":
        if not question.strip():
            return "**Bu model soru gerektirir.**"  # "This model requires a question."
        resp = predict(tamga_model, tamga_tokenizer, tamga_device, pil_image, question)
        return f"**Cevap:** {resp}"  # "Answer:"

    # BLIP captioning branch: the question box is ignored
    inputs = caption_processor(images=pil_image, return_tensors="pt").to(caption_device)
    output = caption_model.generate(**inputs, max_new_tokens=64)
    caption = caption_processor.decode(output[0], skip_special_tokens=True)
    return f"**Açıklama:** {caption}"  # "Caption:"


def toggle_question(model_choice):
    """Disable and clear the question box when the captioning model is selected."""
    if model_choice == "BLIP Caption (Fine-Tuned)":
        return gr.update(interactive=False, value="")
    return gr.update(interactive=True)


def load_example_image(path):
    # Gallery selections may arrive as a list (value, index), so handle the list case
    if isinstance(path, list):
        path = path[0]
    if os.path.exists(path):
        return Image.open(path)
    return None


css = """
#col-container { max-width: 1100px; margin: auto; }
.output-box {
    background-color: white;
    border-radius: 10px;
    padding: 15px;
    border: 1px solid #d0d0d0;
    font-size: 1.1rem;
    min-height: 220px;
}
"""

with gr.Blocks(css=css) as demo:
    gr.HTML("