import os

import gradio as gr
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

from inference import load_for_inference, predict

# TAMGA VQA model, loaded through the project's own inference helpers
TAMGA_REPO = "Mueris/TurkishVLMTAMGA"
tamga_model, tamga_tokenizer, tamga_device = load_for_inference(TAMGA_REPO)

# Fine-tuned BLIP captioning model; the processor comes from the base checkpoint
CAPTION_REPO = "Mueris/TurkishVLMTAMGA-CaptioningModel"
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained(CAPTION_REPO)
caption_model.to("cuda" if torch.cuda.is_available() else "cpu")
caption_device = caption_model.device

# Example thumbnails shown in the gallery (string paths only)
EXAMPLE_IMAGES = [
    "examples/Bir_grup_asker.jpg",
    "examples/tank.jpg",
    "examples/ucak.jpg",
    "examples/ucaklar.jpeg",
]


def answer(model_choice, image, question):
    if image is None:
        return "**Lütfen bir görsel yükleyin.**"

    if model_choice == "TAMGA VQA":
        if not question.strip():
            return "**Bu model soru gerektirir.**"
        pil_image = Image.fromarray(image)
        resp = predict(tamga_model, tamga_tokenizer, tamga_device, pil_image, question)
        return f"**Cevap:** {resp}"

    # BLIP captioning branch
    pil_image = Image.fromarray(image)
    inputs = caption_processor(images=pil_image, return_tensors="pt").to(caption_device)
    output = caption_model.generate(**inputs, max_new_tokens=64)
    caption = caption_processor.decode(output[0], skip_special_tokens=True)
    return f"**Açıklama:** {caption}"


def toggle_question(model_choice):
    # The captioning model takes no question, so clear and disable the textbox
    if model_choice == "BLIP Caption (Fine-Tuned)":
        return gr.update(interactive=False, value="")
    return gr.update(interactive=True)


def load_example_image(evt: gr.SelectData):
    # evt.index is the position of the clicked thumbnail in the gallery
    path = EXAMPLE_IMAGES[evt.index]
    if os.path.exists(path):
        return Image.open(path)
    return None


css = """
#col-container { max-width: 1100px; margin: auto; }
.output-box {
    background-color: white;
    border-radius: 10px;
    padding: 15px;
    border: 1px solid #d0d0d0;
    font-size: 1.1rem;
    min-height: 220px;
}
"""

with gr.Blocks(css=css) as demo:
    gr.HTML(
        """
        <h1 style="text-align: center;">🇹🇷 TAMGA — Çok Modelli Görsel Dil Sistemi</h1>
        """
    )
    gr.HTML(
        """
        <p style="text-align: center;">VQA veya BLIP modeli seçin.</p>
        """
    )

    with gr.Row(elem_id="col-container"):
        with gr.Column(scale=1):
            model_choice = gr.Dropdown(
                choices=["TAMGA VQA", "BLIP Caption (Fine-Tuned)"],
                value="TAMGA VQA",
                label="🔧 Model Seç",
            )
            image = gr.Image(type="numpy", label="📷 Görsel Yükle")

            # Example gallery, seeded with the string paths defined above
            example_gallery = gr.Gallery(
                value=EXAMPLE_IMAGES,
                label="Örnek Görseller",
                columns=4,
                height="150px",
                preview=True,
            )

            question = gr.Textbox(
                label="Soru (VQA İçin)",
                placeholder="Örn: Bu araç ne sınıf?",
            )
            run_btn = gr.Button("Çalıştır", variant="primary")

        with gr.Column(scale=1):
            output = gr.Markdown(elem_classes="output-box")

    # Disable/enable the question box based on the selected model
    model_choice.change(toggle_question, inputs=model_choice, outputs=question)

    # Clicking a gallery thumbnail loads that image into the input
    example_gallery.select(fn=load_example_image, inputs=None, outputs=image)

    # Run the selected model
    run_btn.click(fn=answer, inputs=[model_choice, image, question], outputs=output)

# Required for HuggingFace Spaces
demo.launch()