import os

import gradio as gr
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

from inference import load_for_inference, predict

# TAMGA Turkish VQA model, loaded through the project's inference helpers
TAMGA_REPO = "Mueris/TurkishVLMTAMGA"
tamga_model, tamga_tokenizer, tamga_device = load_for_inference(TAMGA_REPO)
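# Assumed contract of the local `inference` module (defined elsewhere in this repo),
# inferred from how it is used below:
#   load_for_inference(repo_id) -> (model, tokenizer, device)
#   predict(model, tokenizer, device, pil_image, question) -> answer string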

# Fine-tuned BLIP captioning model; the processor comes from the base BLIP checkpoint
CAPTION_REPO = "Mueris/TurkishVLMTAMGA-CaptioningModel"
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained(CAPTION_REPO)
caption_model.to("cuda" if torch.cuda.is_available() else "cpu")
caption_device = caption_model.device

def answer(model_choice, image, question):
    if image is None:
        return "**Lütfen bir görsel yükleyin.**"
    pil_image = Image.fromarray(image)
    if model_choice == "TAMGA VQA":
        if not question or not question.strip():
            return "**Bu model soru gerektirir.**"
        resp = predict(tamga_model, tamga_tokenizer, tamga_device, pil_image, question)
        return f"**Cevap:** {resp}"
    else:  # BLIP captioning: the question box is ignored
        inputs = caption_processor(images=pil_image, return_tensors="pt").to(caption_device)
        output = caption_model.generate(**inputs, max_new_tokens=64)
        caption = caption_processor.decode(output[0], skip_special_tokens=True)
        return f"**Açıklama:** {caption}"

def toggle_question(model_choice):
    if model_choice == "BLIP Caption (Fine-Tuned)":
        return gr.update(interactive=False, value="")
    return gr.update(interactive=True)

# Example images bundled with the Space (paths relative to the app root)
EXAMPLE_IMAGES = [
    "examples/Bir_grup_asker.jpg",
    "examples/tank.jpg",
    "examples/ucak.jpg",
    "examples/ucaklar.jpeg",
]

def load_example_image(evt: gr.SelectData):
    # Gallery .select() reports which thumbnail was clicked via gr.SelectData,
    # so look the selected item up by index instead of reading the gallery value
    path = EXAMPLE_IMAGES[evt.index]
    if os.path.exists(path):
        return Image.open(path)
    return None
| css = """ | |
| #col-container { max-width: 1100px; margin: auto; } | |
| .output-box { | |
| background-color:white; border-radius:10px; | |
| padding:15px; border:1px solid #d0d0d0; | |
| font-size:1.1rem; min-height:220px; | |
| } | |
| """ | |

with gr.Blocks(css=css) as demo:
    gr.HTML("<h1 style='text-align:center;'>🇹🇷 TAMGA — Çok Modelli Görsel Dil Sistemi</h1>")
    gr.HTML("<div style='text-align:center;margin-bottom:20px;'>VQA veya BLIP modeli seçin.</div>")

    with gr.Row(elem_id="col-container"):
        with gr.Column(scale=1):
            model_choice = gr.Dropdown(
                choices=["TAMGA VQA", "BLIP Caption (Fine-Tuned)"],
                value="TAMGA VQA",
                label="🔧 Model Seç",
            )
            image = gr.Image(type="numpy", label="📷 Görsel Yükle")
            # Example gallery; values are the plain string paths from EXAMPLE_IMAGES
            example_gallery = gr.Gallery(
                value=EXAMPLE_IMAGES,
                label="Örnek Görseller",
                columns=4,
                height="150px",
                preview=True,
            )
            question = gr.Textbox(
                label="Soru (VQA İçin)",
                placeholder="Örn: Bu araç ne sınıf?",
            )
            run_btn = gr.Button("Çalıştır", variant="primary")

        with gr.Column(scale=1):
            output = gr.Markdown(elem_classes="output-box")

    # Disable/enable the question box based on the selected model
    model_choice.change(toggle_question, inputs=model_choice, outputs=question)

    # Clicking a gallery thumbnail loads that example into the image input
    example_gallery.select(
        fn=load_example_image,
        inputs=None,
        outputs=image,
    )

    # Run the selected model
    run_btn.click(
        fn=answer,
        inputs=[model_choice, image, question],
        outputs=output,
    )
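
    # Optional: calling demo.queue() before launch() serializes GPU-bound requests,
    # which can help under concurrent traffic on Spaces.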

# Required for HuggingFace Spaces
demo.launch()