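# app.py: Gradio demo for the TAMGA Turkish visual-language project. It serves
# two models behind one UI: the TAMGA VQA model and a fine-tuned BLIP
# captioning model.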
import gradio as gr
from PIL import Image
import torch
import os

from inference import load_for_inference, predict

# TAMGA VQA model, loaded through the project's own inference helpers
TAMGA_REPO = "Mueris/TurkishVLMTAMGA"
tamga_model, tamga_tokenizer, tamga_device = load_for_inference(TAMGA_REPO)
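
# Fine-tuned BLIP captioning model: the processor comes from the base BLIP
# checkpoint, the generation weights from the project's captioning repo.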
from transformers import BlipProcessor, BlipForConditionalGeneration
CAPTION_REPO = "Mueris/TurkishVLMTAMGA-CaptioningModel"
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained(CAPTION_REPO)
caption_model.to("cuda" if torch.cuda.is_available() else "cpu")
caption_device = caption_model.device
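

# answer() routes a request to the selected model: TAMGA answers the question
# about the image, while the BLIP model generates a caption and ignores the
# question box.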
def answer(model_choice, image, question):
    if image is None:
        return "**Lütfen bir görsel yükleyin.**"
    if model_choice == "TAMGA VQA":
        if not question.strip():
            return "**Bu model soru gerektirir.**"
        pil_image = Image.fromarray(image)
        resp = predict(tamga_model, tamga_tokenizer, tamga_device, pil_image, question)
        return f"**Cevap:** {resp}"
    else:  # BLIP
        pil_image = Image.fromarray(image)
        inputs = caption_processor(images=pil_image, return_tensors="pt").to(caption_device)
        output = caption_model.generate(**inputs, max_new_tokens=64)
        caption = caption_processor.decode(output[0], skip_special_tokens=True)
        return f"**Açıklama:** {caption}"
def toggle_question(model_choice):
    if model_choice == "BLIP Caption (Fine-Tuned)":
        return gr.update(interactive=False, value="")
    return gr.update(interactive=True)
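

# Clicking an example thumbnail loads that file into the image input.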
def load_example_image(gallery_items, evt: gr.SelectData):
    # The gallery passes its full value list and evt.index identifies the clicked
    # thumbnail. Items may be plain paths or (path, caption) pairs depending on
    # the Gradio version, so handle both shapes.
    item = gallery_items[evt.index]
    path = item[0] if isinstance(item, (list, tuple)) else item
    if os.path.exists(path):
        return Image.open(path)
    return None
css = """
#col-container { max-width: 1100px; margin: auto; }
.output-box {
background-color:white; border-radius:10px;
padding:15px; border:1px solid #d0d0d0;
font-size:1.1rem; min-height:220px;
}
"""
with gr.Blocks(css=css) as demo:
    gr.HTML("<h1 style='text-align:center;'>🇹🇷 TAMGA — Çok Modelli Görsel Dil Sistemi</h1>")
    gr.HTML("<div style='text-align:center;margin-bottom:20px;'>VQA veya BLIP modeli seçin.</div>")

    with gr.Row(elem_id="col-container"):
        with gr.Column(scale=1):
            model_choice = gr.Dropdown(
                choices=["TAMGA VQA", "BLIP Caption (Fine-Tuned)"],
                value="TAMGA VQA",
                label="🔧 Model Seç"
            )
            image = gr.Image(type="numpy", label="📷 Görsel Yükle")

            # Example images bundled with the Space (plain file paths)
            example_gallery = gr.Gallery(
                value=[
                    "examples/Bir_grup_asker.jpg",
                    "examples/tank.jpg",
                    "examples/ucak.jpg",
                    "examples/ucaklar.jpeg",
                ],
                label="Örnek Görseller",
                columns=4,
                height="150px",
                preview=True
            )

            question = gr.Textbox(
                label="Soru (VQA İçin)",
                placeholder="Örn: Bu araç ne sınıf?"
            )
            run_btn = gr.Button("Çalıştır", variant="primary")

        with gr.Column(scale=1):
            output = gr.Markdown(elem_classes="output-box")

    # Disable/enable question box based on model
    model_choice.change(toggle_question, inputs=model_choice, outputs=question)

    # Gallery selection loads the image
    example_gallery.select(
        fn=load_example_image,
        inputs=example_gallery,
        outputs=image
    )

    # Run model
    run_btn.click(
        fn=answer,
        inputs=[model_choice, image, question],
        outputs=output
    )

# Required for HuggingFace Spaces
demo.launch()