Spaces:

Mueris
/

TurkishVLMTAMGAQA

Sleeping

App Files Files Community

Mueris committed 10 days ago

Commit

f1db530

verified ·

1 Parent(s): ec3946b

Update app.py

Browse files

UI design fixes done

Files changed (1) hide show

app.py +25 -32

app.py CHANGED Viewed

@@ -3,26 +3,24 @@ from PIL import Image
 import torch
 import os
-# Assuming inference.py contains load_for_inference and predict
-# NOTE: inference.py must be present and contain these functions for VQA to work.
 from inference import load_for_inference, predict
-# -----------------------
 # Load TAMGA VQA model
-# -----------------------
 TAMGA_REPO = "Mueris/TurkishVLMTAMGA"
-# Added a check for the existence of the imported function
 if 'load_for_inference' in globals():
     tamga_model, tamga_tokenizer, tamga_device = load_for_inference(TAMGA_REPO)
 else:
-    # Placeholder for running environment where inference module might not be fully available
     print("Warning: inference.py functions not loaded. Using placeholder values.")
     tamga_model, tamga_tokenizer, tamga_device = None, None, 'cpu'
-# -----------------------
 # Load BLIP Caption Model
-# -----------------------
 from transformers import BlipProcessor, BlipForConditionalGeneration
 CAPTION_REPO = "Mueris/TurkishVLMTAMGA-CaptioningModel"
@@ -32,17 +30,15 @@ caption_model.to("cuda" if torch.cuda.is_available() else "cpu")
 caption_device = caption_model.device
-# -----------------------
 # Utility Functions
-# -----------------------
 def toggle_question_input(model_choice):
     if model_choice == "TAMGA VQA":
-        # VQA seçildiyse: Grubu GÖSTER, Metin kutusuna DOKUNMA (değişiklik yok)
         return gr.update(visible=True), gr.update()
     else:
-        # BLIP seçildiyse: Grubu GİZLE, Metin kutusunu TEMİZLE
         return gr.update(visible=False), gr.update(value="")
 def select_quick_question(quick_question):
@@ -52,9 +48,9 @@ def select_quick_question(quick_question):
     return gr.update(value=quick_question), gr.update(value=None)
-# -----------------------
 # Main Inference Function
-# -----------------------
 def answer(model_choice, image, question):
     if image is None:
@@ -70,8 +66,7 @@ def answer(model_choice, image, question):
              return "**Hata: TAMGA VQA modeli yüklenemedi. 'inference.py' dosyasını ve bağımlılıkları kontrol edin.**"
         pil_image = Image.fromarray(image)
-        # Note: tamga_device is determined by load_for_inference
-        # Assuming predict function is correctly implemented in inference.py
         response = predict(tamga_model, tamga_tokenizer, tamga_device, pil_image, question)
         return f"**Cevap:** {response}"
@@ -79,9 +74,7 @@ def answer(model_choice, image, question):
     elif model_choice == "BLIP Caption (Fine-Tuned)":
         pil_image = Image.fromarray(image)
-        # Ensure device is correctly set for inputs
         inputs = caption_processor(images=pil_image, return_tensors="pt").to(caption_device)
-        # Generate caption
         output = caption_model.generate(**inputs, max_new_tokens=64)
         caption = caption_processor.decode(output[0], skip_special_tokens=True)
         return f"**Açıklama:** {caption}"
@@ -89,9 +82,9 @@ def answer(model_choice, image, question):
     return "**Model bulunamadı.**"
-# -----------------------
-# CSS (Vibrant Theme Applied)
-# -----------------------
 css = """
 #col-container {
     max-width: 1100px;
@@ -224,19 +217,19 @@ button[variant="primary"]:hover {
 # -----------------------
 VQA_QUESTION_CHOICES = [
     "Bu görselde kaç tane insan figürü var?",
-    "Görseldeki baskın renk nedir?",
     "Fotoğrafta ne tür bir araç görülüyor?",
-    "Bu olayın gerçekleştiği mevsime dair ipuçları var mı?"
 ]
-# -----------------------
 # UI Layout
-# -----------------------
 with gr.Blocks(css=css) as demo:
-    gr.HTML("<div id='title'>🇹🇷 TAMGA — Çok Modelli Görsel Dil Sistemi</div>")
-    gr.HTML("<div id='subtitle'>VQA veya Fine-Tuned BLIP Captioning modellerinden birini seçin.</div>")
     with gr.Row(elem_id="col-container"):
@@ -260,7 +253,7 @@ with gr.Blocks(css=css) as demo:
             )
             # ----------------------
-            # Soru metin kutusu ve örnekleri bir gr.Group içine alıyoruz
             with gr.Group(visible=True) as vqa_inputs_group:
                 question = gr.Textbox(
                     label="Soru (Sadece VQA Modeli İçin)",
@@ -273,9 +266,9 @@ with gr.Blocks(css=css) as demo:
                 quick_question_radio = gr.Radio(
                     choices=VQA_QUESTION_CHOICES,
                     label="Hızlı Sorular",
-                    value=None, # Başlangıçta hiçbir şey seçili olmasın
                     elem_id="quick-questions",
-                    container=False # Label'ı yukarıda ayrı bir HTML ile kontrol ettiğimiz için
                 )
             # --------------------------------------
@@ -286,7 +279,7 @@ with gr.Blocks(css=css) as demo:
             output = gr.Markdown(elem_classes="output-box")
-    # Button click → run model
     submit_btn.click(
         fn=answer,
         inputs=[model_choice, image, question],
@@ -301,7 +294,7 @@ with gr.Blocks(css=css) as demo:
         outputs=[vqa_inputs_group, question],
         queue=False
     )
-    # -------------------------------------------------
 # --- Quick Question Selection Logic  ---

 import torch
 import os
 from inference import load_for_inference, predict
 # Load TAMGA VQA model
 TAMGA_REPO = "Mueris/TurkishVLMTAMGA"
 if 'load_for_inference' in globals():
     tamga_model, tamga_tokenizer, tamga_device = load_for_inference(TAMGA_REPO)
 else:
     print("Warning: inference.py functions not loaded. Using placeholder values.")
     tamga_model, tamga_tokenizer, tamga_device = None, None, 'cpu'
 # Load BLIP Caption Model
 from transformers import BlipProcessor, BlipForConditionalGeneration
 CAPTION_REPO = "Mueris/TurkishVLMTAMGA-CaptioningModel"
 caption_device = caption_model.device
 # Utility Functions
 def toggle_question_input(model_choice):
     """Build the pair of Gradio updates used when the model selection changes.

     Returns a 2-tuple of ``gr.update`` objects. When ``model_choice`` is
     "TAMGA VQA" the first update sets ``visible=True`` and the second carries
     no changes; for any other choice the first sets ``visible=False`` and the
     second resets the value to an empty string.
     """
     if model_choice != "TAMGA VQA":
         # Non-VQA model selected: hide the question inputs and clear the text.
         return gr.update(visible=False), gr.update(value="")
     # VQA selected: show the question inputs and leave the text as it is.
     return gr.update(visible=True), gr.update()
 def select_quick_question(quick_question):
     return gr.update(value=quick_question), gr.update(value=None)
 # Main Inference Function
 def answer(model_choice, image, question):
     if image is None:
              return "**Hata: TAMGA VQA modeli yüklenemedi. 'inference.py' dosyasını ve bağımlılıkları kontrol edin.**"
         pil_image = Image.fromarray(image)
         response = predict(tamga_model, tamga_tokenizer, tamga_device, pil_image, question)
         return f"**Cevap:** {response}"
     elif model_choice == "BLIP Caption (Fine-Tuned)":
         pil_image = Image.fromarray(image)
         inputs = caption_processor(images=pil_image, return_tensors="pt").to(caption_device)
         output = caption_model.generate(**inputs, max_new_tokens=64)
         caption = caption_processor.decode(output[0], skip_special_tokens=True)
         return f"**Açıklama:** {caption}"
     return "**Model bulunamadı.**"
+# CSS
 css = """
 #col-container {
     max-width: 1100px;
 # -----------------------
 VQA_QUESTION_CHOICES = [
     "Bu görselde kaç tane insan figürü var?",
+    "Görselde ne görüyorsun?",
     "Fotoğrafta ne tür bir araç görülüyor?",
+    "Bu görselde hava aracı var mı?"
 ]
 # UI Layout
 with gr.Blocks(css=css) as demo:
+    gr.HTML("<div id='title'>🇹🇷 TAMGA — Çok Modelli Türkçe Görsel Dil Modeli</div>")
+    gr.HTML("<div id='subtitle'>TAMGA VQA (Soru Cevap) veya TAMGA Görsel Açıklama modellerinden birini seçin.</div>")
     with gr.Row(elem_id="col-container"):
             )
             # ----------------------
             with gr.Group(visible=True) as vqa_inputs_group:
                 question = gr.Textbox(
                     label="Soru (Sadece VQA Modeli İçin)",
                 quick_question_radio = gr.Radio(
                     choices=VQA_QUESTION_CHOICES,
                     label="Hızlı Sorular",
+                    value=None,
                     elem_id="quick-questions",
+                    container=False
                 )
             # --------------------------------------
             output = gr.Markdown(elem_classes="output-box")
+    # Button click  run model
     submit_btn.click(
         fn=answer,
         inputs=[model_choice, image, question],
         outputs=[vqa_inputs_group, question],
         queue=False
     )
 # --- Quick Question Selection Logic  ---