import gradio as gr from PIL import Image import torch import os from inference import load_for_inference, predict # Load TAMGA VQA model TAMGA_REPO = "Mueris/TurkishVLMTAMGA" if 'load_for_inference' in globals(): tamga_model, tamga_tokenizer, tamga_device = load_for_inference(TAMGA_REPO) else: print("Warning: inference.py functions not loaded. Using placeholder values.") tamga_model, tamga_tokenizer, tamga_device = None, None, 'cpu' # Load BLIP Caption Model from transformers import BlipProcessor, BlipForConditionalGeneration CAPTION_REPO = "Mueris/TurkishVLMTAMGA-CaptioningModel" caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base") caption_model = BlipForConditionalGeneration.from_pretrained(CAPTION_REPO) caption_model.to("cuda" if torch.cuda.is_available() else "cpu") caption_device = caption_model.device # Utility Functions def toggle_question_input(model_choice): if model_choice == "TAMGA VQA": return gr.update(visible=True), gr.update() else: return gr.update(visible=False), gr.update(value="") def select_quick_question(quick_question): if not quick_question: return gr.update(), gr.update() return gr.update(value=quick_question), gr.update(value=None) # Main Inference Function def answer(model_choice, image, question): if image is None: return "**Lütfen bir görsel yükleyin.**" # MODEL 1 — VQA if model_choice == "TAMGA VQA": if not question.strip(): return "**Bu model soru gerektirir. Lütfen bir soru yazın.**" if tamga_model is None: return "**Hata: TAMGA VQA modeli yüklenemedi. 'inference.py' dosyasını ve bağımlılıkları kontrol edin.**" pil_image = Image.fromarray(image) response = predict(tamga_model, tamga_tokenizer, tamga_device, pil_image, question) return f"**Cevap:** {response}" # MODEL 2 — Captioning elif model_choice == "BLIP Caption (Fine-Tuned)": pil_image = Image.fromarray(image) inputs = caption_processor(images=pil_image, return_tensors="pt").to(caption_device) output = caption_model.generate(**inputs, max_new_tokens=64) caption = caption_processor.decode(output[0], skip_special_tokens=True) return f"**Açıklama:** {caption}" return "**Model bulunamadı.**" # CSS css = """ #col-container { max-width: 1100px; margin-left: auto; margin-right: auto; } .gradio-container { background-color: #ffffff !important; } h1, h2, h3, p, label { color: #000000 !important; } /* VIBRANT ACCENT COLOR DEFINITION */ :root { --gradio-primary-500: #1E90FF; /* Dodger Blue - Vibrant Accent */ } #title { text-align: center; font-size: 2.2rem; font-weight: bold; margin-top: 20px; color: #1E90FF !important; /* Apply vibrant color to title */ } #subtitle { text-align: center; font-size: 1.1rem; color: #666666 !important; /* Darker grey subtitle */ margin-bottom: 25px; } /* Modern Input and Dropdown Styling */ .wrap, .input_text, .scroll-hide, .gradio-dropdown { border-radius: 8px !important; } /* Primary Button Styling */ button[variant="primary"] { background-color: #1E90FF !important; border-color: #1E90FF !important; color: white !important; transition: all 0.2s ease-in-out; border-radius: 8px !important; box-shadow: 0 4px 10px rgba(30, 144, 255, 0.4); /* Stronger shadow */ font-weight: bold; padding: 10px 20px; } button[variant="primary"]:hover { background-color: #0080FF !important; } /* Output Box Styling */ .output-box { background-color: #fcfcfc !important; /* Very subtle background */ border-radius: 12px; /* More rounded */ padding: 20px; border: 1px solid #bbdffc; /* Light blue border matching the accent */ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08); /* Noticeable shadow */ color: #000000 !important; font-size: 1.1rem; min-height: 200px; } /* Custom CSS for image examples */ #image-examples .thumbnail-item { max-width: 100px !important; height: auto; } /* --- Quick Questions Radio Button Styling (Matching User Image) --- */ #quick-questions-label { margin-bottom: 8px; font-weight: 600; } #quick-questions > label { display: block; /* Sadece başlığı değil, tüm radyo grubunu hedefler */ } #quick-questions > label > div > fieldset { display: flex; /* Düğmeleri yatay hizalar */ flex-wrap: wrap; /* Gerekirse alt satıra geçmesini sağlar */ gap: 8px; /* Düğmeler arasında boşluk bırakır */ padding: 0; margin: 0; border: none; } /* Radyo Düğmesi Görünümü */ #quick-questions label.radio { border: 1px solid #ddd; border-radius: 8px; padding: 8px 12px; cursor: pointer; transition: all 0.2s ease-in-out; background-color: #f9f9f9; } /* Seçili Radyo Düğmesi Görünümü */ #quick-questions input[type="radio"]:checked + span { color: white; /* Seçili metni beyaz yapar */ background-color: #1E90FF; /* Vibrant Accent rengini uygular */ border-color: #1E90FF; box-shadow: 0 2px 5px rgba(30, 144, 255, 0.3); } /* Radyo düğmesi input'unu gizle */ #quick-questions input[type="radio"] { display: none; } /* Metin kutusunu ayarla */ #quick-questions label.radio span { padding: 0; margin: 0; display: inline-block; color: #333; font-weight: 500; line-height: 1.2; } #quick-questions input[type="radio"]:checked + span { color: white; } /* ----------------------------------------------------------------- */ """ # ----------------------- # VQA Question Examples # ----------------------- VQA_QUESTION_CHOICES = [ "Bu görselde kaç tane insan figürü var?", "Görselde ne görüyorsun?", "Fotoğrafta ne tür bir araç görülüyor?", "Bu görselde hava aracı var mı?" ] # UI Layout with gr.Blocks(css=css) as demo: gr.HTML("