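# app.py: Gradio demo for the TAMGA Turkish visual-language project. It serves
# two models behind one UI: the TAMGA VQA model and a fine-tuned BLIP
# captioning model.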
import gradio as gr
from PIL import Image
import torch
import os

from inference import load_for_inference, predict

# TAMGA VQA model, loaded through the project's own inference helpers
TAMGA_REPO = "Mueris/TurkishVLMTAMGA"
tamga_model, tamga_tokenizer, tamga_device = load_for_inference(TAMGA_REPO)
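
# Fine-tuned BLIP captioning model: the processor comes from the base BLIP
# checkpoint, the generation weights from the project's captioning repo.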
from transformers import BlipProcessor, BlipForConditionalGeneration
CAPTION_REPO = "Mueris/TurkishVLMTAMGA-CaptioningModel"
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained(CAPTION_REPO)
caption_model.to("cuda" if torch.cuda.is_available() else "cpu")
caption_device = caption_model.device
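

# answer() routes a request to the selected model: TAMGA answers the question
# about the image, while the BLIP model generates a caption and ignores the
# question box.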
def answer(model_choice, image, question):
    if image is None:
        return "**Lütfen bir görsel yükleyin.**"
    if model_choice == "TAMGA VQA":
        if not question.strip():
            return "**Bu model soru gerektirir.**"
        pil_image = Image.fromarray(image)
        resp = predict(tamga_model, tamga_tokenizer, tamga_device, pil_image, question)
        return f"**Cevap:** {resp}"
    else:  # BLIP
        pil_image = Image.fromarray(image)
        inputs = caption_processor(images=pil_image, return_tensors="pt").to(caption_device)
        output = caption_model.generate(**inputs, max_new_tokens=64)
        caption = caption_processor.decode(output[0], skip_special_tokens=True)
        return f"**Açıklama:** {caption}"
def toggle_question(model_choice):
    if model_choice == "BLIP Caption (Fine-Tuned)":
        return gr.update(interactive=False, value="")
    return gr.update(interactive=True)
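

# Clicking an example thumbnail loads that file into the image input.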
def load_example_image(gallery_items, evt: gr.SelectData):
    # The gallery passes its full value list and evt.index identifies the clicked
    # thumbnail. Items may be plain paths or (path, caption) pairs depending on
    # the Gradio version, so handle both shapes.
    item = gallery_items[evt.index]
    path = item[0] if isinstance(item, (list, tuple)) else item
    if os.path.exists(path):
        return Image.open(path)
    return None
css = """
#col-container { max-width: 1100px; margin: auto; }
.output-box {
background-color:white; border-radius:10px;
padding:15px; border:1px solid #d0d0d0;
font-size:1.1rem; min-height:220px;
}
"""
with gr.Blocks(css=css) as demo:
    gr.HTML("<h1 style='text-align:center;'>🇹🇷 TAMGA — Çok Modelli Görsel Dil Sistemi</h1>")
    gr.HTML("<div style='text-align:center;margin-bottom:20px;'>VQA veya BLIP modeli seçin.</div>")

    with gr.Row(elem_id="col-container"):
        with gr.Column(scale=1):
            model_choice = gr.Dropdown(
                choices=["TAMGA VQA", "BLIP Caption (Fine-Tuned)"],
                value="TAMGA VQA",
                label="🔧 Model Seç"
            )
            image = gr.Image(type="numpy", label="📷 Görsel Yükle")

            # Example images bundled with the Space (plain file paths)
            example_gallery = gr.Gallery(
                value=[
                    "examples/Bir_grup_asker.jpg",
                    "examples/tank.jpg",
                    "examples/ucak.jpg",
                    "examples/ucaklar.jpeg",
                ],
                label="Örnek Görseller",
                columns=4,
                height="150px",
                preview=True
            )

            question = gr.Textbox(
                label="Soru (VQA İçin)",
                placeholder="Örn: Bu araç ne sınıf?"
            )
            run_btn = gr.Button("Çalıştır", variant="primary")

        with gr.Column(scale=1):
            output = gr.Markdown(elem_classes="output-box")

    # Disable/enable question box based on model
    model_choice.change(toggle_question, inputs=model_choice, outputs=question)

    # Gallery selection loads the image
    example_gallery.select(
        fn=load_example_image,
        inputs=example_gallery,
        outputs=image
    )

    # Run model
    run_btn.click(
        fn=answer,
        inputs=[model_choice, image, question],
        outputs=output
    )

# Required for HuggingFace Spaces
demo.launch()