import gradio as gr
import requests
import base64
import io
from PIL import Image
import os


def encode_image_to_base64(image):
    """Encode a PIL image as a base64 JPEG data URI.

    Args:
        image: PIL.Image.Image to encode.

    Returns:
        str: ``data:image/jpeg;base64,...`` data URI suitable for the
        chat-completions ``image_url`` field.
    """
    # JPEG has no alpha channel: palette ("P"), "RGBA", "LA", etc. images
    # make Image.save(..., format="JPEG") raise, so normalize to RGB first.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/jpeg;base64,{img_str}"


def _load_pil_image(item):
    """Best-effort conversion of one Gradio upload item to a PIL image.

    ``gr.File(file_count="multiple")`` hands the callback file paths (str)
    or tempfile wrappers exposing a ``.name`` attribute; a ``gr.Image``
    component would hand numpy arrays. Accept all three shapes.

    Returns:
        PIL.Image.Image, or None when the item cannot be read as an image.
    """
    try:
        if item is None:
            return None
        if isinstance(item, str):
            return Image.open(item)
        if hasattr(item, "name"):
            # tempfile / NamedString wrapper produced by gr.File
            return Image.open(item.name)
        # Fallback: assume a numpy array (gr.Image-style input).
        return Image.fromarray(item)
    except Exception:
        # Unreadable / unsupported file: caller reports it per-image.
        return None


def process_images_with_api(images, prompt, api_key):
    """
    Process multiple images using the Hugging Face Inference API.

    Args:
        images: List of uploaded images (file paths, file objects, or arrays)
        prompt: User-provided prompt, applied to every image individually
        api_key: Hugging Face API key (``hf_...`` token)

    Returns:
        str: Generated descriptions, one paragraph per image, or a
        human-readable error message.
    """
    if not images:
        return "Please upload at least one image."

    if not api_key:
        return "Please provide your Hugging Face API key."

    # OpenAI-compatible chat-completions route for the Qwen2-VL model.
    # The response parsing below (choices[0].message.content) matches this
    # route; the raw /models/... endpoint returns a different shape.
    api_url = (
        "https://api-inference.huggingface.co/models/"
        "Qwen/Qwen2-VL-7B-Instruct/v1/chat/completions"
    )

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    results = []

    for i, image in enumerate(images):
        if image is None:
            continue

        pil_image = _load_pil_image(image)
        if pil_image is None:
            results.append(f"Image {i+1}: ❌ Error - could not read image")
            continue

        try:
            base64_image = encode_image_to_base64(pil_image)

            # Chat-completions payload: one user turn carrying the text
            # prompt plus the image as a data-URI image_url part.
            payload = {
                "model": "Qwen/Qwen2-VL-7B-Instruct",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": prompt
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": base64_image
                                }
                            }
                        ]
                    }
                ],
            }

            response = requests.post(
                api_url, headers=headers, json=payload, timeout=60
            )

            if response.status_code == 200:
                result = response.json()
                choices = result.get("choices") or []
                if choices:
                    description = choices[0]["message"]["content"]
                    results.append(f"Image {i+1}: {description}")
                else:
                    results.append(f"Image {i+1}: ❌ No response from API")
            else:
                error_msg = (
                    f"API Error (Status {response.status_code}): "
                    f"{response.text}"
                )
                results.append(f"Image {i+1}: ❌ {error_msg}")

        except Exception as e:
            # Keep going: one bad image must not abort the whole batch.
            results.append(f"Image {i+1}: ❌ Error - {str(e)}")

    if not results:
        return "No valid images processed."

    return "\n\n".join(results)


def create_gradio_interface():
    """Create the Gradio interface for Hugging Face Spaces.

    Returns:
        gr.Blocks: the assembled (but not yet launched) demo.
    """
    with gr.Blocks(
        title="Multi-Image AI Processor",
        theme=gr.themes.Soft(),
        fill_height=True
    ) as demo:
        gr.Markdown("# 🖼️ Multi-Image AI Processor")
        gr.Markdown("Upload multiple images and get AI-generated descriptions using the Qwen2-VL model via Hugging Face Inference API.")

        with gr.Row():
            with gr.Column(scale=2):
                # Image upload area
                images_input = gr.File(
                    file_count="multiple",
                    file_types=["image"],
                    label="Upload Images",
                    height=300
                )

                # Prompt input
                prompt_input = gr.Textbox(
                    label="Prompt",
                    placeholder="Describe this image in detail...",
                    value="Describe this image in detail.",
                    lines=3
                )

                # API key input (required)
                api_key_input = gr.Textbox(
                    label="Hugging Face API Key",
                    placeholder="hf_...",
                    type="password",
                    info="Required: Get your API key from https://huggingface.co/settings/tokens"
                )

                # Process button
                process_btn = gr.Button(
                    "🚀 Process Images",
                    variant="primary",
                    size="lg"
                )

            with gr.Column(scale=2):
                # Results area
                results_output = gr.Textbox(
                    label="Results",
                    lines=15,
                    max_lines=25,
                    interactive=False
                )

        # Example prompts. One column per example row, matching the single
        # input component; no fn/outputs, so clicking only fills the prompt.
        with gr.Accordion("Example Prompts", open=False):
            gr.Examples(
                examples=[
                    ["Describe the architectural style and features of this building."],
                    ["What are the key features and amenities shown in this property?"],
                    ["Describe the interior design and layout of this space."],
                    ["What type of property is this and what are its main characteristics?"],
                    ["Describe the condition and quality of this property."],
                ],
                inputs=[prompt_input],
                label="Example Prompts"
            )

        # Footer
        gr.Markdown("---")
        gr.Markdown("""
        **How to use:**
        1. Get your Hugging Face API key from https://huggingface.co/settings/tokens
        2. Upload one or more images
        3. Enter a prompt describing what you want to know about the images
        4. Paste your API key
        5. Click "Process Images" to get AI-generated descriptions

        **Tips:**
        - Use specific prompts for better results
        - The model works best with clear, high-quality images
        - You can process multiple images at once
        - Each image is processed individually with the same prompt
        """)

        # Connect the process button
        process_btn.click(
            fn=process_images_with_api,
            inputs=[images_input, prompt_input, api_key_input],
            outputs=[results_output]
        )

    return demo


# Create the interface at import time (required by Hugging Face Spaces),
# but only launch a server when run as a script.
demo = create_gradio_interface()

if __name__ == "__main__":
    demo.launch()