Spaces:

amberborici
/

Qwen-Qwen2-VL-7B-Instruct

Sleeping

App Files Files Community

amberborici committed on Aug 8

Commit

f03a25b

1 Parent(s): 02a4024

fix

Browse files

Files changed (2) hide show

__pycache__/app.cpython-310.pyc +0 -0
app.py +110 -180

__pycache__/app.cpython-310.pyc ADDED Viewed

Binary file (7.11 kB). View file

app.py CHANGED Viewed

@@ -1,167 +1,102 @@
-#!/usr/bin/env python3
-"""
-Gradio interface for multi-image processing with Qwen2-VL model
-"""
 import gradio as gr
-import os
-import sys
-from pathlib import Path
-import logging
-from typing import List, Dict, Any
-import asyncio
-from PIL import Image
-import io
 import base64
-# Add the backend directory to Python path
-backend_dir = Path(__file__).parent
-sys.path.append(str(backend_dir))
-# Import the image processor
-try:
-    from app.services.ai.images.image_processor import ImageProcessor
-except ImportError as e:
-    print(f"Import error: {e}")
-    # Fallback for direct execution
-    ImageProcessor = None
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-class GradioImageProcessor:
-    def __init__(self, use_api: bool = True, api_key: str = None):
-        """
-        Initialize the Gradio image processor
-        Args:
-            use_api: Whether to use API mode (True) or local model (False)
-            api_key: Hugging Face API key for API mode
-        """
-        self.use_api = use_api
-        self.api_key = api_key or os.getenv("HF_API_KEY")
-        if use_api and not self.api_key:
-            logger.warning("No API key provided. Please set HF_API_KEY environment variable.")
-        # Initialize the image processor
-        if ImageProcessor:
-            try:
-                self.processor = ImageProcessor()
-                logger.info("Image processor initialized successfully")
-            except Exception as e:
-                logger.error(f"Failed to initialize image processor: {e}")
-                self.processor = None
-        else:
-            self.processor = None
-            logger.warning("ImageProcessor not available")
-    def process_single_image(self, image: Image.Image, prompt: str = "Describe this image in detail.") -> str:
-        """
-        Process a single image with the given prompt
-        Args:
-            image: PIL Image object
-            prompt: Text prompt for the model
-        Returns:
-            Generated description
-        """
-        if not self.processor:
-            return "❌ Image processor not available"
         try:
-            # Convert PIL image to bytes for processing
-            img_byte_arr = io.BytesIO()
-            image.save(img_byte_arr, format='JPEG')
-            img_byte_arr = img_byte_arr.getvalue()
-            # Create a temporary file-like object
-            img_io = io.BytesIO(img_byte_arr)
-            img_io.seek(0)
-            # Process the image
-            result = self.processor._process_single_image(
-                image_data=img_io,
-                prompt_text=prompt
-            )
-            return result
         except Exception as e:
-            logger.error(f"Error processing image: {e}")
-            return f"❌ Error processing image: {str(e)}"
-    def process_multiple_images(self, images: List[Image.Image], prompt: str = "Describe each image in detail.") -> List[str]:
-        """
-        Process multiple images with the same prompt
-        Args:
-            images: List of PIL Image objects
-            prompt: Text prompt for the model
-        Returns:
-            List of generated descriptions
-        """
-        if not self.processor:
-            return ["❌ Image processor not available"] * len(images)
-        results = []
-        for i, image in enumerate(images):
-            try:
-                result = self.process_single_image(image, prompt)
-                results.append(f"Image {i+1}: {result}")
-            except Exception as e:
-                logger.error(f"Error processing image {i+1}: {e}")
-                results.append(f"Image {i+1}: ❌ Error - {str(e)}")
-        return results
 def create_gradio_interface():
-    """
-    Create the Gradio interface
-    """
-    # Initialize the processor
-    processor = GradioImageProcessor(use_api=True)
-    def process_images(images, prompt, api_key):
-        """
-        Process uploaded images
-        Args:
-            images: List of uploaded images
-            prompt: User-provided prompt
-            api_key: Optional API key override
-        Returns:
-            List of descriptions
-        """
-        if not images:
-            return "Please upload at least one image."
-        # Update API key if provided
-        if api_key:
-            processor.api_key = api_key
-        # Convert Gradio images to PIL Images
-        pil_images = []
-        for img in images:
-            if img is not None:
-                pil_images.append(Image.fromarray(img))
-        if not pil_images:
-            return "No valid images found."
-        # Process images
-        results = processor.process_multiple_images(pil_images, prompt)
-        # Format results
-        if len(results) == 1:
-            return results[0]
-        else:
-            return "\n\n".join(results)
-    # Create the interface
     with gr.Blocks(
         title="Multi-Image AI Processor",
         theme=gr.themes.Soft(),
@@ -169,7 +104,7 @@ def create_gradio_interface():
     ) as demo:
         gr.Markdown("# 🖼️ Multi-Image AI Processor")
-        gr.Markdown("Upload multiple images and get AI-generated descriptions using the Qwen2-VL model.")
         with gr.Row():
             with gr.Column(scale=2):
@@ -189,12 +124,12 @@ def create_gradio_interface():
                     lines=3
                 )
-                # API key input (optional)
                 api_key_input = gr.Textbox(
-                    label="Hugging Face API Key (optional)",
                     placeholder="hf_...",
                     type="password",
-                    info="Leave empty to use environment variable"
                 )
                 # Process button
@@ -208,26 +143,34 @@ def create_gradio_interface():
                 # Results area
                 results_output = gr.Textbox(
                     label="Results",
-                    lines=10,
-                    max_lines=20,
                     interactive=False
                 )
         # Examples
-        with gr.Accordion("Examples", open=False):
             gr.Examples(
                 examples=[
                     [
-                        ["Describe the architectural style and features of this building."],
                         "Upload images of buildings to analyze their architectural style."
                     ],
                     [
-                        ["What are the key features and amenities shown in this property?"],
                         "Upload property images to get detailed descriptions of features and amenities."
                     ],
                     [
-                        ["Describe the interior design and layout of this space."],
                         "Upload interior photos to get detailed descriptions of design and layout."
                     ]
                 ],
                 inputs=[prompt_input],
@@ -239,43 +182,30 @@ def create_gradio_interface():
         gr.Markdown("---")
         gr.Markdown("""
         **How to use:**
-        1. Upload one or more images
-        2. Enter a prompt describing what you want to know about the images
-        3. Optionally provide your Hugging Face API key
-        4. Click "Process Images" to get AI-generated descriptions
         **Tips:**
         - Use specific prompts for better results
         - The model works best with clear, high-quality images
         - You can process multiple images at once
         """)
         # Connect the process button
         process_btn.click(
-            fn=process_images,
             inputs=[images_input, prompt_input, api_key_input],
             outputs=[results_output]
         )
     return demo
-def main():
-    """
-    Main function to launch the Gradio app
-    """
-    print("🚀 Starting Multi-Image AI Processor...")
-    # Create the interface
-    demo = create_gradio_interface()
-    # Launch the app
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False,
-        show_error=True,
-        show_tips=True
-    )
 if __name__ == "__main__":
-    main()

 import gradio as gr
+import requests
 import base64
+import io
+from PIL import Image
+import os
def encode_image_to_base64(image):
    """Encode a PIL image as a base64 JPEG data URL.

    Args:
        image: A PIL ``Image`` (any object exposing ``mode``, ``convert``
            and ``save`` works).

    Returns:
        A string of the form ``data:image/jpeg;base64,<payload>``.
    """
    # JPEG cannot store an alpha channel or a palette; Pillow raises OSError
    # when asked to write e.g. RGBA/LA/P images (typical for PNG uploads), so
    # normalise everything that is not plain RGB or greyscale to RGB first.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode("ascii")
    return f"data:image/jpeg;base64,{img_str}"
def _extract_description(result):
    """Return the first choice's message text from a chat-completion reply, or None."""
    if not isinstance(result, dict):
        return None
    choices = result.get("choices") or []
    if not choices:
        return None
    return choices[0]["message"]["content"]


def process_images_with_api(images, prompt, api_key):
    """Describe each uploaded image with Qwen2-VL via the Hugging Face Inference API.

    Every image is sent in its own request together with the same prompt,
    and the per-image answers are joined into a single report.

    Args:
        images: Sequence of uploaded images as numpy arrays; ``None``
            entries are skipped.
        prompt: Instruction applied to every image. A blank prompt falls
            back to a generic "describe this image" instruction.
        api_key: Hugging Face access token. Surrounding whitespace is
            ignored and a whitespace-only value counts as missing.

    Returns:
        Either a validation message, or one ``Image N: ...`` entry per
        processed image separated by blank lines. Failures (HTTP errors,
        exceptions) are reported inline for the affected image, not raised.
    """
    if not images:
        return "Please upload at least one image."

    # Pasted tokens frequently carry a trailing newline or space, which makes
    # the Authorization header invalid for every single request.
    api_key = (api_key or "").strip()
    if not api_key:
        return "Please provide your Hugging Face API key."

    prompt = (prompt or "").strip() or "Describe this image in detail."

    model_id = "Qwen/Qwen2-VL-7B-Instruct"
    # The message body and the `choices` parsing follow the OpenAI-compatible
    # chat-completion schema, so the request has to target the model's
    # chat-completion route and carry the conversation under "messages".
    # NOTE(review): confirm this route is still the current one for this model.
    api_url = (
        f"https://api-inference.huggingface.co/models/{model_id}"
        "/v1/chat/completions"
    )
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    results = []
    # Numbering counts skipped (None) slots too, so labels match upload order.
    for position, image in enumerate(images, start=1):
        if image is None:
            continue
        try:
            # Convert numpy array to PIL Image, then to a JPEG data URL.
            data_url = encode_image_to_base64(Image.fromarray(image))
            payload = {
                "model": model_id,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {"type": "image_url", "image_url": {"url": data_url}},
                        ],
                    }
                ],
            }
            response = requests.post(api_url, headers=headers, json=payload, timeout=60)
            if response.status_code != 200:
                error_msg = f"API Error (Status {response.status_code}): {response.text}"
                results.append(f"Image {position}: ❌ {error_msg}")
                continue
            description = _extract_description(response.json())
            if description is None:
                results.append(f"Image {position}: ❌ No response from API")
            else:
                results.append(f"Image {position}: {description}")
        except Exception as e:
            # Per-image boundary: one bad image or request must not abort the batch.
            results.append(f"Image {position}: ❌ Error - {e}")

    if not results:
        return "No valid images processed."
    return "\n\n".join(results)
 def create_gradio_interface():
+    """Create the Gradio interface for Hugging Face Spaces"""
     with gr.Blocks(
         title="Multi-Image AI Processor",
         theme=gr.themes.Soft(),
     ) as demo:
         gr.Markdown("# 🖼️ Multi-Image AI Processor")
+        gr.Markdown("Upload multiple images and get AI-generated descriptions using the Qwen2-VL model via Hugging Face Inference API.")
         with gr.Row():
             with gr.Column(scale=2):
                     lines=3
                 )
+                # API key input (required)
                 api_key_input = gr.Textbox(
+                    label="Hugging Face API Key",
                     placeholder="hf_...",
                     type="password",
+                    info="Required: Get your API key from https://huggingface.co/settings/tokens"
                 )
                 # Process button
                 # Results area
                 results_output = gr.Textbox(
                     label="Results",
+                    lines=15,
+                    max_lines=25,
                     interactive=False
                 )
         # Examples
+        with gr.Accordion("Example Prompts", open=False):
             gr.Examples(
                 examples=[
                     [
+                        "Describe the architectural style and features of this building.",
                         "Upload images of buildings to analyze their architectural style."
                     ],
                     [
+                        "What are the key features and amenities shown in this property?",
                         "Upload property images to get detailed descriptions of features and amenities."
                     ],
                     [
+                        "Describe the interior design and layout of this space.",
                         "Upload interior photos to get detailed descriptions of design and layout."
+                    ],
+                    [
+                        "What type of property is this and what are its main characteristics?",
+                        "Upload property images to identify type and characteristics."
+                    ],
+                    [
+                        "Describe the condition and quality of this property.",
+                        "Upload property images to assess condition and quality."
                     ]
                 ],
                 inputs=[prompt_input],
         gr.Markdown("---")
         gr.Markdown("""
         **How to use:**
+        1. Get your Hugging Face API key from https://huggingface.co/settings/tokens
+        2. Upload one or more images
+        3. Enter a prompt describing what you want to know about the images
+        4. Paste your API key
+        5. Click "Process Images" to get AI-generated descriptions
         **Tips:**
         - Use specific prompts for better results
         - The model works best with clear, high-quality images
         - You can process multiple images at once
+        - Each image is processed individually with the same prompt
         """)
         # Connect the process button
         process_btn.click(
+            fn=process_images_with_api,
             inputs=[images_input, prompt_input, api_key_input],
             outputs=[results_output]
         )
     return demo
+# Build the interface on import; launch it only when this file is run as a script.
+demo = create_gradio_interface()
 if __name__ == "__main__":
+    demo.launch()