Spaces: Runtime error

Update app.py

app.py CHANGED
@@ -146,106 +146,98 @@ def draw_ocr_bboxes(image, prediction):
                     fill=color)
     return image
 
+import json
+
 def process_image(image, task_prompt, text_input=None, model_id='microsoft/Florence-2-large'):
     image = Image.fromarray(image)  # Convert NumPy array to PIL Image
+
+    results = {}
+    output_image = None
+
     if task_prompt == 'Caption':
-        return results, None
+        results = run_example('<CAPTION>', image, model_id=model_id)
+
     elif task_prompt == 'Detailed Caption':
-        return results, None
+        results = run_example('<DETAILED_CAPTION>', image, model_id=model_id)
+
     elif task_prompt == 'More Detailed Caption':
-        return results, None
+        results = run_example('<MORE_DETAILED_CAPTION>', image, model_id=model_id)
+
     elif task_prompt == 'Caption + Grounding':
-        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
-        results = run_example(task_prompt, image, text_input, model_id)
-        results['<CAPTION>'] = text_input
+        caption = run_example('<CAPTION>', image, model_id=model_id)['<CAPTION>']
+        results = run_example('<CAPTION_TO_PHRASE_GROUNDING>', image, caption, model_id)
+        results['<CAPTION>'] = caption
         fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Detailed Caption + Grounding':
-        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
-        results = run_example(task_prompt, image, text_input, model_id)
-        results['<DETAILED_CAPTION>'] = text_input
+        caption = run_example('<DETAILED_CAPTION>', image, model_id=model_id)['<DETAILED_CAPTION>']
+        results = run_example('<CAPTION_TO_PHRASE_GROUNDING>', image, caption, model_id)
+        results['<DETAILED_CAPTION>'] = caption
         fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'More Detailed Caption + Grounding':
-        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
-        results = run_example(task_prompt, image, text_input, model_id)
-        results['<MORE_DETAILED_CAPTION>'] = text_input
+        caption = run_example('<MORE_DETAILED_CAPTION>', image, model_id=model_id)['<MORE_DETAILED_CAPTION>']
+        results = run_example('<CAPTION_TO_PHRASE_GROUNDING>', image, caption, model_id)
+        results['<MORE_DETAILED_CAPTION>'] = caption
         fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Object Detection':
-        results = run_example(task_prompt, image, model_id=model_id)
+        results = run_example('<OD>', image, model_id=model_id)
         fig = plot_bbox(image, results['<OD>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Dense Region Caption':
-        results = run_example(task_prompt, image, model_id=model_id)
+        results = run_example('<DENSE_REGION_CAPTION>', image, model_id=model_id)
         fig = plot_bbox(image, results['<DENSE_REGION_CAPTION>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Region Proposal':
-        results = run_example(task_prompt, image, model_id=model_id)
+        results = run_example('<REGION_PROPOSAL>', image, model_id=model_id)
         fig = plot_bbox(image, results['<REGION_PROPOSAL>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Caption to Phrase Grounding':
-        results = run_example(task_prompt, image, text_input, model_id)
+        results = run_example('<CAPTION_TO_PHRASE_GROUNDING>', image, text_input, model_id)
         fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Referring Expression Segmentation':
-        output_image = draw_polygons(output_image, results['<REFERRING_EXPRESSION_SEGMENTATION>'], fill_mask=True)
-        return results, output_image
+        results = run_example('<REFERRING_EXPRESSION_SEGMENTATION>', image, text_input, model_id)
+        output_image = draw_polygons(image.copy(), results['<REFERRING_EXPRESSION_SEGMENTATION>'], fill_mask=True)
+
     elif task_prompt == 'Region to Segmentation':
-        output_image = draw_polygons(output_image, results['<REGION_TO_SEGMENTATION>'], fill_mask=True)
-        return results, output_image
+        results = run_example('<REGION_TO_SEGMENTATION>', image, text_input, model_id)
+        output_image = draw_polygons(image.copy(), results['<REGION_TO_SEGMENTATION>'], fill_mask=True)
+
     elif task_prompt == 'Open Vocabulary Detection':
-        results = run_example(task_prompt, image, text_input, model_id)
+        results = run_example('<OPEN_VOCABULARY_DETECTION>', image, text_input, model_id)
         bbox_results = convert_to_od_format(results['<OPEN_VOCABULARY_DETECTION>'])
         fig = plot_bbox(image, bbox_results)
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Region to Category':
-        return results, None
+        results = run_example('<REGION_TO_CATEGORY>', image, text_input, model_id)
+
     elif task_prompt == 'Region to Description':
-        return results, None
+        results = run_example('<REGION_TO_DESCRIPTION>', image, text_input, model_id)
+
     elif task_prompt == 'OCR':
-        return results, None
+        results = run_example('<OCR>', image, model_id=model_id)
+
     elif task_prompt == 'OCR with Region':
-        return results, output_image
+        results = run_example('<OCR_WITH_REGION>', image, model_id=model_id)
+        output_image = draw_ocr_bboxes(image.copy(), results['<OCR_WITH_REGION>'])
+
+    # Default: empty result
     else:
+        results = {}
 
+    # ✅ Single return point
+    return json.dumps(results), output_image
+
 
 css = """
 #col-container {
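
The refactored process_image relies on helpers defined earlier in app.py that are outside this hunk, most notably run_example (which actually queries Florence-2) and fig_to_pil (which converts the Matplotlib figure produced by plot_bbox into a PIL image). For context, here is a minimal sketch of what such helpers typically look like in Florence-2 demo Spaces, following the usage shown on the microsoft/Florence-2-large model card; the bodies below are assumptions for illustration, not the code from this Space.

# Sketch only -- the real run_example and fig_to_pil live earlier in app.py and may differ.
import io

from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor


def run_example(task_prompt, image, text_input=None, model_id='microsoft/Florence-2-large'):
    # Standard Florence-2 inference: task token (plus optional extra text) in, parsed dict out.
    # A real Space would load/cache the model and processor once rather than per call;
    # loading is inlined here only to keep the sketch self-contained.
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).eval()
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    prompt = task_prompt if text_input is None else task_prompt + text_input
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    # Returns a dict keyed by the task token, e.g. {'<OD>': {'bboxes': [...], 'labels': [...]}}.
    return processor.post_process_generation(
        generated_text, task=task_prompt, image_size=(image.width, image.height)
    )


def fig_to_pil(fig):
    # Render a Matplotlib figure to an in-memory PNG and reload it as a PIL image.
    buf = io.BytesIO()
    fig.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)
    return Image.open(buf)

With the single return point introduced by this commit, process_image always yields a (JSON string, image-or-None) pair, which presumably maps onto a Gradio Textbox/Image output pair in the UI that the css block below styles.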