Spaces: Runtime error

Update app.py

app.py CHANGED
@@ -146,106 +146,98 @@ def draw_ocr_bboxes(image, prediction):
                     fill=color)
     return image
 
+import json
+
 def process_image(image, task_prompt, text_input=None, model_id='microsoft/Florence-2-large'):
     image = Image.fromarray(image)  # Convert NumPy array to PIL Image
+
+    results = {}
+    output_image = None
+
     if task_prompt == 'Caption':
-        return results, None
+        results = run_example('<CAPTION>', image, model_id=model_id)
+
     elif task_prompt == 'Detailed Caption':
-        return results, None
+        results = run_example('<DETAILED_CAPTION>', image, model_id=model_id)
+
     elif task_prompt == 'More Detailed Caption':
-        return results, None
+        results = run_example('<MORE_DETAILED_CAPTION>', image, model_id=model_id)
+
     elif task_prompt == 'Caption + Grounding':
-        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
-        results = run_example(task_prompt, image, text_input, model_id)
-        results['<CAPTION>'] = text_input
+        caption = run_example('<CAPTION>', image, model_id=model_id)['<CAPTION>']
+        results = run_example('<CAPTION_TO_PHRASE_GROUNDING>', image, caption, model_id)
+        results['<CAPTION>'] = caption
         fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Detailed Caption + Grounding':
-        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
-        results = run_example(task_prompt, image, text_input, model_id)
-        results['<DETAILED_CAPTION>'] = text_input
+        caption = run_example('<DETAILED_CAPTION>', image, model_id=model_id)['<DETAILED_CAPTION>']
+        results = run_example('<CAPTION_TO_PHRASE_GROUNDING>', image, caption, model_id)
+        results['<DETAILED_CAPTION>'] = caption
         fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'More Detailed Caption + Grounding':
-        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
-        results = run_example(task_prompt, image, text_input, model_id)
-        results['<MORE_DETAILED_CAPTION>'] = text_input
+        caption = run_example('<MORE_DETAILED_CAPTION>', image, model_id=model_id)['<MORE_DETAILED_CAPTION>']
+        results = run_example('<CAPTION_TO_PHRASE_GROUNDING>', image, caption, model_id)
+        results['<MORE_DETAILED_CAPTION>'] = caption
         fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Object Detection':
-        results = run_example(task_prompt, image, model_id=model_id)
+        results = run_example('<OD>', image, model_id=model_id)
         fig = plot_bbox(image, results['<OD>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Dense Region Caption':
-        results = run_example(task_prompt, image, model_id=model_id)
+        results = run_example('<DENSE_REGION_CAPTION>', image, model_id=model_id)
         fig = plot_bbox(image, results['<DENSE_REGION_CAPTION>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Region Proposal':
-        results = run_example(task_prompt, image, model_id=model_id)
+        results = run_example('<REGION_PROPOSAL>', image, model_id=model_id)
         fig = plot_bbox(image, results['<REGION_PROPOSAL>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Caption to Phrase Grounding':
-        results = run_example(task_prompt, image, text_input, model_id)
+        results = run_example('<CAPTION_TO_PHRASE_GROUNDING>', image, text_input, model_id)
         fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Referring Expression Segmentation':
-        output_image = draw_polygons(output_image, results['<REFERRING_EXPRESSION_SEGMENTATION>'], fill_mask=True)
-        return results, output_image
+        results = run_example('<REFERRING_EXPRESSION_SEGMENTATION>', image, text_input, model_id)
+        output_image = draw_polygons(image.copy(), results['<REFERRING_EXPRESSION_SEGMENTATION>'], fill_mask=True)
+
     elif task_prompt == 'Region to Segmentation':
-        output_image = draw_polygons(output_image, results['<REGION_TO_SEGMENTATION>'], fill_mask=True)
-        return results, output_image
+        results = run_example('<REGION_TO_SEGMENTATION>', image, text_input, model_id)
+        output_image = draw_polygons(image.copy(), results['<REGION_TO_SEGMENTATION>'], fill_mask=True)
+
     elif task_prompt == 'Open Vocabulary Detection':
-        results = run_example(task_prompt, image, text_input, model_id)
+        results = run_example('<OPEN_VOCABULARY_DETECTION>', image, text_input, model_id)
         bbox_results = convert_to_od_format(results['<OPEN_VOCABULARY_DETECTION>'])
         fig = plot_bbox(image, bbox_results)
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Region to Category':
-        return results, None
+        results = run_example('<REGION_TO_CATEGORY>', image, text_input, model_id)
+
     elif task_prompt == 'Region to Description':
-        return results, None
+        results = run_example('<REGION_TO_DESCRIPTION>', image, text_input, model_id)
+
     elif task_prompt == 'OCR':
-        return results, None
+        results = run_example('<OCR>', image, model_id=model_id)
+
     elif task_prompt == 'OCR with Region':
-        return results, output_image
+        results = run_example('<OCR_WITH_REGION>', image, model_id=model_id)
+        output_image = draw_ocr_bboxes(image.copy(), results['<OCR_WITH_REGION>'])
+
+    # Default: empty result
     else:
+        results = {}
 
+    # ✅ Single return point
+    return json.dumps(results), output_image
+
 
 css = """
 #col-container {
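
The refactored process_image relies on helpers defined earlier in app.py that are outside this hunk, most notably run_example (which actually queries Florence-2) and fig_to_pil (which converts the Matplotlib figure produced by plot_bbox into a PIL image). For context, here is a minimal sketch of what such helpers typically look like in Florence-2 demo Spaces, following the usage shown on the microsoft/Florence-2-large model card; the bodies below are assumptions for illustration, not the code from this Space.

# Sketch only -- the real run_example and fig_to_pil live earlier in app.py and may differ.
import io

from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor


def run_example(task_prompt, image, text_input=None, model_id='microsoft/Florence-2-large'):
    # Standard Florence-2 inference: task token (plus optional extra text) in, parsed dict out.
    # A real Space would load/cache the model and processor once rather than per call;
    # loading is inlined here only to keep the sketch self-contained.
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).eval()
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    prompt = task_prompt if text_input is None else task_prompt + text_input
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    # Returns a dict keyed by the task token, e.g. {'<OD>': {'bboxes': [...], 'labels': [...]}}.
    return processor.post_process_generation(
        generated_text, task=task_prompt, image_size=(image.width, image.height)
    )


def fig_to_pil(fig):
    # Render a Matplotlib figure to an in-memory PNG and reload it as a PIL image.
    buf = io.BytesIO()
    fig.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)
    return Image.open(buf)

With the single return point introduced by this commit, process_image always yields a (JSON string, image-or-None) pair, which presumably maps onto a Gradio Textbox/Image output pair in the UI that the css block below styles.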