andstor committed
Commit a5819b5 · verified · Parent: 5ab5afc

Update app.py

Files changed (1)
  1. app.py +62 -70
app.py CHANGED
@@ -146,106 +146,98 @@ def draw_ocr_bboxes(image, prediction):
                   fill=color)
     return image
 
+import json
+
 def process_image(image, task_prompt, text_input=None, model_id='microsoft/Florence-2-large'):
     image = Image.fromarray(image) # Convert NumPy array to PIL Image
+
+    results = {}
+    output_image = None
+
     if task_prompt == 'Caption':
-        task_prompt = '<CAPTION>'
-        results = run_example(task_prompt, image, model_id=model_id)
-        return results, None
+        results = run_example('<CAPTION>', image, model_id=model_id)
+
     elif task_prompt == 'Detailed Caption':
-        task_prompt = '<DETAILED_CAPTION>'
-        results = run_example(task_prompt, image, model_id=model_id)
-        return results, None
+        results = run_example('<DETAILED_CAPTION>', image, model_id=model_id)
+
     elif task_prompt == 'More Detailed Caption':
-        task_prompt = '<MORE_DETAILED_CAPTION>'
-        results = run_example(task_prompt, image, model_id=model_id)
-        return results, None
+        results = run_example('<MORE_DETAILED_CAPTION>', image, model_id=model_id)
+
     elif task_prompt == 'Caption + Grounding':
-        task_prompt = '<CAPTION>'
-        results = run_example(task_prompt, image, model_id=model_id)
-        text_input = results[task_prompt]
-        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
-        results = run_example(task_prompt, image, text_input, model_id)
-        results['<CAPTION>'] = text_input
+        caption = run_example('<CAPTION>', image, model_id=model_id)['<CAPTION>']
+        results = run_example('<CAPTION_TO_PHRASE_GROUNDING>', image, caption, model_id)
+        results['<CAPTION>'] = caption
         fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
-        return results, fig_to_pil(fig)
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Detailed Caption + Grounding':
-        task_prompt = '<DETAILED_CAPTION>'
-        results = run_example(task_prompt, image, model_id=model_id)
-        text_input = results[task_prompt]
-        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
-        results = run_example(task_prompt, image, text_input, model_id)
-        results['<DETAILED_CAPTION>'] = text_input
+        caption = run_example('<DETAILED_CAPTION>', image, model_id=model_id)['<DETAILED_CAPTION>']
+        results = run_example('<CAPTION_TO_PHRASE_GROUNDING>', image, caption, model_id)
+        results['<DETAILED_CAPTION>'] = caption
         fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
-        return results, fig_to_pil(fig)
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'More Detailed Caption + Grounding':
-        task_prompt = '<MORE_DETAILED_CAPTION>'
-        results = run_example(task_prompt, image, model_id=model_id)
-        text_input = results[task_prompt]
-        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
-        results = run_example(task_prompt, image, text_input, model_id)
-        results['<MORE_DETAILED_CAPTION>'] = text_input
+        caption = run_example('<MORE_DETAILED_CAPTION>', image, model_id=model_id)['<MORE_DETAILED_CAPTION>']
+        results = run_example('<CAPTION_TO_PHRASE_GROUNDING>', image, caption, model_id)
+        results['<MORE_DETAILED_CAPTION>'] = caption
         fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
-        return results, fig_to_pil(fig)
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Object Detection':
-        task_prompt = '<OD>'
-        results = run_example(task_prompt, image, model_id=model_id)
+        results = run_example('<OD>', image, model_id=model_id)
         fig = plot_bbox(image, results['<OD>'])
-        return results, fig_to_pil(fig)
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Dense Region Caption':
-        task_prompt = '<DENSE_REGION_CAPTION>'
-        results = run_example(task_prompt, image, model_id=model_id)
+        results = run_example('<DENSE_REGION_CAPTION>', image, model_id=model_id)
         fig = plot_bbox(image, results['<DENSE_REGION_CAPTION>'])
-        return results, fig_to_pil(fig)
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Region Proposal':
-        task_prompt = '<REGION_PROPOSAL>'
-        results = run_example(task_prompt, image, model_id=model_id)
+        results = run_example('<REGION_PROPOSAL>', image, model_id=model_id)
         fig = plot_bbox(image, results['<REGION_PROPOSAL>'])
-        return results, fig_to_pil(fig)
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Caption to Phrase Grounding':
-        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
-        results = run_example(task_prompt, image, text_input, model_id)
+        results = run_example('<CAPTION_TO_PHRASE_GROUNDING>', image, text_input, model_id)
         fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
-        return results, fig_to_pil(fig)
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Referring Expression Segmentation':
-        task_prompt = '<REFERRING_EXPRESSION_SEGMENTATION>'
-        results = run_example(task_prompt, image, text_input, model_id)
-        output_image = copy.deepcopy(image)
-        output_image = draw_polygons(output_image, results['<REFERRING_EXPRESSION_SEGMENTATION>'], fill_mask=True)
-        return results, output_image
+        results = run_example('<REFERRING_EXPRESSION_SEGMENTATION>', image, text_input, model_id)
+        output_image = draw_polygons(image.copy(), results['<REFERRING_EXPRESSION_SEGMENTATION>'], fill_mask=True)
+
     elif task_prompt == 'Region to Segmentation':
-        task_prompt = '<REGION_TO_SEGMENTATION>'
-        results = run_example(task_prompt, image, text_input, model_id)
-        output_image = copy.deepcopy(image)
-        output_image = draw_polygons(output_image, results['<REGION_TO_SEGMENTATION>'], fill_mask=True)
-        return results, output_image
+        results = run_example('<REGION_TO_SEGMENTATION>', image, text_input, model_id)
+        output_image = draw_polygons(image.copy(), results['<REGION_TO_SEGMENTATION>'], fill_mask=True)
+
     elif task_prompt == 'Open Vocabulary Detection':
-        task_prompt = '<OPEN_VOCABULARY_DETECTION>'
-        results = run_example(task_prompt, image, text_input, model_id)
+        results = run_example('<OPEN_VOCABULARY_DETECTION>', image, text_input, model_id)
         bbox_results = convert_to_od_format(results['<OPEN_VOCABULARY_DETECTION>'])
         fig = plot_bbox(image, bbox_results)
-        return results, fig_to_pil(fig)
+        output_image = fig_to_pil(fig)
+
     elif task_prompt == 'Region to Category':
-        task_prompt = '<REGION_TO_CATEGORY>'
-        results = run_example(task_prompt, image, text_input, model_id)
-        return results, None
+        results = run_example('<REGION_TO_CATEGORY>', image, text_input, model_id)
+
     elif task_prompt == 'Region to Description':
-        task_prompt = '<REGION_TO_DESCRIPTION>'
-        results = run_example(task_prompt, image, text_input, model_id)
-        return results, None
+        results = run_example('<REGION_TO_DESCRIPTION>', image, text_input, model_id)
+
     elif task_prompt == 'OCR':
-        task_prompt = '<OCR>'
-        results = run_example(task_prompt, image, model_id=model_id)
-        return results, None
+        results = run_example('<OCR>', image, model_id=model_id)
+
     elif task_prompt == 'OCR with Region':
-        task_prompt = '<OCR_WITH_REGION>'
-        results = run_example(task_prompt, image, model_id=model_id)
-        output_image = copy.deepcopy(image)
-        output_image = draw_ocr_bboxes(output_image, results['<OCR_WITH_REGION>'])
-        return results, output_image
+        results = run_example('<OCR_WITH_REGION>', image, model_id=model_id)
+        output_image = draw_ocr_bboxes(image.copy(), results['<OCR_WITH_REGION>'])
+
+    # Default: empty result
     else:
-        return "", None # Return empty string and None for unknown task prompts
+        results = {}
 
+    # ✅ Single return point
+    return json.dumps(results), output_image
+
 
 css = """
 #col-container {
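
The refactor funnels every task through a single exit point and JSON-encodes the results, so callers now receive a string rather than a dict (and an optional overlay image). A minimal usage sketch, not part of the commit: the blank test frame is hypothetical, and it assumes app.py's helpers (run_example, plot_bbox, fig_to_pil, etc.) are defined above as in the diff.

import json
import numpy as np

blank = np.zeros((256, 256, 3), dtype=np.uint8)  # dummy RGB frame for illustration

# Caption-style tasks return JSON text and no overlay image.
results_json, overlay = process_image(blank, 'Caption')
print(json.loads(results_json))  # the results dict round-trips through json.dumps
assert overlay is None

# Detection-style tasks additionally return a PIL image of the bbox plot.
results_json, overlay = process_image(blank, 'Object Detection')

Note that any Gradio component bound to the first output should now expect a JSON string; json.dumps(results) also replaces the old mixed contract, where unknown prompts returned "" while known ones returned a dict.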