Spaces:

Tonic
/

florence-pdf

Sleeping

Tonic committed on Sep 12, 2024

Commit

d4a53db

unverified ·

1 Parent(s): fa2eb8b

add quad boxes

Files changed (1) hide show

app.py CHANGED Viewed

@@ -112,14 +112,24 @@ def fig_to_pil(fig):
     buf.seek(0)
     return Image.open(buf)
-def plot_bbox(image, data):
     fig, ax = plt.subplots()
     ax.imshow(image)
-    for bbox, label in zip(data['bboxes'], data['labels']):
-        x1, y1, x2, y2 = bbox
-        rect = patches.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=1, edgecolor='r', facecolor='none')
-        ax.add_patch(rect)
-        plt.text(x1, y1, label, color='white', fontsize=8, bbox=dict(facecolor='red', alpha=0.5))
     ax.axis('off')
     return fig
@@ -173,8 +183,9 @@ def main_process(image, task):
     if task in IMAGE_TASKS:
         if task == "OCR with Region":
-            output_image = draw_ocr_bboxes(image.copy(), result['quad_boxes'])
-            text_output = result.get('recognized_text', 'No text found')  # Extract recognized text
             # Debugging: Print the recognized text
             print(f"Recognized Text: {text_output}")

     buf.seek(0)
     return Image.open(buf)
+def plot_bbox(image, data, use_quad_boxes=False):
     fig, ax = plt.subplots()
     ax.imshow(image)
+    # Handle both 'bboxes' and 'quad_boxes'
+    if use_quad_boxes:
+        for quad_box, label in zip(data['quad_boxes'], data['labels']):
+            quad_box = np.array(quad_box).reshape(-1, 2)
+            poly = patches.Polygon(quad_box, linewidth=1, edgecolor='r', facecolor='none')
+            ax.add_patch(poly)
+            plt.text(quad_box[0][0], quad_box[0][1], label, color='white', fontsize=8, bbox=dict(facecolor='red', alpha=0.5))
+    else:
+        for bbox, label in zip(data['bboxes'], data['labels']):
+            x1, y1, x2, y2 = bbox
+            rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=1, edgecolor='r', facecolor='none')
+            ax.add_patch(rect)
+            plt.text(x1, y1, label, color='white', fontsize=8, bbox=dict(facecolor='red', alpha=0.5))
     ax.axis('off')
     return fig
     if task in IMAGE_TASKS:
         if task == "OCR with Region":
+            fig = plot_bbox(image, result['<OCR_WITH_REGION>'], use_quad_boxes=True)
+            output_image = fig_to_pil(fig)
+            text_output = result.get('<OCR_WITH_REGION>', {}).get('recognized_text', 'No text found')
             # Debugging: Print the recognized text
             print(f"Recognized Text: {text_output}")