wjm55 committed
Commit 529282d · Parent: 0388107
Files changed (2):
  1. app.py +55 -5
  2. requirements.txt +3 -1
app.py CHANGED
@@ -8,7 +8,12 @@ import subprocess
 from datetime import datetime
 import numpy as np
 import os
+from gliner import GLiNER
 
+# Initialize GLiNER model
+gliner_model = GLiNER.from_pretrained("knowledgator/modern-gliner-bi-large-v1.0")
+
+DEFAULT_NER_LABELS = "person, organization, location, date, event"
 
 # subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
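The hunk above loads GLiNER once at import time, so every request reuses the same model instance. A minimal sketch of the predict_entities call pattern the rest of the commit builds on (the sample sentence and labels are illustrative, not from the app):

from gliner import GLiNER

model = GLiNER.from_pretrained("knowledgator/modern-gliner-bi-large-v1.0")

# predict_entities returns one dict per detected span, carrying the
# "start", "end", "text", "label", and "score" keys used later in app.py.
entities = model.predict_entities(
    "Ada Lovelace met Charles Babbage in London in 1833.",
    labels=["person", "location", "date"],
    threshold=0.3,
)
for e in entities:
    print(e["start"], e["end"], e["text"], "->", e["label"])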
 
@@ -52,14 +57,14 @@ assistant_prompt = '<|assistant|>\n'
 prompt_suffix = "<|end|>\n"
 
 @spaces.GPU
-def run_example(image, model_id="Qwen/Qwen2-VL-7B-Instruct"):
+def run_example(image, model_id="Qwen/Qwen2-VL-7B-Instruct", run_ner=False, ner_labels=DEFAULT_NER_LABELS):
+    # First get the OCR text
     text_input = "Convert the image to text."
     image_path = array_to_image_path(image)
     
-    print(image_path)
     model = models[model_id]
     processor = processors[model_id]
-
+    
     prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
     image = Image.fromarray(image).convert("RGB")
     messages = [
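Both new parameters are keyword defaults that reproduce the old behaviour, so existing callers are unaffected. Hypothetical calls, where image_array stands in for the numpy array that gr.Image hands the function:

plain = run_example(image_array)  # OCR only, exactly as before
tagged = run_example(image_array, run_ner=True, ner_labels="person, location")  # OCR plus entity markup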
@@ -98,7 +103,30 @@ def run_example(image, model_id="Qwen/Qwen2-VL-7B-Instruct"):
         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )
     
-    return output_text[0]
+    ocr_text = output_text[0]
+    
+    # If NER is enabled, process the OCR text
+    if run_ner:
+        ner_results = gliner_model.predict_entities(
+            ocr_text,
+            ner_labels.split(","),
+            threshold=0.3
+        )
+        
+        # Format the text with entity annotations
+        annotated_text = ocr_text
+        for entity in sorted(ner_results, key=lambda x: x["start"], reverse=True):
+            entity_text = entity["text"]
+            entity_label = entity["label"]
+            annotated_text = (
+                annotated_text[:entity["start"]] +
+                f"[{entity_text}]({entity_label})" +
+                annotated_text[entity["end"]:]
+            )
+        
+        return f"OCR Text:\n{ocr_text}\n\nAnnotated Entities:\n{annotated_text}"
+    
+    return ocr_text
 
 css = """
 /* Overall app styling */
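The annotation loop above splices [text](label) markers into the OCR string. Each splice lengthens the string, which is why entities are applied in reverse start order: edits near the end of the text leave all earlier offsets intact. A self-contained sketch of the technique with hard-coded entities:

# Splice from the back of the string toward the front so that the
# character offsets of the not-yet-processed entities stay valid.
text = "Ada met Babbage in London."
entities = [
    {"start": 0, "end": 3, "text": "Ada", "label": "person"},
    {"start": 19, "end": 25, "text": "London", "label": "location"},
]
for e in sorted(entities, key=lambda x: x["start"], reverse=True):
    text = text[:e["start"]] + f"[{e['text']}]({e['label']})" + text[e["end"]:]
print(text)  # [Ada](person) met Babbage in [London](location).

One caveat in the hunk itself: ner_labels.split(",") keeps leading spaces (e.g. " organization"), so [l.strip() for l in ner_labels.split(",")] would parse the label list more robustly.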
@@ -178,11 +206,33 @@ with gr.Blocks(css=css) as demo:
         with gr.Column(elem_classes="input-container"):
             input_img = gr.Image(label="Input Picture", elem_classes="gr-image-input")
             model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Qwen/Qwen2-VL-7B-Instruct", elem_classes="gr-dropdown")
+            
+            # Add NER controls
+            with gr.Row():
+                ner_checkbox = gr.Checkbox(label="Run Named Entity Recognition", value=False)
+                ner_labels = gr.Textbox(
+                    label="NER Labels (comma-separated)",
+                    value=DEFAULT_NER_LABELS,
+                    visible=False
+                )
+            
             submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
         with gr.Column(elem_classes="output-container"):
             output_text = gr.Textbox(label="Output Text", elem_id="output")
 
-        submit_btn.click(run_example, [input_img, model_selector], [output_text])
+        # Show/hide NER labels based on checkbox
+        ner_checkbox.change(
+            lambda x: gr.update(visible=x),
+            inputs=[ner_checkbox],
+            outputs=[ner_labels]
+        )
+        
+        # Update submit button click handler
+        submit_btn.click(
+            run_example,
+            inputs=[input_img, model_selector, ner_checkbox, ner_labels],
+            outputs=[output_text]
+        )
         with gr.Row():
             filename = gr.Textbox(label="Save filename (without extension)", placeholder="Enter filename to save")
             download_btn = gr.Button("Download Image & Text", elem_classes="submit-btn")
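Here the checkbox drives the textbox's visibility through gr.update, and the click handler forwards both NER inputs positionally into run_example's run_ner and ner_labels parameters. A standalone sketch of the show/hide pattern, assuming a Gradio version where gr.update(visible=...) is a valid event return value:

import gradio as gr

with gr.Blocks() as demo:
    use_ner = gr.Checkbox(label="Run Named Entity Recognition", value=False)
    labels = gr.Textbox(label="NER Labels (comma-separated)", visible=False)
    # The checkbox value arrives as the lambda's argument; returning
    # gr.update(visible=x) shows or hides the textbox to match it.
    use_ner.change(lambda x: gr.update(visible=x), inputs=[use_ner], outputs=[labels])

demo.launch()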
 
requirements.txt CHANGED
@@ -5,4 +5,6 @@ torch
 torchvision
 git+https://github.com/huggingface/transformers.git
 accelerate
-qwen-vl-utils
+qwen-vl-utils
+git+https://github.com/huggingface/transformers.git
+gliner