arshaan-nazir committed on
Commit
511f352
·
verified ·
1 Parent(s): b2468fa

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +328 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from paddleocr import PaddleOCR
3
+ from groq import Groq
4
+ from openai import OpenAI
5
+ import os
6
+ import json
7
+
8
+ ##################################
9
+ # Initialize Models
10
+ ##################################
11
# NOTE: this message is printed at import time; the OCR engines themselves
# are only constructed later, on first use, by get_ocr_model().
print("Loading PaddleOCR model...")

# UI display name -> language code handed to PaddleOCR(lang=...).
# Insertion order is the order the names appear in the language dropdown.
AVAILABLE_LANGUAGES = {
    "English": "en",
    "Chinese Simplified": "ch",
    "French": "fr",
    "German": "german",
    "Korean": "korean",
    "Japanese": "japan",
    "Italian": "it",
    "Spanish": "es",
    "Portuguese": "pt",
    "Russian": "ru",
    "Arabic": "ar",
    "Hindi": "hi",
    "Vietnamese": "vi",
    "Thai": "th",
}

# Choices for the LLM provider dropdown; "None" means raw OCR output only.
PROVIDERS = ["None", "Groq", "OpenAI"]

# Cache of PaddleOCR engines, keyed by language code.
ocr_models = {}
36
+
37
def get_ocr_model(lang_code):
    """Return the PaddleOCR engine for ``lang_code``, building it on first use.

    Engines are memoized in the module-level ``ocr_models`` dict so each
    language code is only constructed once per process.
    """
    try:
        return ocr_models[lang_code]
    except KeyError:
        pass

    engine = PaddleOCR(
        use_angle_cls=True,
        lang=lang_code,
        show_log=False,
        enable_mkldnn=True,  # Better CPU performance
    )
    ocr_models[lang_code] = engine
    return engine
46
+
47
+ ##################################
48
+ # Groq Processing Functions
49
+ ##################################
50
def format_with_groq(text: str, api_key: str) -> str:
    """Ask a Groq-hosted model to turn raw receipt text into structured data.

    This is the first of two Groq passes: the model is prompted to pull out
    the key receipt fields; the reply is returned as free text (it is not
    validated as JSON here).

    Args:
        text: Raw OCR text of the receipt.
        api_key: Groq API key used to build the client.

    Returns:
        The concatenated streamed reply with surrounding whitespace stripped.

    Exceptions raised by the Groq client are not caught here and propagate
    to the caller.
    """
    client = Groq(api_key=api_key)
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a receipt data extraction expert. Extract and format the receipt data into a clear JSON structure.\n"
                    "Look for these key pieces of information:\n"
                    "1. Restaurant/store name\n"
                    "2. Date and time\n"
                    "3. Individual items with quantities and prices\n"
                    "4. Table number if present\n"
                    "5. Server name if present\n"
                    "6. Payment details\n"
                    "7. Receipt/order number\n"
                    "Format numbers as actual numbers, not strings."
                )
            },
            {
                "role": "user",
                "content": f"Convert this receipt text to structured data:\n\n{text}"
            }
        ],
        temperature=0.1,
        max_tokens=1024,
        top_p=1,
        stream=True
    )

    pieces = []
    for chunk in completion:
        # A stream chunk may carry no choices; skip it instead of raising
        # IndexError on chunk.choices[0].
        if not chunk.choices:
            continue
        content = getattr(chunk.choices[0].delta, "content", None)
        if content:
            pieces.append(content)

    return "".join(pieces).strip()
88
+
89
def refine_json_with_groq(initial_text: str, api_key: str) -> str:
    """Ask a Groq-hosted model to coerce receipt data into a fixed JSON schema.

    This is the second Groq pass. The streamed reply is trimmed to the span
    from the first ``{`` to the last ``}`` (to drop any prose or code fences
    around the object) and, if that span parses as JSON, re-serialized with
    two-space indentation.

    Args:
        initial_text: Output of the first formatting pass.
        api_key: Groq API key used to build the client.

    Returns:
        Pretty-printed JSON when the reply parses; otherwise the trimmed
        reply text unchanged, so the caller can decide how to report it.

    Exceptions raised by the Groq client are not caught here and propagate
    to the caller.
    """
    client = Groq(api_key=api_key)
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {
                "role": "system",
                "content": (
                    "Convert the receipt data into this exact JSON format:\n"
                    "{\n"
                    " 'restaurant_name': string,\n"
                    " 'date': string,\n"
                    " 'time': string,\n"
                    " 'table_number': string or number,\n"
                    " 'server_name': string,\n"
                    " 'payment_method': string,\n"
                    " 'items': [{'name': string, 'quantity': number, 'price': number}],\n"
                    " 'subtotal': number,\n"
                    " 'tax': number,\n"
                    " 'tip': number or null,\n"
                    " 'total': number,\n"
                    " 'receipt_number': string or null\n"
                    "}\n"
                    "Rules:\n"
                    "1. Use ONLY double quotes for JSON compliance\n"
                    "2. All numbers must be actual numbers, not strings\n"
                    "3. Return ONLY the JSON, no explanations\n"
                    "4. Ensure math is correct"
                )
            },
            {
                "role": "user",
                "content": f"Format this receipt data as valid JSON:\n\n{initial_text}"
            }
        ],
        temperature=0.1,
        max_tokens=1024,
        top_p=1,
        stream=True
    )

    pieces = []
    for chunk in completion:
        # A stream chunk may carry no choices; skip it instead of raising
        # IndexError on chunk.choices[0].
        if not chunk.choices:
            continue
        content = getattr(chunk.choices[0].delta, "content", None)
        if content:
            pieces.append(content)
    refined_text = "".join(pieces)

    # Keep only the outermost {...} span. Require the closing brace to come
    # after the opening one; otherwise leave the text untouched rather than
    # slicing it down to an empty string.
    json_start = refined_text.find('{')
    json_end = refined_text.rfind('}') + 1
    if 0 <= json_start < json_end:
        refined_text = refined_text[json_start:json_end]

    try:
        # Validate JSON and reformat
        parsed_json = json.loads(refined_text)
    except json.JSONDecodeError:
        return refined_text
    return json.dumps(parsed_json, indent=2)
148
+
149
+ ##################################
150
+ # OpenAI Processing Functions
151
+ ##################################
152
def process_with_openai(text: str, api_key: str) -> str:
    """Ask OpenAI's chat API to convert receipt text into the fixed JSON schema.

    Args:
        text: Raw OCR text of the receipt.
        api_key: OpenAI API key used to build the client.

    Returns:
        A JSON *string* (not a dict): the model's reply trimmed to its
        outermost ``{...}`` span, or ``{"error": "..."}`` serialized as JSON
        if the request fails or the reply is empty. The caller is expected
        to ``json.loads`` the result.
    """
    client = OpenAI(api_key=api_key)
    try:
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "Convert the receipt data into this exact JSON format:\n"
                        "{\n"
                        " 'restaurant_name': string,\n"
                        " 'date': string,\n"
                        " 'time': string,\n"
                        " 'table_number': string or number,\n"
                        " 'server_name': string,\n"
                        " 'payment_method': string,\n"
                        " 'items': [{'name': string, 'quantity': number, 'price': number}],\n"
                        " 'subtotal': number,\n"
                        " 'tax': number,\n"
                        " 'tip': number or null,\n"
                        " 'total': number,\n"
                        " 'receipt_number': string or null\n"
                        "}\n"
                        "Rules:\n"
                        "1. Use ONLY double quotes for JSON compliance\n"
                        "2. All numbers must be actual numbers, not strings\n"
                        "3. Return ONLY the JSON, no explanations"
                    )
                },
                {
                    "role": "user",
                    "content": f"Convert this receipt text to JSON:\n\n{text}"
                }
            ],
            temperature=0.1
        )
        content = completion.choices[0].message.content
    except Exception as e:
        # Boundary with a remote service: report any failure as a JSON error
        # payload so the caller's json.loads still succeeds.
        return json.dumps({"error": str(e)})

    if not content:
        # message.content can be None/empty; json.loads(None) would raise a
        # TypeError in the caller, so return a well-formed error instead.
        return json.dumps({"error": "Empty response from OpenAI"})

    # Drop any prose or markdown fences around the object, mirroring the
    # clean-up done on the Groq path.
    json_start = content.find('{')
    json_end = content.rfind('}') + 1
    if 0 <= json_start < json_end:
        content = content[json_start:json_end]
    return content
192
+
193
+ ##################################
194
+ # Main Processing
195
+ ##################################
196
def _ocr_result_to_text(result) -> str:
    """Join the recognized text of every OCR line with newlines.

    Tolerates a missing result and empty/None pages (presumably what
    PaddleOCR returns when no text is detected; confirm against the
    installed version) instead of raising TypeError while iterating.
    """
    return "\n".join(
        line[1][0]
        for page in (result or [])
        if page
        for line in page
    )


def process_receipt(image, selected_language, provider="None", api_key=""):
    """Run OCR on a receipt image and optionally structure it with an LLM.

    Args:
        image: PIL image from the Gradio image input, or None if nothing
            was uploaded.
        selected_language: Display name; must be a key of AVAILABLE_LANGUAGES.
        provider: "None", "Groq" or "OpenAI".
        api_key: API key for the selected provider; empty means OCR only.

    Returns:
        A dict for the JSON output component: the parsed receipt data, or
        ``{"raw_ocr_text", "note"}`` when no provider/key is given, or an
        ``{"error", ...}`` dict on failure. This function does not raise.
    """
    import tempfile

    if image is None:
        return {
            "error": "No image provided",
            "type": "processing_error"
        }

    image_path = None
    try:
        # Unique file per request so concurrent requests cannot overwrite
        # or delete each other's image.
        fd, image_path = tempfile.mkstemp(suffix=".jpg")
        os.close(fd)

        # JPEG cannot store alpha/palette modes (e.g. RGBA from a PNG upload).
        if image.mode != "RGB":
            image = image.convert("RGB")
        image.save(image_path)

        # Get OCR model and process image
        lang_code = AVAILABLE_LANGUAGES[selected_language]
        ocr_model = get_ocr_model(lang_code)
        ocr_result = ocr_model.ocr(image_path, cls=True)

        # Extract text from results
        extracted_text = _ocr_result_to_text(ocr_result)

        # If no provider/api key, return raw OCR
        if not api_key or provider == "None":
            return {
                "raw_ocr_text": extracted_text,
                "note": "Provide API key and select a provider for structured JSON output"
            }

        # Nothing to structure: skip the LLM call rather than letting the
        # model invent a receipt from empty input.
        if not extracted_text.strip():
            return {
                "error": "No text detected in image",
                "raw_ocr_text": extracted_text
            }

        try:
            if provider == "Groq":
                # Two-step Groq processing
                initial_text = format_with_groq(extracted_text, api_key)
                final_json = refine_json_with_groq(initial_text, api_key)
                return json.loads(final_json)

            if provider == "OpenAI":
                # OpenAI processing
                llm_reply = process_with_openai(extracted_text, api_key)
                return json.loads(llm_reply)

        except json.JSONDecodeError:
            return {
                "error": "Failed to parse response",
                "raw_ocr_text": extracted_text
            }

        # Unrecognized provider: say so explicitly instead of returning None.
        return {
            "error": f"Unknown provider: {provider}",
            "raw_ocr_text": extracted_text
        }

    except Exception as e:
        # UI boundary: surface any failure as data for the JSON component.
        return {
            "error": str(e),
            "type": "processing_error"
        }
    finally:
        # Best-effort cleanup; image_path stays None if mkstemp itself failed.
        if image_path is not None:
            try:
                os.remove(image_path)
            except OSError:
                pass
247
+
248
+ ##################################
249
+ # Gradio Interface
250
+ ##################################
251
# Page-level CSS: cap the width of the Gradio container.
css = """
.gradio-container {max-width: 1100px !important}
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("# Multi-Language Receipt OCR")

    with gr.Row():
        # Left column: inputs and the submit button.
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Upload Receipt Image", height=400)
            language_dropdown = gr.Dropdown(
                choices=list(AVAILABLE_LANGUAGES),
                value="English",
                label="Select Language",
                info="Choose the primary language of the receipt",
            )

            with gr.Row():
                provider_dropdown = gr.Dropdown(
                    choices=PROVIDERS,
                    value="None",
                    label="Select LLM Provider",
                    info="Choose provider for JSON formatting",
                )
                api_key_input = gr.Textbox(
                    label="API Key",
                    placeholder="Enter your API key",
                    type="password",
                    info="Required for JSON formatting",
                )

            submit_button = gr.Button("Process Receipt", variant="primary")

        # Right column: the structured result.
        with gr.Column(scale=1):
            json_output = gr.JSON(label="Extracted Receipt Data", height=500)

    gr.Markdown("""
    ### Usage Instructions
    1. Upload a clear image of your receipt
    2. Select the receipt's primary language
    3. (Optional) Choose a provider and enter API key for JSON formatting
    4. Click 'Process Receipt'

    ### Notes
    - Without an API key, you'll receive raw OCR text
    - For best results, ensure receipt image is clear and well-lit
    - Supported languages include English, Chinese, French, German, and more
    """)

    # Argument order here must match process_receipt's parameter order.
    receipt_inputs = [image_input, language_dropdown, provider_dropdown, api_key_input]
    submit_button.click(fn=process_receipt, inputs=receipt_inputs, outputs=[json_output])

# Close any existing gradio instances
gr.close_all()

# Launch the app
demo.queue(max_size=10)
demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False, share=False)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
paddlepaddle
# app.py uses the PaddleOCR 2.x call style (PaddleOCR(show_log=...),
# ocr(..., cls=True)). NOTE(review): the 3.x line is believed to reject these
# arguments - keep the cap until app.py is ported, and confirm on upgrade.
paddleocr>=2.0.1,<3.0
gradio==4.14.0
groq==0.3.2
openai==1.11.0
Pillow==10.0.0
numpy>=1.21.6