Ubuntu committed on
Commit
34b23f6
·
1 Parent(s): 00a0f2e

updated project description, added example images

Browse files
app.py CHANGED
@@ -5,21 +5,25 @@ from predict_cheque_parser import parse_cheque_with_donut
5
 
6
  ##Create list of examples to be loaded
7
  example_list = glob.glob("examples/cheque_parser/*")
8
- faulty_cheques_list = glob.glob("examples/cheque_analyze/*")
9
  example_list = list(map(lambda el:[el], example_list))
10
- faulty_cheques_list = list(map(lambda el:[el], faulty_cheques_list))
11
 
12
- demo = gr.Blocks(css="#warning {color: red}")
13
 
14
  with demo:
15
 
16
  gr.Markdown("# **<p align='center'>ChequeEasy: Banking with Transformers </p>**")
17
- gr.Markdown("This space demonstrates the use of Donut proposed in this <a href=\"https://arxiv.org/abs/2111.15664/\">paper </a>")
 
 
 
 
 
18
 
19
  with gr.Tabs():
20
 
21
  with gr.TabItem("Cheque Parser"):
22
- gr.Markdown("The module is used to extract details filled by a bank customer from cheques. At present the model is trained to extract details like - payee_name, amount_in_words, amount_in_figures. This model can be further trained to parse additional details like micr_code, cheque_number, account_number, etc")
 
23
  with gr.Box():
24
  gr.Markdown("**Upload Cheque**")
25
  input_image_parse = gr.Image(type='filepath', label="Input Cheque")
@@ -30,12 +34,8 @@ with demo:
30
  amt_in_words = gr.Textbox(label="Courtesy Amount")
31
  amt_in_figures = gr.Textbox(label="Legal Amount")
32
  cheque_date = gr.Textbox(label="Cheque Date")
33
-
34
- # micr_code = gr.Textbox(label="MICR code")
35
- # cheque_number = gr.Textbox(label="Cheque Number")
36
- # account_number = gr.Textbox(label="Account Number")
37
 
38
- amts_matching = gr.Checkbox(label="Legal & Courtesy Amount Matching", elem_id="warning")
39
  stale_check = gr.Checkbox(label="Stale Cheque")
40
 
41
  with gr.Box():
@@ -46,37 +46,10 @@ with demo:
46
  with gr.Column():
47
  gr.Examples(example_list, [input_image_parse],
48
  [payee_name,amt_in_words,amt_in_figures,cheque_date],parse_cheque_with_donut,cache_examples=False)
49
- # micr_code,cheque_number,account_number,
50
- # amts_matching, stale_check]#,cache_examples=True)
51
-
52
-
53
- with gr.TabItem("Quality Analyzer"):
54
- gr.Markdown("The module is used to detect any mistakes made by bank customers while filling out the cheque or while taking a snapshot of the cheque. At present the model is trained to find mistakes like -'object blocking cheque', 'overwriting in cheque'. ")
55
- with gr.Box():
56
- gr.Markdown("**Upload Cheque**")
57
- input_image_detect = gr.Image(type='filepath',label="Input Cheque", show_label=True)
58
-
59
- with gr.Box(): # with gr.Column():
60
- gr.Markdown("**Cheque Quality Results:**")
61
- output_detections = gr.Image(label="Analyzed Cheque Image", show_label=True)
62
- output_text = gr.Textbox()
63
-
64
- with gr.Box():
65
- gr.Markdown("**Predict**")
66
- with gr.Row():
67
- analyze_cheque = gr.Button("Call YOLOS 🤙")
68
-
69
- gr.Markdown("**Examples:**")
70
-
71
- with gr.Column():
72
- gr.Examples(faulty_cheques_list, input_image_detect, [output_detections, output_text])#, predict, cache_examples=True)
73
 
74
 
75
  parse_cheque.click(parse_cheque_with_donut, inputs=input_image_parse, outputs=[payee_name,amt_in_words,amt_in_figures,cheque_date,amts_matching,stale_check])
76
- # micr_code,cheque_number,account_number,
77
- # amts_matching, stale_check])
78
- # analyze_cheque.click(predict, inputs=input_image_detect, outputs=[output_detections, output_text])
79
-
80
  gr.Markdown('\n Solution built by: <a href=\"https://www.linkedin.com/in/shivalika-singh/\">Shivalika Singh</a>')
81
 
82
  demo.launch()
 
5
 
6
  ##Create list of examples to be loaded
7
  example_list = glob.glob("examples/cheque_parser/*")
 
8
  example_list = list(map(lambda el:[el], example_list))
 
9
 
10
+ demo = gr.Blocks()
11
 
12
  with demo:
13
 
14
  gr.Markdown("# **<p align='center'>ChequeEasy: Banking with Transformers </p>**")
15
+ gr.Markdown("ChequeEasy is a project that aims to simply the process of approval of cheques. Leveraging recent advances in Visual Document Understanding (VDU) domain to extract relevant data from cheques and make the whole process quicker and easier for both bank officials and customers. \
16
+ This project leverages Donut model proposed in this <a href=\"https://arxiv.org/abs/2111.15664/\">paper </a> for the parsing of the required data from cheques." \
17
+ "Donut is based on a very simple transformer encoder and decoder architecture. It's main USP is that it is an OCR-free approach to information extraction from documents. \
18
+ OCR based techniques come with several limitations such as use of additional downstream models, lack of understanding about document structure, use of hand crafted rules,etc. \
19
+ Donut helps you get rid of all of these OCR specific limitations.")
20
+
21
 
22
  with gr.Tabs():
23
 
24
  with gr.TabItem("Cheque Parser"):
25
+ gr.Markdown("This module is used to extract details filled by a bank customer from cheques. At present the model is trained to extract details like - payee_name, amount_in_words, amount_in_figures. \
26
+ This model can be further trained to parse additional details like micr_code, cheque_number, account_number, etc")
27
  with gr.Box():
28
  gr.Markdown("**Upload Cheque**")
29
  input_image_parse = gr.Image(type='filepath', label="Input Cheque")
 
34
  amt_in_words = gr.Textbox(label="Courtesy Amount")
35
  amt_in_figures = gr.Textbox(label="Legal Amount")
36
  cheque_date = gr.Textbox(label="Cheque Date")
 
 
 
 
37
 
38
+ amts_matching = gr.Checkbox(label="Legal & Courtesy Amount Matching")
39
  stale_check = gr.Checkbox(label="Stale Cheque")
40
 
41
  with gr.Box():
 
46
  with gr.Column():
47
  gr.Examples(example_list, [input_image_parse],
48
  [payee_name,amt_in_words,amt_in_figures,cheque_date],parse_cheque_with_donut,cache_examples=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
 
51
  parse_cheque.click(parse_cheque_with_donut, inputs=input_image_parse, outputs=[payee_name,amt_in_words,amt_in_figures,cheque_date,amts_matching,stale_check])
52
+
 
 
 
53
  gr.Markdown('\n Solution built by: <a href=\"https://www.linkedin.com/in/shivalika-singh/\">Shivalika Singh</a>')
54
 
55
  demo.launch()
examples/cheque_parser/2.jpg ADDED
examples/cheque_parser/3.jpg ADDED
examples/cheque_parser/5000.jpg ADDED
examples/cheque_parser/5805.jpg ADDED
examples/cheque_parser/5877.jpg ADDED
predict_cheque_parser.py CHANGED
@@ -13,8 +13,8 @@ TASK_PROMPT = "<s_cord-v2>"
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
14
 
15
  def load_donut_model_and_processor():
16
- donut_processor = DonutProcessor.from_pretrained(CHEQUE_PARSER_MODEL,use_auth_token='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')
17
- model = VisionEncoderDecoderModel.from_pretrained(CHEQUE_PARSER_MODEL,use_auth_token='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')
18
  model.to(device)
19
  return donut_processor, model
20
 
@@ -69,6 +69,9 @@ def parse_cheque_with_donut(input_image_path):
69
  macthing_amts = match_legal_and_courstesy_amount(amt_in_words,amt_in_figures)
70
 
71
  payee_name = cheque_details_json['cheque_details'][2]['payee_name']
 
 
 
72
  cheque_date = '06/05/2022'
73
  stale_cheque = check_if_cheque_is_stale(cheque_date)
74
 
@@ -86,8 +89,11 @@ def spell_correction(amt_in_words):
86
 
87
  def match_legal_and_courstesy_amount(legal_amount,courtesy_amount):
88
  macthing_amts = False
 
 
89
  corrected_amt_in_words = spell_correction(legal_amount)
90
  print("corrected_amt_in_words:",corrected_amt_in_words)
 
91
  numeric_legal_amt = w2n.word_to_num(corrected_amt_in_words)
92
  print("numeric_legal_amt:",numeric_legal_amt)
93
  if int(numeric_legal_amt) == int(courtesy_amount):
 
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
14
 
15
  def load_donut_model_and_processor():
16
+ donut_processor = DonutProcessor.from_pretrained(CHEQUE_PARSER_MODEL)
17
+ model = VisionEncoderDecoderModel.from_pretrained(CHEQUE_PARSER_MODEL)
18
  model.to(device)
19
  return donut_processor, model
20
 
 
69
  macthing_amts = match_legal_and_courstesy_amount(amt_in_words,amt_in_figures)
70
 
71
  payee_name = cheque_details_json['cheque_details'][2]['payee_name']
72
+
73
+ ## In the cheques dataset used to train the model -> all the cheques are dated '06/05/2022'
74
+ ## TODO: train the model to extract the cheque date
75
  cheque_date = '06/05/2022'
76
  stale_cheque = check_if_cheque_is_stale(cheque_date)
77
 
 
89
 
90
  def match_legal_and_courstesy_amount(legal_amount,courtesy_amount):
91
  macthing_amts = False
92
+ if len(legal_amount) == 0:
93
+ return macthing_amts
94
  corrected_amt_in_words = spell_correction(legal_amount)
95
  print("corrected_amt_in_words:",corrected_amt_in_words)
96
+
97
  numeric_legal_amt = w2n.word_to_num(corrected_amt_in_words)
98
  print("numeric_legal_amt:",numeric_legal_amt)
99
  if int(numeric_legal_amt) == int(courtesy_amount):