Luke committed on
Commit
68e1313
·
1 Parent(s): 03b6d75

no message

Browse files
Files changed (4) hide show
  1. .gitignore +2 -0
  2. Preprocess/preprocessImg.py +59 -0
  3. app.py +54 -12
  4. requirements.txt +2 -1
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .idea/*
2
+ *.pyc
Preprocess/preprocessImg.py CHANGED
@@ -27,3 +27,62 @@ def preprocess_image002(image):
27
  gray = cv2.bilateralFilter(gray, 11, 17, 17) # 雙邊濾波去噪
28
  edged = cv2.Canny(gray, 30, 200) # 邊緣檢測
29
  return Image.fromarray(edged)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  gray = cv2.bilateralFilter(gray, 11, 17, 17) # 雙邊濾波去噪
28
  edged = cv2.Canny(gray, 30, 200) # 邊緣檢測
29
  return Image.fromarray(edged)
30
+
31
+
32
# Scheme 3: adaptive thresholding followed by a morphological opening
def preprocess_image003(image):
    """Binarize an image with adaptive thresholding, then remove small specks.

    Args:
        image: PIL image to preprocess. It is normalized to RGB first, so
            grayscale, palette and RGBA inputs are accepted as well.

    Returns:
        PIL.Image.Image: single-channel image whose pixels are 0 or 255.
    """
    # Arrays built from PIL images are RGB-ordered, so the grayscale
    # conversion must use COLOR_RGB2GRAY; COLOR_BGR2GRAY would apply the red
    # luma weight to the blue channel and vice versa.
    rgb = np.array(image.convert("RGB"))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    # Gaussian-weighted local threshold: block size 11, constant 2.
    adaptive_thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    # Morphological opening with a 3x3 rectangle to drop small noise dots.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    morph = cv2.morphologyEx(adaptive_thresh, cv2.MORPH_OPEN, kernel)
    return Image.fromarray(morph)
44
+
45
+
46
# Scheme 4: CLAHE (contrast-limited adaptive histogram equalization)
def preprocess_image004(image):
    """Boost local contrast with CLAHE, then binarize with Otsu's threshold.

    Args:
        image: PIL image to preprocess. It is normalized to RGB first, so
            grayscale, palette and RGBA inputs are accepted as well.

    Returns:
        PIL.Image.Image: single-channel image whose pixels are 0 or 255.
    """
    # Arrays built from PIL images are RGB-ordered, so the grayscale
    # conversion must use COLOR_RGB2GRAY; COLOR_BGR2GRAY would apply the red
    # luma weight to the blue channel and vice versa.
    rgb = np.array(image.convert("RGB"))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    # Apply CLAHE with clip limit 2.0 on an 8x8 tile grid.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    clahe_image = clahe.apply(gray)
    # Binarize; with THRESH_OTSU the threshold argument (0) is ignored and
    # the threshold is chosen automatically.
    _, binary = cv2.threshold(
        clahe_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return Image.fromarray(binary)
58
+
59
+
60
# Scheme 5: global histogram equalization followed by Gaussian blur
def preprocess_image005(image):
    """Equalize the histogram, blur, then binarize with Otsu's threshold.

    Args:
        image: PIL image to preprocess. It is normalized to RGB first, so
            grayscale, palette and RGBA inputs are accepted as well.

    Returns:
        PIL.Image.Image: single-channel image whose pixels are 0 or 255.
    """
    # Arrays built from PIL images are RGB-ordered, so the grayscale
    # conversion must use COLOR_RGB2GRAY; COLOR_BGR2GRAY would apply the red
    # luma weight to the blue channel and vice versa.
    rgb = np.array(image.convert("RGB"))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    # Global histogram equalization.
    equalized = cv2.equalizeHist(gray)
    # 5x5 Gaussian blur; sigma 0 lets OpenCV derive it from the kernel size.
    blurred = cv2.GaussianBlur(equalized, (5, 5), 0)
    # Binarize; with THRESH_OTSU the threshold argument (0) is ignored and
    # the threshold is chosen automatically.
    _, binary = cv2.threshold(
        blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return Image.fromarray(binary)
73
+
74
+
75
# Scheme 6: non-local-means denoising followed by sharpening
def preprocess_image006(image):
    """Denoise, sharpen, then binarize with Otsu's threshold.

    Args:
        image: PIL image to preprocess. It is normalized to RGB first, so
            grayscale, palette and RGBA inputs are accepted as well.

    Returns:
        PIL.Image.Image: single-channel image whose pixels are 0 or 255.
    """
    # Arrays built from PIL images are RGB-ordered, so the grayscale
    # conversion must use COLOR_RGB2GRAY; COLOR_BGR2GRAY would apply the red
    # luma weight to the blue channel and vice versa.
    rgb = np.array(image.convert("RGB"))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    # Non-local-means denoising: h=30, template window 7, search window 21.
    denoised = cv2.fastNlMeansDenoising(gray, None, 30, 7, 21)
    # 3x3 sharpening kernel (weights sum to 1). filter2D documents a
    # floating-point kernel, so the dtype is made explicit.
    kernel = np.array(
        [[0, -1, 0], [-1, 5, -1], [0, -1, 0]], dtype=np.float32
    )
    sharpened = cv2.filter2D(denoised, -1, kernel)
    # Binarize; with THRESH_OTSU the threshold argument (0) is ignored and
    # the threshold is chosen automatically.
    _, binary = cv2.threshold(
        sharpened, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return Image.fromarray(binary)
app.py CHANGED
@@ -2,7 +2,10 @@ import os
2
  import gradio as gr
3
  from Plan.AiLLM import llm_recognition
4
  from Plan.pytesseractOCR import ocr_recognition
5
- from Preprocess.preprocessImg import preprocess_image001, preprocess_image002
 
 
 
6
 
7
  # 取得所有語言清單
8
  languages = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
@@ -15,8 +18,18 @@ def preprocess_and_ocr(image, valid_type, language):
15
  # 方案二
16
  pre_img_002 = preprocess_image002(image)
17
  ocr_result_002 = ocr_recognition(pre_img_002, valid_type, language)
 
 
 
 
 
 
 
 
 
18
 
19
- return pre_img_001, pre_img_002, ocr_result_001, ocr_result_002
 
20
 
21
 
22
  def preprocess_and_llm(image, valid_type, language):
@@ -26,34 +39,63 @@ def preprocess_and_llm(image, valid_type, language):
26
  # 方案二
27
  pre_img_002 = preprocess_image002(image)
28
  llm_result_002 = llm_recognition(pre_img_002, valid_type, language)
 
 
 
 
 
 
 
 
 
29
 
30
- return pre_img_001, pre_img_002, llm_result_001, llm_result_002
 
31
 
32
 
33
  with gr.Blocks() as demo:
34
  with gr.Row():
35
  image_input = gr.Image(type="pil", label="上傳圖片")
36
- preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
37
- preprocess_output_002 = gr.Image(type="pil", label="預處理後的圖片-方案二")
38
-
39
- with gr.Row():
40
  validation_type = gr.Dropdown(choices=["身分證正面", "身分證反面"], label="驗證類別")
41
  language_dropdown = gr.Dropdown(choices=languages, value="chi_tra", label="語言")
42
- # preprocessed_type = gr.Radio(["001", "002"], label="解析方案")
43
 
44
  with gr.Row():
45
  ocr_button = gr.Button("使用 OCR")
46
  llm_button = gr.Button("使用 AI LLM")
47
 
48
  with gr.Row():
 
49
  ocr_output_001 = gr.JSON(label="OCR-001-解析結果")
 
 
 
50
  ocr_output_002 = gr.JSON(label="OCR-002-解析結果")
51
- llm_output_001 = gr.JSON(label="AiLLM-001 解析結果")
52
- llm_output_002 = gr.JSON(label="AiLLM-002 解析結果")
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  ocr_button.click(preprocess_and_ocr, inputs=[image_input, validation_type, language_dropdown],
55
- outputs=[preprocess_output_001, preprocess_output_002, ocr_output_001, ocr_output_002])
 
 
 
 
56
  llm_button.click(preprocess_and_llm, inputs=[image_input, validation_type, language_dropdown],
57
- outputs=[preprocess_output_001, preprocess_output_002, llm_output_001, llm_output_002])
 
 
 
 
58
 
59
  demo.launch(share=False)
 
2
  import gradio as gr
3
  from Plan.AiLLM import llm_recognition
4
  from Plan.pytesseractOCR import ocr_recognition
5
+ from Preprocess.preprocessImg import (
6
+ preprocess_image001, preprocess_image002, preprocess_image003,
7
+ preprocess_image004, preprocess_image005
8
+ )
9
 
10
  # 取得所有語言清單
11
  languages = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
 
18
  # 方案二
19
  pre_img_002 = preprocess_image002(image)
20
  ocr_result_002 = ocr_recognition(pre_img_002, valid_type, language)
21
+ # 方案三
22
+ pre_img_003 = preprocess_image003(image)
23
+ ocr_result_003 = ocr_recognition(pre_img_003, valid_type, language)
24
+ # 方案四
25
+ pre_img_004 = preprocess_image004(image)
26
+ ocr_result_004 = ocr_recognition(pre_img_004, valid_type, language)
27
+ # 方案五
28
+ pre_img_005 = preprocess_image005(image)
29
+ ocr_result_005 = ocr_recognition(pre_img_005, valid_type, language)
30
 
31
+ return (pre_img_001, pre_img_002, pre_img_003, pre_img_004, pre_img_005,
32
+ ocr_result_001, ocr_result_002, ocr_result_003, ocr_result_004, ocr_result_005)
33
 
34
 
35
  def preprocess_and_llm(image, valid_type, language):
 
39
  # 方案二
40
  pre_img_002 = preprocess_image002(image)
41
  llm_result_002 = llm_recognition(pre_img_002, valid_type, language)
42
+ # 方案三
43
+ pre_img_003 = preprocess_image003(image)
44
+ llm_result_003 = llm_recognition(pre_img_003, valid_type, language)
45
+ # 方案四
46
+ pre_img_004 = preprocess_image004(image)
47
+ llm_result_004 = llm_recognition(pre_img_004, valid_type, language)
48
+ # 方案五
49
+ pre_img_005 = preprocess_image005(image)
50
+ llm_result_005 = llm_recognition(pre_img_005, valid_type, language)
51
 
52
+ return (pre_img_001, pre_img_002, pre_img_003, pre_img_004, pre_img_005,
53
+ llm_result_001, llm_result_002, llm_result_003, llm_result_004, llm_result_005)
54
 
55
 
56
  with gr.Blocks() as demo:
57
  with gr.Row():
58
  image_input = gr.Image(type="pil", label="上傳圖片")
 
 
 
 
59
  validation_type = gr.Dropdown(choices=["身分證正面", "身分證反面"], label="驗證類別")
60
  language_dropdown = gr.Dropdown(choices=languages, value="chi_tra", label="語言")
 
61
 
62
  with gr.Row():
63
  ocr_button = gr.Button("使用 OCR")
64
  llm_button = gr.Button("使用 AI LLM")
65
 
66
  with gr.Row():
67
+ preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
68
  ocr_output_001 = gr.JSON(label="OCR-001-解析結果")
69
+ llm_output_001 = gr.JSON(label="AiLLM-001-解析結果")
70
+ with gr.Row():
71
+ preprocess_output_002 = gr.Image(type="pil", label="預處理後的圖片-方案二")
72
  ocr_output_002 = gr.JSON(label="OCR-002-解析結果")
73
+ llm_output_002 = gr.JSON(label="AiLLM-002-解析結果")
74
+
75
+ with gr.Row():
76
+ preprocess_output_003 = gr.Image(type="pil", label="預處理後的圖片-方案三")
77
+ ocr_output_003 = gr.JSON(label="OCR-003-解析結果")
78
+ llm_output_003 = gr.JSON(label="AiLLM-003-解析結果")
79
+ with gr.Row():
80
+ preprocess_output_004 = gr.Image(type="pil", label="預處理後的圖片-方案四")
81
+ ocr_output_004 = gr.JSON(label="OCR-004-解析結果")
82
+ llm_output_004 = gr.JSON(label="AiLLM-004-解析結果")
83
+ with gr.Row():
84
+ preprocess_output_005 = gr.Image(type="pil", label="預處理後的圖片-方案五")
85
+ ocr_output_005 = gr.JSON(label="OCR-005-解析結果")
86
+ llm_output_005 = gr.JSON(label="AiLLM-005-解析結果")
87
 
88
  ocr_button.click(preprocess_and_ocr, inputs=[image_input, validation_type, language_dropdown],
89
+ outputs=[
90
+ preprocess_output_001, preprocess_output_002, preprocess_output_003, preprocess_output_004,
91
+ preprocess_output_005,
92
+ ocr_output_001, ocr_output_002, ocr_output_003, ocr_output_004, ocr_output_005
93
+ ])
94
  llm_button.click(preprocess_and_llm, inputs=[image_input, validation_type, language_dropdown],
95
+ outputs=[
96
+ preprocess_output_001, preprocess_output_002, preprocess_output_003, preprocess_output_004,
97
+ preprocess_output_005,
98
+ llm_output_001, llm_output_002, llm_output_003, llm_output_004, llm_output_005
99
+ ])
100
 
101
  demo.launch(share=False)
requirements.txt CHANGED
@@ -4,4 +4,5 @@ transformers
4
  Pillow
5
  torch
6
  huggingface-hub
7
- opencv-python
 
 
4
  Pillow
5
  torch
6
  huggingface-hub
7
+ opencv-python
8
+ numpy