Vinay15 committed on
Commit
8434495
·
verified ·
1 Parent(s): f2460f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -31
app.py CHANGED
@@ -1,38 +1,46 @@
1
  import gradio as gr
 
 
2
  from PIL import Image
3
- # Assuming 'model' and 'tokenizer' are defined elsewhere in your code
4
- # from your_model_file import model, tokenizer
5
-
6
- def load_image(image_file):
7
- """Load and preprocess the image."""
8
- if isinstance(image_file, Image.Image): # Check if the input is an Image object
9
- return image_file.convert("RGB") # Convert to RGB if necessary
10
- elif isinstance(image_file, str) and (image_file.startswith('http') or image_file.startswith('https')):
11
- # Handle URL case (you can use an external library to fetch the image if needed)
12
- return Image.open(requests.get(image_file, stream=True).raw).convert("RGB")
13
- else:
14
- # Handle file path case
15
- return Image.open(image_file).convert("RGB")
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def perform_ocr(image):
18
- """Perform OCR on the uploaded image."""
19
- try:
20
- # Load and preprocess the image
21
- processed_image = load_image(image)
22
- # Use the model for OCR
23
- res = model.chat(tokenizer, processed_image, ocr_type='ocr')
24
- return res
25
- except Exception as e:
26
- return str(e) # Return the error message
27
-
28
- # Gradio interface setup
29
- iface = gr.Interface(
30
  fn=perform_ocr,
31
- inputs=gr.Image(type="pil"), # Ensure Gradio accepts images as PIL images
32
- outputs="text",
33
- title="OCR Application",
34
- description="Upload an image to perform Optical Character Recognition (OCR)."
35
  )
36
 
37
- if _name_ == "_main_":
38
- iface.launch()
 
1
  import gradio as gr
2
+ import torch
3
+ from transformers import AutoModel, AutoTokenizer
4
  from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ # Check GPU availability
7
+ if torch.cuda.is_available():
8
+ print("CUDA is available! GPU is present.")
9
+ print(f"Number of GPUs: {torch.cuda.device_count()}")
10
+ print(f"GPU Name: {torch.cuda.get_device_name(0)}")
11
+ else:
12
+ print("CUDA is not available. Running on CPU.")
13
+
14
+ # Load the tokenizer and model
15
+ tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
16
+
17
+ # Initialize the model
18
+ if torch.cuda.is_available():
19
+ model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
20
+ model = model.eval().cuda()
21
+ else:
22
+ model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, pad_token_id=tokenizer.eos_token_id)
23
+ model = model.eval() # Keep model on CPU
24
+
25
+ # Define the OCR function
26
  def perform_ocr(image):
27
+ # Convert PIL image to RGB format (if necessary)
28
+ if image.mode != "RGB":
29
+ image = image.convert("RGB")
30
+
31
+ # Perform OCR using the model
32
+ res = model.chat(tokenizer, image, ocr_type='ocr')
33
+
34
+ return res
35
+
36
+ # Define the Gradio interface
37
+ interface = gr.Interface(
 
38
  fn=perform_ocr,
39
+ inputs=gr.Image(type="pil", label="Upload Image"),
40
+ outputs=gr.Textbox(label="Extracted Text"),
41
+ title="OCR and Document Search Web Application",
42
+ description="Upload an image to extract text using the GOT-OCR2_0 model."
43
  )
44
 
45
+ # Launch the Gradio app
46
+ interface.launch()