Spaces:

Vinay15
/

OCR_and_Document_Search_Web_Application

Sleeping

File size: 1,407 Bytes

b42b1aa
8434495
f2460f7
c920662
dec293d
9184993
dec293d
e8ba698
 
 
 
8434495
dec293d
e8ba698
 
3534c83
dec293d
 
e8ba698
dec293d
e8ba698
 
76581dc
e8ba698
 
 
8434495
e8ba698
dec293d
e8ba698
 
 
dec293d
 
 
 
 
ce4c6de

import os
import tempfile

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Hub repository that provides both the tokenizer and the OCR model
MODEL_ID = 'ucaslcl/GOT-OCR2_0'

# Tokenizer comes first: its EOS token id is reused below as the model's pad token id
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    pad_token_id=tokenizer.eos_token_id,
)

# Prefer CUDA when torch reports it as available; otherwise stay on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Put the model in evaluation mode and move it onto the selected device
model = model.eval().to(device)

# Function to perform OCR on the image
def perform_ocr(image):
    """Run the OCR model on an image and return its output.

    Args:
        image: A PIL image (the Gradio input below is declared with
            ``type="pil"``), or ``None`` when no image was supplied.

    Returns:
        The value returned by ``model.chat(..., ocr_type='ocr')`` for the
        image, or an empty string when ``image`` is ``None``.
    """
    if image is None:
        # Nothing to process; avoids an AttributeError on ``None.save``.
        return ""

    # Pillow refuses to write modes such as RGBA or P as JPEG (it raises
    # OSError), so normalise anything the JPEG writer does not accept.
    if image.mode not in ("1", "L", "RGB", "CMYK", "YCbCr"):
        image = image.convert("RGB")

    # model.chat is handed a file path, so the image is staged on disk first.
    # The temporary directory, and the file inside it, is removed when the
    # ``with`` block exits -- including when inference raises -- so repeated
    # requests no longer leave files behind.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_image_path = os.path.join(temp_dir, "input.jpg")
        image.save(temp_image_path)
        return model.chat(tokenizer, temp_image_path, ocr_type='ocr')

# Input/output components: the upload is delivered to perform_ocr as a PIL
# image, and the function's return value is displayed in a textbox
image_input = gr.Image(type="pil")
text_output = gr.Textbox()

# Wire the components to perform_ocr in a single-function Gradio interface
interface = gr.Interface(
    fn=perform_ocr,
    inputs=image_input,
    outputs=text_output,
    title="OCR Web App",
    description="Upload an image to extract text using the GOT-OCR2.0 model.",
)

# Start serving the app
interface.launch()