kavg committed on
Commit
415bf3c
1 Parent(s): 71e5e9f

Implementation done

Browse files
Files changed (4) hide show
  1. .gitignore +2 -0
  2. Dockerfile +21 -0
  3. config.py +8 -0
  4. main.py +47 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ trocr-env/
2
+ .env
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.10
5
+
6
+ WORKDIR /code
7
+
8
+ COPY ./requirements.txt /code/requirements.txt
9
+
10
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
11
+
12
+ RUN useradd -m -u 1000 user
13
+ USER user
14
+ ENV HOME=/home/user \
15
+ PATH=/home/user/.local/bin:$PATH
16
+
17
+ WORKDIR $HOME/app
18
+
19
+ COPY --chown=user . $HOME/app
20
+
21
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
config.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings, SettingsConfigDict
2
+ from pydantic import Field
3
+
4
class Settings(BaseSettings):
    """Application settings loaded from the environment or a local ``.env`` file.

    Every field is declared as a plain ``str`` with no default value.
    """

    # Passed to VisionEncoderDecoderModel.from_pretrained in main.py.
    OCR_MODEL: str
    # Passed to AutoTokenizer.from_pretrained in main.py.
    TOKENIZER: str
    # Passed to ViTImageProcessor.from_pretrained in main.py.
    FEATURE_EXTRACTOR: str

    # Read values from a UTF-8 encoded ".env" file when one is present.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
    )
main.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from config import Settings
2
+ import torch
3
+ from PIL import Image
4
+ import io
5
+ from contextlib import asynccontextmanager
6
+ from transformers import VisionEncoderDecoderModel
7
+ from fastapi import FastAPI, UploadFile, Form, HTTPException
8
+ from transformers import TrOCRProcessor, AutoTokenizer, ViTImageProcessor
9
+
10
# Module-level registry shared between the lifespan hook and request handlers.
# It is filled at application startup and emptied again at shutdown.
config = {}


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Populate ``config`` before the app serves requests and clear it after.

    On startup this stores four entries in ``config``:

    * ``'settings'``  - the ``Settings`` instance,
    * ``'device'``    - a ``torch.device`` ("cuda" when available, else "cpu"),
    * ``'processor'`` - a ``TrOCRProcessor`` built from the configured
      tokenizer and image processor,
    * ``'ocr_model'`` - the ``VisionEncoderDecoderModel`` checkpoint.

    NOTE(review): ``config['device']`` is recorded here but the model is not
    moved to it in this function - confirm whether GPU inference is intended.
    """
    app_settings = Settings()
    config['settings'] = app_settings

    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    config['device'] = torch.device(device_name)

    text_tokenizer = AutoTokenizer.from_pretrained(app_settings.TOKENIZER)
    image_processor = ViTImageProcessor.from_pretrained(app_settings.FEATURE_EXTRACTOR)
    config['processor'] = TrOCRProcessor(
        image_processor=image_processor,
        tokenizer=text_tokenizer,
    )
    config['ocr_model'] = VisionEncoderDecoderModel.from_pretrained(app_settings.OCR_MODEL)

    # Hand control back to FastAPI; everything below runs at shutdown.
    yield

    # Drop all references held in the registry so the objects can be released.
    config.clear()


app = FastAPI(lifespan=lifespan)
27
+
28
@app.get("/")
def api_home():
    """Return the static welcome payload served at the root path."""
    welcome = {'detail': 'Welcome to Sinhala OCR Space'}
    return welcome
31
+
32
# The original line read `app.post("/apply-trocr")` without the leading "@",
# which builds the decorator and throws it away: the route was never
# registered. The "@" makes this function the actual POST handler.
@app.post("/apply-trocr")
async def ApplyOCR(file: UploadFile):
    """Run OCR on an uploaded image and return the recognised text.

    Args:
        file: The uploaded image (multipart form field named ``file``).

    Returns:
        ``{"ocr_result": <text>}`` where ``<text>`` is the first decoded
        sequence produced by the model.

    Raises:
        HTTPException: 400 when the upload cannot be opened as an image,
            500 when preprocessing, generation or decoding fails.
    """
    contents = await file.read()

    try:
        # Force three channels: grayscale, palette and RGBA uploads are
        # otherwise passed to the image processor in their native mode.
        image = Image.open(io.BytesIO(contents)).convert("RGB")
    except OSError as exc:
        # PIL reports unreadable or truncated images as OSError subclasses.
        raise HTTPException(
            status_code=400,
            detail=f"Uploaded file is not a valid image: {exc}",
        ) from exc

    try:
        processor = config['processor']
        model = config['ocr_model']

        pixel_values = processor(image, return_tensors="pt").pixel_values
        # Keep the input on the same device as the model weights.
        generated_ids = model.generate(pixel_values.to(model.device))
        generated_text = processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]
    except Exception as exc:
        # Report the failure with a real error status instead of a 200
        # response that merely contains an "error" key.
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    return {"ocr_result": generated_text}