Implementation done
- .gitignore +2 -0
- Dockerfile +21 -0
- config.py +8 -0
- main.py +47 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
+trocr-env/
+.env
Dockerfile
ADDED
@@ -0,0 +1,21 @@
+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+
+FROM python:3.10
+
+WORKDIR /code
+
+COPY ./requirements.txt /code/requirements.txt
+
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+WORKDIR $HOME/app
+
+COPY --chown=user . $HOME/app
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
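Note that the Dockerfile installs a requirements.txt that is not part of this commit. Based on the imports in config.py and main.py, a minimal sketch of that file could look as follows; the exact package list and any version pins are assumptions, and python-multipart is listed because FastAPI needs it to parse UploadFile form data.

fastapi
uvicorn
python-multipart
torch
transformers
pillow
pydantic-settings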
config.py
ADDED
@@ -0,0 +1,8 @@
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from pydantic import Field
+
+class Settings(BaseSettings):
+    model_config = SettingsConfigDict(env_file='.env', env_file_encoding='utf-8')
+    OCR_MODEL: str
+    TOKENIZER: str
+    FEATURE_EXTRACTOR: str
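The Settings class loads its three values from a .env file, which the .gitignore keeps out of the repository. A hypothetical .env sketch is shown below; the values are placeholders, not the identifiers actually used by this Space.

OCR_MODEL=<hub id or local path of the fine-tuned VisionEncoderDecoderModel>
TOKENIZER=<hub id of the Sinhala tokenizer>
FEATURE_EXTRACTOR=<hub id of the ViT image processor>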
main.py
ADDED
@@ -0,0 +1,47 @@
+from config import Settings
+import torch
+from PIL import Image
+import io
+from contextlib import asynccontextmanager
+from transformers import VisionEncoderDecoderModel
+from fastapi import FastAPI, UploadFile, Form, HTTPException
+from transformers import TrOCRProcessor, AutoTokenizer, ViTImageProcessor
+
+config = {}
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    settings = Settings()
+    config['settings'] = settings
+    config['device'] = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    tokenizer = AutoTokenizer.from_pretrained(settings.TOKENIZER)
+    feature_extractor = ViTImageProcessor.from_pretrained(settings.FEATURE_EXTRACTOR)
+    config['processor'] = TrOCRProcessor(image_processor=feature_extractor, tokenizer=tokenizer)
+    config['ocr_model'] = VisionEncoderDecoderModel.from_pretrained(settings.OCR_MODEL)
+
+    yield
+    # Clean up and release the resources
+    config.clear()
+
+app = FastAPI(lifespan=lifespan)
+
+@app.get("/")
+def api_home():
+    return {'detail': 'Welcome to Sinhala OCR Space'}
+
+@app.post("/apply-trocr")
+async def ApplyOCR(file: UploadFile):
+    try:
+        # Read the uploaded image file and normalise it to RGB
+        contents = await file.read()
+        image = Image.open(io.BytesIO(contents)).convert("RGB")
+
+        pixel_values = config['processor'](image, return_tensors="pt").pixel_values
+        generated_ids = config['ocr_model'].generate(pixel_values)
+        generated_text = config['processor'].batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+        # Return the extracted text as the response
+        return {"ocr_result": generated_text}
+    except Exception as e:
+        # Handle any exceptions that may occur
+        return {"error": str(e)}
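Once the Space is running, the /apply-trocr route accepts a multipart image upload and returns the decoded text. A minimal client sketch in Python, assuming the requests library is installed and with the Space URL left as a placeholder:

import requests

# Placeholder URL: substitute the actual Space endpoint
SPACE_URL = "https://<your-space>.hf.space"

with open("sample.png", "rb") as f:
    # The endpoint expects the image under the form field name "file"
    response = requests.post(
        f"{SPACE_URL}/apply-trocr",
        files={"file": ("sample.png", f, "image/png")},
    )

# Expected shape: {"ocr_result": "..."} on success, {"error": "..."} on failure
print(response.json())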