"""FastAPI app exposing a document question answering endpoint.

The interactive OpenAPI docs are served at the root path and double as the
landing page; `/predict` runs the `impira/layoutlm-document-qa` pipeline on an
uploaded image and a question.
"""

from base64 import b64decode, b64encode
from io import BytesIO

from fastapi import FastAPI, File, Form, HTTPException
from PIL import Image
from transformers import pipeline

# Markdown shown at the top of the generated docs page.
# NOTE: backslashes in the curl example are escaped (`\\`) on purpose. In a
# non-raw string a bare backslash before a newline is a line continuation and
# would be dropped, collapsing the example onto one line in the rendered docs.
description = """
## DocQA with 🤗 transformers, FastAPI, and Docker

This app shows how to do Document Question Answering using FastAPI in a Docker Space 🚀

Check out the docs for the `/predict` endpoint below to try it out!

## Usage

#### Python

```python
import requests

url = "https://dog-fastapi-document-qa.hf.space/predict"
r = requests.post(
    url,
    files={"image_file": open("invoice.png", "rb")},
    data={"question": "What is the invoice number?"}
)
print(r.json())
```

#### Curl

```bash
curl -X 'POST' \\
  'https://dog-fastapi-document-qa.hf.space/predict' \\
  -H 'accept: application/json' \\
  -H 'Content-Type: multipart/form-data' \\
  -F 'image_file=@invoice.png;type=image/png' \\
  -F 'question=What is the invoice number?'
```
"""

# NOTE - we configure docs_url to serve the interactive Docs at the root path
# of the app. This way, we can use the docs as a landing page for the app on Spaces.
app = FastAPI(docs_url="/", description=description)

# Built once at import time so every request reuses the same pipeline object.
pipe = pipeline("document-question-answering", model="impira/layoutlm-document-qa")


@app.post("/predict")
def predict(image_file: bytes = File(...), question: str = Form(...)):
    """
    Using the document-question-answering pipeline from `transformers`, take
    a given input document (image) and a question about it, and return the
    predicted answer.

    The model used is available on the hub at:
    [`impira/layoutlm-document-qa`](https://huggingface.co/impira/layoutlm-document-qa).

    If the uploaded file cannot be read as an image, a `400` response is
    returned.
    """
    try:
        image = Image.open(BytesIO(image_file))
        # Image.open is lazy; force a decode here so a corrupt or truncated
        # upload is rejected as a client error instead of failing later
        # inside the pipeline as a 500.
        image.load()
    except OSError as err:
        # Pillow reports unreadable image data as OSError (in recent releases
        # via its UnidentifiedImageError subclass).
        raise HTTPException(
            status_code=400,
            detail="image_file could not be read as an image",
        ) from err

    output = pipe(image, question)
    return output