medieval-htr / app.py
wjbmattingly's picture
Update app.py
63782be verified
raw
history blame
2.33 kB
import functools

import gradio as gr
import requests
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Maps the label shown in the UI dropdown to the Hugging Face Hub model ID
# that is loaded when that label is selected. Insertion order is the order
# in which the choices are listed.
MODEL_OPTIONS = {
    # General-purpose handwriting model.
    "Microsoft Handwritten": "microsoft/trocr-base-handwritten",
    # Medieval models published under the `medieval-data` namespace.
    "Medieval Base": "medieval-data/trocr-medieval-base",
    "Medieval Latin Caroline": "medieval-data/trocr-medieval-latin-caroline",
    "Medieval Castilian Hybrida": "medieval-data/trocr-medieval-castilian-hybrida",
    "Medieval Humanistica": "medieval-data/trocr-medieval-humanistica",
    "Medieval Textualis": "medieval-data/trocr-medieval-textualis",
    "Medieval Cursiva": "medieval-data/trocr-medieval-cursiva",
    "Medieval Semitextualis": "medieval-data/trocr-medieval-semitextualis",
    "Medieval Praegothica": "medieval-data/trocr-medieval-praegothica",
    "Medieval Semihybrida": "medieval-data/trocr-medieval-semihybrida",
    "Medieval Print": "medieval-data/trocr-medieval-print",
}
# Download the example image(s) at startup and store them as local files
# named image_<idx>.png, which the `examples` entry of the interface refers to.
urls = [
    # Canonical host name (no trailing dot after ".co").
    'https://huggingface.co/medieval-data/trocr-medieval-base/resolve/main/images/caroline-1.png',
]
for idx, url in enumerate(urls):
    # The timeout keeps a stalled download from blocking app startup forever;
    # the context manager releases the connection once the image is saved.
    with requests.get(url, stream=True, timeout=30) as response:
        # Fail with a clear HTTP error instead of handing an error page to PIL.
        response.raise_for_status()
        # The raw stream is not content-decoded (gzip/deflate) unless asked.
        response.raw.decode_content = True
        with Image.open(response.raw) as image:
            image.save(f"image_{idx}.png")
# Only the most recently used model is kept: repeated requests with the same
# selection skip the expensive reload, while memory stays bounded to a single
# cached model instead of growing with every entry in MODEL_OPTIONS.
@functools.lru_cache(maxsize=1)
def load_model(model_name):
    """Return the ``(processor, model)`` pair for a dropdown label.

    Args:
        model_name: A key of ``MODEL_OPTIONS`` (the label shown in the UI).

    Returns:
        A tuple ``(TrOCRProcessor, VisionEncoderDecoderModel)`` loaded with
        ``from_pretrained`` for the model ID mapped to ``model_name``. The
        same objects are returned for consecutive calls with the same name.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODEL_OPTIONS``.
    """
    model_id = MODEL_OPTIONS[model_name]
    processor = TrOCRProcessor.from_pretrained(model_id)
    model = VisionEncoderDecoderModel.from_pretrained(model_id)
    return processor, model
def process_image(image, model_name):
    """Transcribe the text in ``image`` with the selected TrOCR model.

    Args:
        image: The PIL image delivered by the image input, or ``None`` when
            the form is submitted without an image.
        model_name: The selected dropdown label; expected to be a key of
            ``MODEL_OPTIONS``.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.

    Raises:
        gr.Error: If no image was provided or no known model is selected.
    """
    # Surface missing inputs as readable UI errors rather than an opaque
    # AttributeError / KeyError deep inside the pipeline.
    if image is None:
        raise gr.Error("Please upload an image to transcribe.")
    if model_name not in MODEL_OPTIONS:
        raise gr.Error("Please select a model.")

    processor, model = load_model(model_name)

    # Prepare the image. Converting to RGB is a defensive step so grayscale
    # or RGBA inputs reach the processor as three-channel images.
    pixel_values = processor(image.convert("RGB"), return_tensors="pt").pixel_values

    # Generate with the model's own default generation settings (no
    # generation arguments are overridden here).
    generated_ids = model.generate(pixel_values)

    # Decode; a single image was passed in, so take the first sequence.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return generated_text
# Build the Gradio interface: an image plus a model selector in, text out.
title = "Interactive demo: TrOCR Model Switcher"
description = "Demo for the Medieval TrOCR HTR Models."

iface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil"),
        gr.Dropdown(
            choices=list(MODEL_OPTIONS.keys()),
            # Explicit initial selection so the handler is not called with
            # no model chosen, regardless of the Gradio version's default.
            value=next(iter(MODEL_OPTIONS)),
            label="Select Model",
        ),
    ],
    outputs=gr.Textbox(),
    title=title,
    description=description,
    # Each example row is [image file, dropdown label]; image_0.png is the
    # file written by the startup download of the example image.
    examples=[
        ["image_0.png", "Medieval Latin Caroline"],
    ],
)

iface.launch(debug=True, share=True)