"""Gradio demo that transcribes a text-line image with a selectable TrOCR model."""

from functools import lru_cache
from io import BytesIO

import gradio as gr
import requests
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Dictionary of model names and their corresponding HuggingFace model IDs
MODEL_OPTIONS = {
    "Microsoft Handwritten": "microsoft/trocr-base-handwritten",
    "Medieval Base": "medieval-data/trocr-medieval-base",
    "Medieval Latin Caroline": "medieval-data/trocr-medieval-latin-caroline",
    "Medieval Castilian Hybrida": "medieval-data/trocr-medieval-castilian-hybrida",
    "Medieval Humanistica": "medieval-data/trocr-medieval-humanistica",
    "Medieval Textualis": "medieval-data/trocr-medieval-textualis",
    "Medieval Cursiva": "medieval-data/trocr-medieval-cursiva",
    "Medieval Semitextualis": "medieval-data/trocr-medieval-semitextualis",
    "Medieval Praegothica": "medieval-data/trocr-medieval-praegothica",
    "Medieval Semihybrida": "medieval-data/trocr-medieval-semihybrida",
    "Medieval Print": "medieval-data/trocr-medieval-print"
}

# Seconds to wait for the example-image server before giving up, so a stalled
# connection cannot hang application start-up forever.
_DOWNLOAD_TIMEOUT_SECONDS = 30

# Number of (processor, model) pairs kept in memory at once. Bounded so that
# switching among many checkpoints does not keep all of them resident.
_MODEL_CACHE_SIZE = 2

# Load image examples
# NOTE(review): the host below is written with a trailing dot
# ("huggingface.co."); confirm that this is intentional.
urls = [
    'https://huggingface.co./medieval-data/trocr-medieval-base/resolve/main/images/caroline-1.png'
]
for idx, url in enumerate(urls):
    # The context manager releases the connection; raise_for_status() turns an
    # HTTP error page into an explicit HTTPError instead of an image-decoding
    # failure further down.
    with requests.get(url, timeout=_DOWNLOAD_TIMEOUT_SECONDS) as response:
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))
        image.save(f"image_{idx}.png")


@lru_cache(maxsize=_MODEL_CACHE_SIZE)
def load_model(model_name):
    """Return the ``(processor, model)`` pair for a display name.

    Results are memoised, so repeated requests for the same model reuse the
    objects that were already loaded instead of calling ``from_pretrained``
    again on every request.

    Args:
        model_name: A key of ``MODEL_OPTIONS``.

    Returns:
        A tuple ``(TrOCRProcessor, VisionEncoderDecoderModel)``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODEL_OPTIONS``.
    """
    model_id = MODEL_OPTIONS[model_name]
    processor = TrOCRProcessor.from_pretrained(model_id)
    model = VisionEncoderDecoderModel.from_pretrained(model_id)
    return processor, model


def process_image(image, model_name):
    """Transcribe the text in ``image`` using the selected model.

    Args:
        image: The input image (the interface supplies a PIL image), or
            ``None`` when nothing was uploaded.
        model_name: A key of ``MODEL_OPTIONS``, or ``None`` when no model
            was selected.

    Returns:
        The decoded text of the first generated sequence.

    Raises:
        gr.Error: If no image was provided or no known model was selected.
    """
    # Fail with a readable message instead of an opaque error from the
    # processor or a KeyError from the model lookup.
    if image is None:
        raise gr.Error("Please provide an image to transcribe.")
    if model_name not in MODEL_OPTIONS:
        raise gr.Error("Please select a model from the dropdown.")

    processor, model = load_model(model_name)

    # prepare image
    pixel_values = processor(image, return_tensors="pt").pixel_values

    # generate (no beam search)
    generated_ids = model.generate(pixel_values)

    # decode
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return generated_text


title = "Interactive demo: TrOCR Model Switcher"
description = "Demo for the Medieval TrOCR HTR Models."
# Input widgets: the image to transcribe and the model used to transcribe it.
image_input = gr.Image(type="pil")
model_selector = gr.Dropdown(choices=list(MODEL_OPTIONS), label="Select Model")

# One pre-filled example row: the saved example image paired with a model name.
example_rows = [
    ["image_0.png", "Medieval Latin Caroline"],
]

# Assemble the interface around process_image and start serving it.
iface = gr.Interface(
    fn=process_image,
    inputs=[image_input, model_selector],
    outputs=gr.Textbox(),
    title=title,
    description=description,
    examples=example_rows,
)

iface.launch(debug=True, share=True)