Spaces:

wjbmattingly
/

medieval-htr-page

Running on Zero

wjbmattingly committed on Aug 9

Commit

3fc0241

•

1 Parent(s): 546d56f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from PIL import Image, ImageDraw
 import os
 import tempfile
 import numpy as np
 # Dictionary of model names and their corresponding HuggingFace model IDs
 MODEL_OPTIONS = {
     "Microsoft Handwritten": "microsoft/trocr-base-handwritten",
@@ -36,12 +37,12 @@ def load_model(model_name):
         current_model = VisionEncoderDecoderModel.from_pretrained(model_id)
         current_model_name = model_name
-        # Move model to GPU
-        current_model = current_model.to('cuda')
     return current_processor, current_model
 def process_image(image, model_name):
     # Save the uploaded image to a temporary file
     with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_img:
@@ -59,6 +60,9 @@ def process_image(image, model_name):
     processor, model = load_model(model_name)
     # Process each line
     transcriptions = []
     for line in lines_data['lines']:
@@ -79,7 +83,7 @@ def process_image(image, model_name):
         # Prepare image for TrOCR
         pixel_values = processor(images=line_image_np, return_tensors="pt").pixel_values
-        pixel_values = pixel_values.to('cuda')
         # Generate (no beam search)
         with torch.no_grad():
@@ -117,4 +121,4 @@ with gr.Blocks() as iface:
     submit_button = gr.Button("Transcribe")
     submit_button.click(fn=process_image, inputs=[input_image, model_dropdown], outputs=[output_image, transcription_output])
-iface.launch()

 import os
 import tempfile
 import numpy as np
 # Dictionary of model names and their corresponding HuggingFace model IDs
 MODEL_OPTIONS = {
     "Microsoft Handwritten": "microsoft/trocr-base-handwritten",
         current_model = VisionEncoderDecoderModel.from_pretrained(model_id)
         current_model_name = model_name
+        # Move model to GPU if available, else use CPU
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        current_model = current_model.to(device)
     return current_processor, current_model
 def process_image(image, model_name):
     # Save the uploaded image to a temporary file
     with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_img:
     processor, model = load_model(model_name)
+    # Determine device
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     # Process each line
     transcriptions = []
     for line in lines_data['lines']:
         # Prepare image for TrOCR
         pixel_values = processor(images=line_image_np, return_tensors="pt").pixel_values
+        pixel_values = pixel_values.to(device)
         # Generate (no beam search)
         with torch.no_grad():
     submit_button = gr.Button("Transcribe")
     submit_button.click(fn=process_image, inputs=[input_image, model_dropdown], outputs=[output_image, transcription_output])
+iface.launch(share=True)