"""Gradio demo: live webcam sepia filter whose output is fed to a document model."""

import gradio as gr
import numpy as np
#from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from transformers import AutoProcessor, AutoModelForDocumentQuestionAnswering
from PIL import Image

#client = InferenceClient("models/microsoft/trocr-base-handwritten")

# Loaded once at import time so every request reuses the same objects.
processor = AutoProcessor.from_pretrained("Sharka/CIVQA_LayoutLMv2_EasyOCR")
model = AutoModelForDocumentQuestionAnswering.from_pretrained("Sharka/CIVQA_LayoutLMv2_EasyOCR")

# RGB -> sepia mixing matrix; row i holds the weights of output channel i.
# Kept at module level so it is not rebuilt for every streamed frame.
_SEPIA_FILTER = np.array([
    [0.393, 0.769, 0.189],
    [0.349, 0.686, 0.168],
    [0.272, 0.534, 0.131],
])


def sepia(input_img):
    """Apply a sepia tone to an image and return it with its printed values.

    Args:
        input_img: Array whose last axis has length 3 (it is multiplied by the
            transposed 3x3 filter), or ``None`` when no frame is available.

    Returns:
        A ``(sepia_img, sepia_values)`` tuple. ``sepia_img`` is the filtered
        array divided by its own maximum and ``sepia_values`` is
        ``repr(sepia_img)``. For a ``None`` input the result is ``(None, "")``.
    """
    # A live webcam stream can fire before the first frame exists.
    if input_img is None:
        return None, ""

    sepia_img = input_img.dot(_SEPIA_FILTER.T)

    # Normalise by the peak value, but skip it for empty or all-black frames:
    # dividing by zero would fill the image with NaNs.
    peak = sepia_img.max() if sepia_img.size else 0
    if peak > 0:
        sepia_img /= peak

    return sepia_img, repr(sepia_img)


## https://www.gradio.app/docs/gradio/blocks
## required positional arguments: 'inputs' and 'outputs'
def process_image(image):
    """Run the loaded processor/model on ``image`` and return the decoded text.

    Args:
        image: Image value delivered by the Gradio ``change`` event, or
            ``None`` when the image component has been cleared.

    Returns:
        The first decoded string, ``""`` when there is no image, or
        ``"Error: <message>"`` if any step raises.
    """
    # The change event also fires when the image is cleared; nothing to read.
    if image is None:
        return ""

    # NOTE(review): generate()/batch_decode() is the TrOCR-style flow from the
    # commented-out import above, while the loaded checkpoint is a document
    # question-answering model - confirm this pipeline actually works with it.
    try:
        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    except Exception as e:
        # Deliberate best-effort: show the failure in the UI textbox instead
        # of breaking the live stream.
        return f"Error: {e}"


def additional_input(text):
    """Return a confirmation message that echoes ``text``."""
    return f"Additional input received: {text}"


# sepia() returns two values (image, text), so two outputs are declared.
sepia_interface = gr.Interface(sepia, gr.Image(), ["image", "text"])

# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost - confirm the intended layout.
with gr.Blocks() as generated_output:
    with gr.Column():
        sepia_values_text = gr.Textbox(label="Sepia Values")
        output_img = gr.Image(label="Output Image")
        gr.Interface(
            fn=sepia,
            inputs=gr.Image(
                #this makes the camera stream live
                sources=["webcam"],
                streaming=True,
            ),
            outputs=[output_img, sepia_values_text],
            live=True,
            show_progress="full",
        )
    with gr.Row():
        # Re-run the model every time the sepia output image changes.
        output_img.change(
            fn=process_image,
            inputs=output_img,
            outputs=gr.Textbox(label="Recognized Text"),
            show_progress="full",
        )

#with gr.Blocks() as generated_output:
#    inp = gr.Interface(sepia, gr.Image(), "image")
#    out = gr.Textbox()

#demo = gr.TabbedInterface([sepia_interface, generated_output], ["RGB Sepia Filter", "Handwritten to Text"])

#with gr.Blocks() as demo:
#    with gr.Row():
#        input_img = gr.Image(label="Input Image")
#        submit_button = gr.Button("Submit")
#        output_img = gr.Image(label="Output Image")
#        sepia_values_text = gr.Textbox(label="Sepia Values")
#    submit_button.click(sepia, inputs=input_img, outputs=[output_img, sepia_values_text])

if __name__ == "__main__":
    generated_output.launch()