File size: 2,901 Bytes
54baee8
 
e10566d
 
54baee8
 
 
dbd7548
 
 
 
54baee8
 
 
 
 
 
 
 
 
 
 
 
 
 
ef49bab
54baee8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import gradio as gr
import numpy as np
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
#from transformers import AutoProcessor, AutoModelForDocumentQuestionAnswering
from PIL import Image

# Checkpoint name shared by the processor and the model loaded below.
_TROCR_CHECKPOINT = "microsoft/trocr-base-handwritten"

processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)

def sepia(input_img):
    """Apply a sepia tone to an image and return it with a text dump.

    Args:
        input_img: Array whose last axis holds the three colour channels
            (presumably an H x W x 3 RGB frame from the Gradio image
            component -- TODO confirm against the caller). ``None`` is
            accepted and means "no frame".

    Returns:
        A ``(sepia_img, sepia_values)`` tuple. ``sepia_img`` is the filtered
        image divided by its largest value (left unscaled when that value is
        zero or the array is empty); ``sepia_values`` is ``repr()`` of that
        array. For a ``None`` input the result is ``(None, "")``.
    """
    if input_img is None:
        return None, ""

    sepia_filter = np.array([
        [0.393, 0.769, 0.189],
        [0.349, 0.686, 0.168],
        [0.272, 0.534, 0.131],
    ])
    # Each output channel is a weighted sum of the input channels.
    sepia_img = np.asarray(input_img).dot(sepia_filter.T)

    # Normalise by the peak value, but never divide by zero: an all-black
    # frame would otherwise turn into an array of NaNs.
    peak = sepia_img.max() if sepia_img.size else 0
    if peak != 0:
        sepia_img = sepia_img / peak

    sepia_values = repr(sepia_img)
    return sepia_img, sepia_values


## https://www.gradio.app/docs/gradio/blocks
## required positional arguments: 'inputs' and 'outputs' 
def process_image(image):
    """Recognise the text in an image using the module-level TrOCR objects.

    Args:
        image: Image handed to ``processor``. ``None`` (for example when no
            image is available yet) is treated as "nothing to recognise".

    Returns:
        The first decoded string produced by ``model``, ``""`` when
        ``image`` is ``None``, or ``"Error: <message>"`` if any step raises.
    """
    # Nothing to recognise: return an empty result instead of surfacing a
    # processor error message to the user.
    if image is None:
        return ""

    try:
        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
        return decoded[0]
    except Exception as e:
        # UI boundary: report the failure as text rather than raising, so the
        # caller always receives a string.
        return f"Error: {e}"

def additional_input(text):
    """Build the acknowledgement message for an extra text input.

    Returns the literal prefix ``"Additional input received: "`` followed by
    the string form of ``text``.
    """
    return "Additional input received: {}".format(text)

# Stand-alone sepia interface. In the visible part of this file it is only
# referenced by a commented-out TabbedInterface and is never launched.
# NOTE(review): sepia() returns two values (image, text) but only a single
# "image" output is declared here -- confirm before re-enabling this interface.
sepia_interface = gr.Interface(sepia, gr.Image(), "image")

# Main app: a webcam-fed sepia interface whose output image is then passed to
# process_image for text recognition. Components are created inside the
# layout contexts below, so the statement order is significant.
with gr.Blocks() as generated_output:
    with gr.Column():
        # Output components are created first so the nested Interface can
        # reference them in `outputs`.
        sepia_values_text=gr.Textbox(label="Sepia Values")
        output_img = gr.Image(label="Output Image")
        gr.Interface(fn=sepia,
                    inputs=gr.Image(
                        # webcam source with streaming enabled makes the camera feed live
                        sources=["webcam"], 
                        streaming=True
                    ), 
                    # order matches sepia()'s return value: (image, values text)
                    outputs=[output_img, sepia_values_text],
                    live=True,
                    show_progress="full")
    with gr.Row():
        # Whenever the sepia output image changes, run process_image on it
        # and show the result in a textbox created inside this row.
        output_img.change(
            fn=process_image,
            inputs=output_img,
            outputs=gr.Textbox(label="Recognized Text"),
            show_progress="full")
#with gr.Blocks() as generated_output:
#    inp = gr.Interface(sepia, gr.Image(), "image")
#    out = gr.Textbox()


#demo = gr.TabbedInterface([sepia_interface, generated_output], ["RGB Sepia Filter", "Handwritten to Text"])

#with gr.Blocks() as demo:
#    with gr.Row():
#        input_img = gr.Image(label="Input Image")
#        submit_button = gr.Button("Submit")
#        output_img = gr.Image(label="Output Image")
#        sepia_values_text = gr.Textbox(label="Sepia Values")

#    submit_button.click(sepia, inputs=input_img, outputs=[output_img, sepia_values_text])
    
# Launch the Blocks app only when this file is run as a script, not on import.
if __name__ == "__main__":
    generated_output.launch()