PerryCheng614 committed on
Commit
76578bc
·
1 Parent(s): 99e4ea3

initial check-in gradio vlm UI

Browse files
Files changed (2) hide show
  1. app.py +85 -0
  2. example_images/example_1.jpg +0 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import websockets
3
+ import asyncio
4
+ import json
5
+ import base64
6
+ from PIL import Image
7
+ import io
8
+
9
async def process_image_stream(image_path, prompt, max_tokens=512):
    """
    Stream a model's answer about an image over a WebSocket connection.

    The image is re-encoded as an RGB JPEG, wrapped in a base64 data URL and
    sent together with the prompt as one JSON message. Every JSON message
    received afterwards is dispatched on its "status" field:

    * "generating" -- append the message's "token" and yield the text so far
    * "complete"   -- stop reading
    * "error"      -- yield the reported error and stop reading

    Messages that are not JSON objects, or carry an unknown status, are
    skipped. Exceptions are not propagated; they are turned into a yielded
    error string so the caller always receives displayable text.

    Args:
        image_path: Path of the image to send. A falsy value yields a
            request to upload an image and ends the stream.
        prompt: Text sent as the "prompt" field of the request.
        max_tokens: Value sent as the "max_tokens" field of the request.

    Yields:
        str: The response accumulated so far (one yield per received
        token), or a single human-readable error message.
    """
    if not image_path:
        yield "Please upload an image first."
        return

    # Prepare the image outside the network try-block so that an unreadable
    # upload is not reported as a connection failure.
    try:
        with Image.open(image_path) as img:
            buffer = io.BytesIO()
            # Convert to RGB first; Pillow cannot write modes such as RGBA
            # or P directly as JPEG.
            img.convert('RGB').save(buffer, format="JPEG")
    except Exception as e:
        yield f"Error reading image: {e}"
        return
    base64_image = base64.b64encode(buffer.getvalue()).decode('utf-8')

    try:
        async with websockets.connect('wss://nexa-omni.nexa4ai.com/ws/process-image/') as websocket:
            # Send image data and parameters as a single JSON message
            await websocket.send(json.dumps({
                "image": f"data:image/jpeg;base64,{base64_image}",
                "prompt": prompt,
                "task": "instruct",  # Fixed to instruct
                "max_tokens": max_tokens
            }))

            response = ""

            # Receive streaming response
            async for message in websocket:
                try:
                    data = json.loads(message)
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # Not a JSON frame; ignore it and keep listening.
                    continue
                if not isinstance(data, dict):
                    # Valid JSON but not an object, so it has no status.
                    continue

                status = data.get("status")
                if status == "generating":
                    # A missing token adds nothing instead of aborting.
                    response += data.get("token", "")
                    yield response
                elif status == "complete":
                    break
                elif status == "error":
                    yield f"Error: {data.get('error', 'unknown error')}"
                    break

    except Exception as e:
        yield f"Error connecting to server: {str(e)}"
55
+
56
# Build the Gradio UI. The three input widgets are listed in the positional
# order of process_image_stream's parameters: image path, prompt, max tokens.
_image_input = gr.Image(type="filepath", label="Upload Image")
_question_input = gr.Textbox(
    label="Question",
    placeholder="Ask a question about the image...",
    value="Describe this image",
)
_max_tokens_input = gr.Slider(
    minimum=50,
    maximum=200,
    value=200,
    step=1,
    label="Max Tokens",
)
# Read-only box that displays the streamed answer.
_response_output = gr.Textbox(label="Response", interactive=False)

demo = gr.Interface(
    fn=process_image_stream,
    inputs=[_image_input, _question_input, _max_tokens_input],
    outputs=_response_output,
    title="Nexa Omni Vision",
    description="""
Upload an image and ask questions about it. The model will analyze the image and provide detailed answers to your queries.
""",
    # One example row: image path, question, max tokens.
    examples=[
        ["example_images/example_1.jpg", "Describe this image", 128],
    ],
)

if __name__ == "__main__":
    # Enable the request queue, then serve on port 7860 with server_name
    # "0.0.0.0".
    queued_demo = demo.queue()
    queued_demo.launch(server_name="0.0.0.0", server_port=7860)
example_images/example_1.jpg ADDED