Daemontatox committed (verified)
Commit 0b4a56a · Parent: df7e1e9

Update app.py

Files changed (1): app.py (+78 -27)
app.py CHANGED
@@ -10,15 +10,19 @@ from openai import OpenAI
 inference_api_key = os.environ.get("HF_TOKEN")
 chat_api_key = os.environ.get("HF_TOKEN")
 
-# Global variable to store the image as a data URL so it can be used in subsequent chat calls.
+# Global variables to store the generated image (as a data URL) and the prompt used
 global_image_data_url = None
+global_image_prompt = None
 
 def generate_image_fn(selected_prompt):
     """
     Uses the Hugging Face Inference API to generate an image from the selected prompt.
-    Converts the image to a data URL so that it can be embedded in the chat request.
+    Converts the image to a data URL for later use, and stores the prompt globally.
     """
-    global global_image_data_url
+    global global_image_data_url, global_image_prompt
+
+    # Store the chosen prompt for later use in detail checking
+    global_image_prompt = selected_prompt
 
     # Create an inference client for text-to-image (Stable Diffusion)
     image_client = InferenceClient(
@@ -37,46 +41,36 @@ def generate_image_fn(selected_prompt):
     image.save(buffered, format="PNG")
     img_bytes = buffered.getvalue()
     img_b64 = base64.b64encode(img_bytes).decode("utf-8")
-    data_url = f"data:image/png;base64,{img_b64}"
-    global_image_data_url = data_url
+    global_image_data_url = f"data:image/png;base64,{img_b64}"
 
     return image
 
 def chat_about_image_fn(user_input):
     """
-    Sends the user's text message and the current image (as a data URL) to a vision-chat model.
-    Returns the model's response.
+    Sends the user's chat message along with the current image (as a data URL)
+    to a vision-chat model, and returns the model's response.
     """
     if not global_image_data_url:
         return "Please generate an image first."
 
-    # Create the messages payload. The payload contains the user's text
-    # along with the image in a field named "image_url" (using our data URL).
     messages = [
         {
             "role": "user",
             "content": [
-                {
-                    "type": "text",
-                    "text": user_input
-                },
+                {"type": "text", "text": user_input},
                 {
                     "type": "image_url",
-                    "image_url": {
-                        "url": global_image_data_url
-                    }
+                    "image_url": {"url": global_image_data_url}
                 }
             ]
        }
    ]
 
-    # Create a client for the vision-chat model
     chat_client = OpenAI(
         base_url="https://api-inference.huggingface.co/v1/",
         api_key=chat_api_key  # Loaded from env secrets
     )
 
-    # Call the chat completions API. Here we use streaming to accumulate the full response.
     stream = chat_client.chat.completions.create(
         model="meta-llama/Llama-3.2-11B-Vision-Instruct",
         messages=messages,
@@ -84,7 +78,52 @@ def chat_about_image_fn(user_input):
         stream=True
     )
 
-    # Concatenate all streamed chunks of the response.
+    response_text = ""
+    for chunk in stream:
+        response_text += chunk.choices[0].delta.content
+
+    return response_text
+
+def check_details_fn(user_details):
+    """
+    Compares the user's description of the generated image with the prompt used to generate it.
+    The function sends both the original prompt and the user description to the vision-chat model,
+    which responds whether the description is correct and (if not) provides a hint.
+    """
+    if not global_image_prompt:
+        return "Please generate an image first."
+
+    # Build a message to instruct the model to evaluate the user's details.
+    # The message asks the model to check whether the description covers the key elements of the prompt.
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": (
+                        f"The image was generated using the prompt: '{global_image_prompt}'.\n"
+                        f"Evaluate the following user description of the image: '{user_details}'.\n"
+                        "If the description is accurate and captures the key elements of the prompt, reply with 'Correct'. "
+                        "If it is inaccurate or missing important details, reply with 'Incorrect' and provide a hint on what is missing."
+                    )
+                }
+            ]
+        }
+    ]
+
+    chat_client = OpenAI(
+        base_url="https://api-inference.huggingface.co/v1/",
+        api_key=chat_api_key  # Loaded from env secrets
+    )
+
+    stream = chat_client.chat.completions.create(
+        model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+        messages=messages,
+        max_tokens=100,
+        stream=True
+    )
+
     response_text = ""
     for chunk in stream:
         response_text += chunk.choices[0].delta.content
@@ -107,8 +146,8 @@ prompt_options = [
 
 # Define the Gradio interface using Blocks.
 with gr.Blocks() as demo:
-    gr.Markdown("# Image Generation and Chat")
-
+    gr.Markdown("# Image Generation, Chat, and Detail Check")
+
     with gr.Row():
         with gr.Column():
             gr.Markdown("## Generate Image")
@@ -117,14 +156,26 @@ with gr.Blocks() as demo:
             img_output = gr.Image(label="Generated Image")
         with gr.Column():
             gr.Markdown("## Chat about the Image")
-            chat_input = gr.Textbox(label="Enter your message about the image", placeholder="Ask a question or comment about the image...")
+            chat_input = gr.Textbox(
+                label="Enter your message about the image",
+                placeholder="Ask a question or comment about the image..."
+            )
             chat_output = gr.Textbox(label="Chat Response")
-
-    # When the "Generate Image" button is clicked, call generate_image_fn with the selected prompt.
+            chat_input.submit(chat_about_image_fn, inputs=chat_input, outputs=chat_output)
+
+    # Row for checking the user's description of the generated image.
+    with gr.Row():
+        gr.Markdown("## Check Your Description of the Image")
+        details_input = gr.Textbox(
+            label="Enter details about the image",
+            placeholder="Describe the key elements of the image..."
+        )
+        check_details_btn = gr.Button("Check Details")
+        details_output = gr.Textbox(label="Result")
+
+    # Bind the button clicks to functions.
     generate_btn.click(generate_image_fn, inputs=prompt_dropdown, outputs=img_output)
-
-    # When the user submits a message in the chat textbox, call chat_about_image_fn.
-    chat_input.submit(chat_about_image_fn, inputs=chat_input, outputs=chat_output)
+    check_details_btn.click(check_details_fn, inputs=details_input, outputs=details_output)
 
 # Launch the app. (Hugging Face Spaces will detect and run this.)
 demo.launch()
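One caveat on the streaming loops in both chat_about_image_fn and check_details_fn: with the OpenAI Python client, streamed chunks can arrive with delta.content set to None (typically the final chunk that carries the finish reason), and the bare += then raises a TypeError. A minimal None-safe sketch of the same accumulation, assuming the `stream` object created above:

# Minimal sketch: None-safe accumulation of a chat.completions stream.
# `stream` is assumed to be the streaming response created in
# chat_about_image_fn / check_details_fn; only the loop body changes.
response_text = ""
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:  # skip chunks where content is None (e.g. the final stop chunk)
        response_text += delta

Separately, module-level globals such as global_image_data_url and global_image_prompt are shared across all concurrent users of a Space; Gradio's gr.State is the usual per-session alternative if that matters for this app.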