Daemontatox committed on
Commit
5d96bcc
·
verified ·
1 Parent(s): 524a2a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -35
app.py CHANGED
@@ -5,30 +5,93 @@ import os
5
  from PIL import Image
6
  from huggingface_hub import InferenceClient
7
  from openai import OpenAI
8
- # from dotenv import load_dotenv
9
 
10
- # load_dotenv()
11
  # Load API keys from environment variables
12
  inference_api_key = os.environ.get("HF_TOKEN")
13
  chat_api_key = os.environ.get("HF_TOKEN")
 
 
14
 
15
  # Global variable to store the image data URL and prompt for the currently generated image.
16
  global_image_data_url = None
17
  global_image_prompt = None # Still stored if needed elsewhere
18
 
19
- def generate_prompt_from_options(difficulty, age, level):
20
  """
21
  Uses the OpenAI chat model (via Hugging Face Inference API) to generate an image generation prompt
22
- based on the selected difficulty, age, and autism level.
23
  """
24
  query = (
25
- f"Generate an image generation prompt for an educational image intended for Autistic children. "
26
- f"Consider the following parameters:\n"
27
- f"- Difficulty: {difficulty}\n"
28
- f"- Age: {age}\n"
29
- f"- Autism Level: {level}\n\n"
30
- f"Make sure the prompt is clear, descriptive, and suitable for generating an image that "
31
- f"can be used to help children learn or understand a concept."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  )
33
 
34
  messages = [
@@ -39,14 +102,15 @@ def generate_prompt_from_options(difficulty, age, level):
39
  ]
40
 
41
  client = OpenAI(
42
- base_url="https://api-inference.huggingface.co/v1/",
43
- api_key=chat_api_key
44
  )
45
 
46
  stream = client.chat.completions.create(
47
- model="meta-llama/Llama-3.3-70B-Instruct",
 
48
  messages=messages,
49
- max_tokens=248,
50
  stream=True
51
  )
52
 
@@ -55,10 +119,15 @@ def generate_prompt_from_options(difficulty, age, level):
55
  response_text += chunk.choices[0].delta.content
56
  return response_text.strip()
57
 
58
- def generate_image_fn(selected_prompt):
59
  """
60
  Uses the Hugging Face Inference API to generate an image from the provided prompt.
61
  Converts the image to a data URL for later use and stores the prompt globally.
 
 
 
 
 
62
  """
63
  global global_image_data_url, global_image_prompt
64
 
@@ -72,7 +141,10 @@ def generate_image_fn(selected_prompt):
72
 
73
  image = image_client.text_to_image(
74
  selected_prompt,
75
- model="stabilityai/stable-diffusion-3.5-large-turbo"
 
 
 
76
  )
77
 
78
  buffered = io.BytesIO()
@@ -83,17 +155,17 @@ def generate_image_fn(selected_prompt):
83
 
84
  return image
85
 
86
- def generate_image_and_reset_chat(difficulty, age, level, active_session, saved_sessions):
87
  """
88
- Saves any current active session into the saved sessions list. Then, using the three selected options,
89
  generates an image generation prompt, creates an image, and starts a new active session.
90
  """
91
  new_sessions = saved_sessions.copy()
92
  if active_session.get("prompt"):
93
  new_sessions.append(active_session)
94
 
95
- generated_prompt = generate_prompt_from_options(difficulty, age, level)
96
- image = generate_image_fn(generated_prompt)
97
 
98
  new_active_session = {"prompt": generated_prompt, "image": global_image_data_url, "chat": []}
99
  return image, new_active_session, new_sessions
@@ -120,12 +192,20 @@ def compare_details_chat_fn(user_details):
120
  {
121
  "type": "text",
122
  "text": (
123
- f"Based on the image provided above, please evaluate the following description given by the user:\n"
124
  f"'{user_details}'\n\n"
125
- "Determine a correctness percentage for the description (without referencing the original prompt) "
126
- "and if the description is less than 75% accurate, provide useful hints for improvement."
127
- "Be concise not to overwhelm the user with information."
128
- "you are a kids assistant, so you should be able to explain the image in a simple way."
 
 
 
 
 
 
 
 
129
  )
130
  }
131
  ]
@@ -133,14 +213,14 @@ def compare_details_chat_fn(user_details):
133
  ]
134
 
135
  chat_client = OpenAI(
136
- base_url="https://api-inference.huggingface.co/v1/",
137
- api_key=chat_api_key
138
  )
139
 
140
  stream = chat_client.chat.completions.create(
141
- model="meta-llama/Llama-3.2-11B-Vision-Instruct",
142
  messages=messages,
143
- max_tokens=256,
144
  stream=True
145
  )
146
 
@@ -174,10 +254,9 @@ def update_sessions(saved_sessions, active_session):
174
  return saved_sessions
175
 
176
  ##############################################
177
- # Dropdown Options for Difficulty, Age, and Level
178
  ##############################################
179
- difficulty_options = ["Easy", "Medium", "Hard"]
180
- age_options = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20"]
181
  level_options = ["Level 1", "Level 2", "Level 3"]
182
 
183
  ##############################################
@@ -196,13 +275,20 @@ with gr.Blocks() as demo:
196
  gr.Markdown("Select options to create a custom prompt for image generation:")
197
  with gr.Row():
198
  difficulty_dropdown = gr.Dropdown(label="Difficulty", choices=difficulty_options, value=difficulty_options[0])
199
- age_dropdown = gr.Dropdown(label="Age", choices=age_options, value=age_options[0])
 
200
  level_dropdown = gr.Dropdown(label="Level", choices=level_options, value=level_options[0])
 
 
 
 
 
 
201
  generate_btn = gr.Button("Generate Image")
202
  img_output = gr.Image(label="Generated Image")
203
  generate_btn.click(
204
  generate_image_and_reset_chat,
205
- inputs=[difficulty_dropdown, age_dropdown, level_dropdown, active_session, saved_sessions],
206
  outputs=[img_output, active_session, saved_sessions]
207
  )
208
 
 
5
  from PIL import Image
6
  from huggingface_hub import InferenceClient
7
  from openai import OpenAI
8
+ from dotenv import load_dotenv
9
 
10
+ load_dotenv()
11
  # Load API keys from environment variables
12
  inference_api_key = os.environ.get("HF_TOKEN")
13
  chat_api_key = os.environ.get("HF_TOKEN")
14
+ chat_api_key2 = os.environ.get("OPENROUTER_TOKEN")
15
+
16
 
17
  # Global variable to store the image data URL and prompt for the currently generated image.
18
  global_image_data_url = None
19
  global_image_prompt = None # Still stored if needed elsewhere
20
 
21
+ def generate_prompt_from_options(difficulty, age, level, extra_details=""):
22
  """
23
  Uses an OpenAI-compatible chat model (via the OpenRouter API) to generate an image generation prompt
24
+ based on the selected difficulty, age, autism level, and any extra details the user provides.
25
  """
26
  query = (
27
+
28
+ f"""
29
+ Follow the instructions below to Generate an image generation prompt for an educational image intended for Autistic children.
30
+ Consider the following parameters:\n
31
+ - Difficulty: {difficulty}\n
32
+ - Age: {age}\n
33
+ - Autism Level: {level}\n
34
+ - Extra Details: {extra_details}\n\n
35
+ Use the following system prompt to guide the image generation process:\n
36
+
37
+ System Prompt:
38
+
39
+ You are an image generation engine specializing in creating clear, calming, and visually supportive images designed for children with autism spectrum disorder (ASD). Your primary goal is to produce images that aid in understanding, communication, emotional regulation, and daily routines. Prioritize the following characteristics:
40
+
41
+ **1. Clarity and Simplicity:**
42
+
43
+ * **Minimalist Backgrounds:** Use solid, muted colors (e.g., soft blues, greens, light grays, pastels) or very simple, uncluttered backgrounds. Avoid busy patterns, highly contrasting colors, or distracting elements.
44
+ * **Clear Subject Focus:** The main subject of the image should be prominent and easily identifiable. Avoid unnecessary details that could cause confusion or sensory overload.
45
+ * **Unambiguous Representations:** Objects and people should be depicted in a realistic and straightforward manner. Avoid abstract art or overly stylized representations. If depicting emotions, make them very clear and easily recognizable (e.g., a simple, wide smile for happiness, a single tear for sadness).
46
+
47
+ **2. Visual Structure and Predictability:**
48
+
49
+ * **Literal Interpretation:** The images should be highly literal. Avoid metaphors, symbolism, or implied meanings. If depicting a sequence of events, make each step visually distinct.
50
+ * **Defined Borders:** Consider using clear outlines or borders around objects and people to enhance visual separation and definition.
51
+ * **Consistent Style:** Maintain a consistent visual style across multiple images. This helps build familiarity and predictability.
52
+
53
+ **3. Sensory Considerations:**
54
+
55
+ * **Soft Color Palette:** Favor muted, calming colors. Avoid overly bright, saturated, or fluorescent colors.
56
+ * **Reduced Visual Complexity:** Limit the number of elements in the image to prevent sensory overload.
57
+ * **Smooth Textures:** If textures are depicted, they should appear smooth and non-threatening. Avoid rough, jagged, or overly detailed textures.
58
+
59
+ **4. Positive and Supportive Imagery:**
60
+
61
+ * **Positive Reinforcement:** Images should be encouraging and positive. Depict success, cooperation, and positive social interactions.
62
+ * **Calm and Relaxing Scenes:** Consider scenes that promote calmness, such as nature scenes (e.g., a quiet forest, a calm beach), or familiar, safe environments (e.g., a cozy bedroom, a well-organized classroom).
63
+ * **Avoidance of Triggers:** Be mindful of potential triggers for anxiety or distress. Avoid images that depict conflict, overwhelming crowds, or potentially frightening situations.
64
+
65
+ **5. Specific Use Cases (Adapt as needed):**
66
+
67
+ * **Social Stories:** If generating images for a social story, ensure each image clearly illustrates a single step in the sequence. Use consistent characters and settings throughout the story.
68
+ * **Visual Schedules:** If creating images for a visual schedule, make each activity easily identifiable and visually distinct.
69
+ * **Emotion Recognition:** If depicting emotions, use clear facial expressions and body language. Consider using a consistent character to represent different emotions.
70
+ * **Communication Aids:** If creating images for communication, ensure the objects or actions are clearly depicted and easily recognizable.
71
+ * **Daily Routines**: Brushing teeth, eating food, going to school.
72
+ * **Learning concepts**: Shapes, colors, animals, numbers, alphabet.
73
+
74
+ **Prompting Instructions:**
75
+
76
+ When providing a prompt to the model, be as specific as possible, including:
77
+
78
+ * **The subject of the image:** "A boy brushing his teeth."
79
+ * **The desired style:** "Simple, clear, with a solid light blue background."
80
+ * **The intended use:** "For a visual schedule."
81
+ * **Any specific details:** "The boy should be smiling. The toothbrush should be blue."
82
+ * **Emotions:** Clearly state the emotion "happy" or "calm."
83
+
84
+ **Example Prompts (using the above system prompt as a base):**
85
+
86
+ * "Generate an image for a visual schedule. The subject is 'eating lunch.' Show a child sitting at a table with a plate of food (sandwich, apple slices, and a glass of milk). The background should be a solid, pale green. The child should be smiling. Use a clear, simple style with defined outlines."
87
+ * "Generate an image to help with emotion recognition. The subject is 'sad.' Show a child's face with a single tear rolling down their cheek and a downturned mouth. The background should be a solid, light gray. Use a simple, realistic style."
88
+ * "Generate an image for a social story about going to the doctor. Show a child sitting in a doctor's waiting room, calmly looking at a book. The room should have a few simple toys and a window. The background should be a soft blue. The style should be clear and uncluttered."
89
+ * "Generate a picture of two block shapes in a simple, cartoon style. One red square and one blue circle. Place them on a white background."
90
+ * "Generate a cartoon image of a dog. Make the dog appear to be friendly and non-threatening. Use warm colors."
91
+
92
+ Ensure your prompts are accurate and ensure the images are accurate and don't have any irregularities or deformations in them.
93
+ Use descriptive and detailed prompts.
94
+ """
95
  )
96
 
97
  messages = [
 
102
  ]
103
 
104
  client = OpenAI(
105
+ base_url="https://openrouter.ai/api/v1",
106
+ api_key=chat_api_key2
107
  )
108
 
109
  stream = client.chat.completions.create(
110
+ model="sophosympatheia/rogue-rose-103b-v0.2:free",
111
+ temperature=0.5,
112
  messages=messages,
113
+ max_tokens=8192,
114
  stream=True
115
  )
116
 
 
119
  response_text += chunk.choices[0].delta.content
120
  return response_text.strip()
121
 
122
+ def generate_image_fn(selected_prompt, guidance_scale=7.5, negative_prompt="ugly, blurry, poorly drawn hands , Lewd , nude", num_inference_steps=50):
123
  """
124
  Uses the Hugging Face Inference API to generate an image from the provided prompt.
125
  Converts the image to a data URL for later use and stores the prompt globally.
126
+
127
+ Additional parameters:
128
+ - guidance_scale: Influences how strongly the image generation adheres to the prompt.
129
+ - negative_prompt: Specifies undesirable elements to avoid in the generated image.
130
+ - num_inference_steps: The number of denoising steps for image generation.
131
  """
132
  global global_image_data_url, global_image_prompt
133
 
 
141
 
142
  image = image_client.text_to_image(
143
  selected_prompt,
144
+ model="stabilityai/stable-diffusion-3.5-large-turbo", #|| black-forest-labs/FLUX.1-dev || stabilityai/stable-diffusion-3.5-large-turbo
145
+ guidance_scale=guidance_scale,
146
+ negative_prompt=negative_prompt,
147
+ num_inference_steps=num_inference_steps
148
  )
149
 
150
  buffered = io.BytesIO()
 
155
 
156
  return image
157
 
158
+ def generate_image_and_reset_chat(difficulty, age, level, extra_details, active_session, saved_sessions):
159
  """
160
+ Saves any current active session into the saved sessions list. Then, using the three selected options and extra details,
161
  generates an image generation prompt, creates an image, and starts a new active session.
162
  """
163
  new_sessions = saved_sessions.copy()
164
  if active_session.get("prompt"):
165
  new_sessions.append(active_session)
166
 
167
+ generated_prompt = generate_prompt_from_options(difficulty, age, level, extra_details)
168
+ image = generate_image_fn(generated_prompt) # Uses default guidance_scale, negative_prompt, and num_inference_steps
169
 
170
  new_active_session = {"prompt": generated_prompt, "image": global_image_data_url, "chat": []}
171
  return image, new_active_session, new_sessions
 
192
  {
193
  "type": "text",
194
  "text": (
195
+ f"Based on the image provided above, please evaluate the following description given by the child:\n"
196
  f"'{user_details}'\n\n"
197
+ """
198
+ You are a friendly and encouraging teacher, guiding a child in describing an image. Speak directly to the child using simple, clear language. Provide positive reinforcement when the child gives a correct or accurate description.
199
+
200
+ If the child's description is incorrect or inaccurate, gently guide them with hints rather than direct corrections. Use Hint before providing guidance. Keep your hints playful and engaging to encourage curiosity.
201
+
202
+ Avoid repeating the child’s description. Instead, focus on giving feedback based on the image. If the description is correct, acknowledge it warmly with praise.
203
+
204
+ Keep the conversation going by asking open-ended questions about the image to encourage the child to observe and think more deeply. Use questions that spark curiosity, such as 'What else do you see?' or 'Why do you think that is happening?'
205
+
206
+ Do not mention your own thoughts, system prompts, or provide direct details about the image. Stay fully engaged in a natural, conversational way, making learning fun and interactive!
207
+
208
+ """
209
  )
210
  }
211
  ]
 
213
  ]
214
 
215
  chat_client = OpenAI(
216
+ base_url="https://openrouter.ai/api/v1", # https://openrouter.ai/api/v1 ||||| https://api-inference.huggingface.co/v1/
217
+ api_key=chat_api_key2
218
  )
219
 
220
  stream = chat_client.chat.completions.create(
221
+ model="google/gemini-2.0-flash-thinking-exp:free",
222
  messages=messages,
223
+ max_tokens=8192,
224
  stream=True
225
  )
226
 
 
254
  return saved_sessions
255
 
256
  ##############################################
257
+ # Dropdown Options for Difficulty and Level
258
  ##############################################
259
+ difficulty_options = ["Simple", "Average", "Detailed"]
 
260
  level_options = ["Level 1", "Level 2", "Level 3"]
261
 
262
  ##############################################
 
275
  gr.Markdown("Select options to create a custom prompt for image generation:")
276
  with gr.Row():
277
  difficulty_dropdown = gr.Dropdown(label="Difficulty", choices=difficulty_options, value=difficulty_options[0])
278
+ # Changed age input from a dropdown to a text box
279
+ age_input = gr.Textbox(label="Age", placeholder="Enter your age...", value="3")
280
  level_dropdown = gr.Dropdown(label="Level", choices=level_options, value=level_options[0])
281
+ # New textbox for extra details
282
+ extra_details_input = gr.Textbox(
283
+ label="Extra Details (optional)",
284
+ placeholder="Enter any additional details for the image...",
285
+ lines=2
286
+ )
287
  generate_btn = gr.Button("Generate Image")
288
  img_output = gr.Image(label="Generated Image")
289
  generate_btn.click(
290
  generate_image_and_reset_chat,
291
+ inputs=[difficulty_dropdown, age_input, level_dropdown, extra_details_input, active_session, saved_sessions],
292
  outputs=[img_output, active_session, saved_sessions]
293
  )
294