Spaces:
Running
Running
File size: 7,061 Bytes
13240dd fcd8eda 13240dd fcd8eda 13240dd fcd8eda 13240dd fcd8eda 13240dd fcd8eda 13240dd fcd8eda 13240dd fcd8eda 13240dd fcd8eda 13240dd fcd8eda 13240dd fcd8eda 13240dd f7ed127 41c5739 13240dd fcd8eda 13240dd fcd8eda 13240dd fcd8eda 13240dd fcd8eda 13240dd fcd8eda 13240dd fcd8eda 13240dd fcd8eda |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
import os
from PIL import Image
import google.generativeai as genai
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import tempfile
# Gemini client setup: constants first, then the calls that consume them.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")  # None when the variable is unset
MODEL_ID = "gemini-1.5-pro-latest"

genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel(MODEL_ID)
# System prompts
# Instruction prefix that analyze_text and analyze_image put in front of the
# material sent to the model.
analysis_system_prompt = "You are an expert in gender studies. Analyze the following content for any signs of gender-based discrimination and suggest actionable advice."
# Persona and style rules for the podcast script; generate_podcast_script
# prepends this text to the analyzed content.
# NOTE(review): the filler word 'äh' looks German while text_to_speech
# synthesizes with lang='en' - confirm this is intended.
podcast_prompt = """You are Eva, a solo podcast host focusing on gender equality topics.
- Discuss real-life scenarios involving gender-based discrimination, provide insights, and offer solutions in a conversational, storytelling style.
- Based on the analyzed text, create an engaging solo podcast as if reading stories from different victims who send you their story.
- Introduce yourself as Eva.
- Keep the conversation within 30000 characters, with a lot of emotion.
- Use short sentences suitable for speech synthesis.
- Maintain an empathetic tone.
- Include filler words like 'äh' for a natural flow.
- Avoid background music or extra words.
"""
# Generation settings shared by every model.generate_content call in this file.
generation_config = genai.GenerationConfig(
    # Output limits
    candidate_count=1,
    max_output_tokens=8192,
    # Sampling controls
    temperature=0.9,
    top_k=32,
    top_p=1.0,
)
# Safety settings: every listed harm category uses the same block threshold.
safety_settings = dict.fromkeys(
    (
        genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT,
        genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    ),
    genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
)
# Analyze text
def analyze_text(text):
    """Ask the model to analyze ``text`` for gender-based discrimination.

    Args:
        text: Content to analyze (wired to the "Text Analysis" text box).

    Returns:
        The model's answer as a string, or a short message when there is
        nothing to analyze or the response carries no usable text.
    """
    # An empty submission would only send the bare system prompt to the API.
    if not text or not text.strip():
        return "Please enter some text to analyze."

    no_response = "No response generated."
    prompt = f"{analysis_system_prompt}\nContent:\n{text}"
    response = model.generate_content(
        [prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    if not response:
        return no_response
    try:
        return response.text
    except ValueError:
        # The `.text` accessor raises ValueError when the response holds no
        # text part (for example when it was blocked); a truthiness check on
        # the response object alone never reaches the fallback.
        return no_response
# Analyze image
def analyze_image(image: "Image.Image") -> str:
    """Ask the model to analyze an image for gender-based discrimination.

    The image is first handed to ``preprocess_image``; whatever that returns
    is sent to the model together with the analysis prompt.

    Args:
        image: The uploaded picture, or ``None`` when nothing was uploaded.

    Returns:
        The model's answer as a string, or a short message when no image was
        supplied or the response carries no usable text.
    """
    # The UI passes None when the button is clicked before an upload.
    if image is None:
        return "Please upload an image to analyze."

    no_response = "No response generated."
    prompt = f"{analysis_system_prompt}\nAnalyze this image for any instances of gender-based discrimination."
    resized_image = preprocess_image(image)
    response = model.generate_content(
        [prompt, resized_image],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    if not response:
        return no_response
    try:
        return response.text
    except ValueError:
        # The `.text` accessor raises ValueError when the response holds no
        # text part (for example when it was blocked).
        return no_response
# Preprocess image by resizing
def preprocess_image(image: "Image.Image", target_width: int = 512) -> "Image.Image":
    """Return ``image`` resized to ``target_width``, keeping its aspect ratio.

    Args:
        image: Source picture; its ``width``, ``height`` and ``resize`` are used.
        target_width: Width of the result in pixels (512 by default).

    Returns:
        The resized image produced by ``image.resize``.
    """
    # Scale the height by the same factor as the width. Clamp to 1 so a very
    # wide, very short picture never rounds down to a zero-height size.
    target_height = max(1, int(image.height * target_width / image.width))
    # Return the resized picture itself so the caller sends real pixels to the
    # model rather than a placeholder description string.
    return image.resize((target_width, target_height))
# Generate podcast script
def generate_podcast_script(content):
    """Ask the model for Eva's podcast script about ``content``.

    Args:
        content: Previously analyzed text to base the episode on.

    Returns:
        The generated script, or a fixed fallback line when the response
        carries no usable text.
    """
    fallback = "Eva has no commentary at this time."
    prompt = f"{podcast_prompt}\nAnalyzed content:\n{content}"
    # NOTE(review): unlike the analysis calls, no safety_settings are passed
    # here - confirm that this is intentional.
    response = model.generate_content([prompt], generation_config=generation_config)
    if not response:
        return fallback
    try:
        return response.text
    except ValueError:
        # The `.text` accessor raises ValueError when the response holds no
        # text part (for example when it was blocked).
        return fallback
# Convert script to audio using gTTS
def text_to_speech(script):
    """Synthesize ``script`` sentence by sentence and join the audio.

    The text is split on "." and each non-empty piece is spoken with gTTS,
    followed by a 0.5-second pause. A piece that fails to synthesize is
    reported with ``print`` and skipped.

    Args:
        script: Text to speak.

    Returns:
        Path of a temporary MP3 file containing the joined audio. The file
        is not removed here; the caller owns it.
    """
    # Split by sentences for manageable TTS segments
    sentences = [part.strip() for part in (script or "").split(".") if part.strip()]

    pause = AudioSegment.silent(duration=500)  # 0.5-second gap after each sentence
    combined_audio = AudioSegment.empty()

    for sentence in sentences:
        # Only the path is needed; closing the handle immediately avoids
        # leaking one open file descriptor per sentence.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
            segment_path = handle.name
        try:
            # Using 'com' for American accent
            gTTS(text=sentence, lang="en", tld="com").save(segment_path)
            # Append directly instead of re-exporting and re-decoding the
            # segment, which only added another lossy MP3 round trip.
            combined_audio += AudioSegment.from_mp3(segment_path) + pause
        except Exception as e:
            # Best effort: one failing sentence must not abort the whole podcast.
            print(f"Error generating audio for line '{sentence}': {e}")
        finally:
            # Clean up the segment file on success and on failure alike.
            os.remove(segment_path)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
        output_path = handle.name
    combined_audio.export(output_path, format="mp3")
    return output_path
# Generate and play podcast
def generate_and_play_podcast(content, content_type='text'):
    """Produce podcast audio for already-analyzed ``content``.

    The script comes from ``generate_podcast_script`` and is handed straight
    to ``text_to_speech``, whose return value is passed through unchanged.
    ``content_type`` is accepted but not used here.
    """
    return text_to_speech(generate_podcast_script(content))
# Stylesheet handed to gr.Blocks(css=...): dark page background, a centered
# #logo row, and one .letter.<x> rule per logo letter giving it its own color
# and a fade-in delay staggered in 0.1s steps.
css_style = """
body, .gradio-container {
background-color: #020308; /* Replace with your preferred color */
}
#logo {
display: flex;
justify-content: center;
font-size: 3em;
font-weight: bold;
letter-spacing: 3px;
}
.letter {
opacity: 0;
animation: fadeIn 0.1s forwards;
}
.letter.j { animation-delay: 0s; color: #4285F4; } /* Blue */
.letter.u { animation-delay: 0.1s; color: #3A9CF1; }
.letter.s { animation-delay: 0.2s; color: #32B3EE; }
.letter.t { animation-delay: 0.3s; color: #2BC9EA; }
.letter.e { animation-delay: 0.4s; color: #23E0E7; }
.letter.v { animation-delay: 0.5s; color: #1BF7E4; }
.letter.a { animation-delay: 0.6s; color: #14F0B5; } /* Greenish */
@keyframes fadeIn {
0% { opacity: 0; transform: translateY(-20px); }
100% { opacity: 1; transform: translateY(0); }
}
"""
# Gradio UI: a logo header plus one tab each for text and image analysis.
# Markup for the animated logo; each span matches a .letter.<x> rule in css_style.
logo_markup = """
<div id="logo">
<span class="letter j">J</span>
<span class="letter u">u</span>
<span class="letter s">s</span>
<span class="letter t">t</span>
<span class="letter e">E</span>
<span class="letter v">v</span>
<span class="letter a">a</span>
</div>
"""

with gr.Blocks(css=css_style) as app:
    gr.HTML(logo_markup)
    gr.Markdown("<h1 style='text-align: center; color:#f0f0f0;'>Promotes Gender Equality in Every Conversation</h1>")

    with gr.Tab("Text Analysis"):
        text_input = gr.Textbox(
            label="Enter Text or Select an Example",
            placeholder="Type here or select an example...",
            lines=4,
        )
        text_output = gr.Textbox(label="Analysis Output", lines=6)
        analyze_text_btn = gr.Button("Analyze Text")
        listen_podcast_btn = gr.Button("Listen to Eva")

        # Analysis fills the output box; the podcast is built from that output.
        analyze_text_btn.click(fn=analyze_text, inputs=text_input, outputs=text_output)
        text_podcast_audio = gr.Audio()
        listen_podcast_btn.click(
            fn=generate_and_play_podcast,
            inputs=text_output,
            outputs=text_podcast_audio,
        )

    with gr.Tab("Image Analysis"):
        image_input = gr.Image(
            label="Upload Image (e.g., screenshot, photos, etc.)",
            type="pil",
        )
        image_output = gr.Textbox(label="Analysis Output", lines=6)
        analyze_image_btn = gr.Button("Analyze Image")
        listen_podcast_image_btn = gr.Button("Listen to Eva")

        # Same wiring as the text tab, with the image analysis as the source.
        analyze_image_btn.click(fn=analyze_image, inputs=image_input, outputs=image_output)
        image_podcast_audio = gr.Audio()
        listen_podcast_image_btn.click(
            fn=generate_and_play_podcast,
            inputs=image_output,
            outputs=image_podcast_audio,
        )

app.launch()
|