# Page header captured with the file: "Spaces: Running"
import os | |
from PIL import Image | |
import google.generativeai as genai | |
import gradio as gr | |
from gtts import gTTS | |
from pydub import AudioSegment | |
import tempfile | |
# Configure the Gemini client.
# The API key comes from the GOOGLE_API_KEY environment variable (None when unset).
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
MODEL_ID = "gemini-1.5-pro-latest"
genai.configure(api_key=GOOGLE_API_KEY)
# Shared model handle used by every generate_content call in this module.
model = genai.GenerativeModel(MODEL_ID)
# System prompts
# Instruction prefix placed in front of the text and image analysis requests.
analysis_system_prompt = (
    "You are an expert in gender studies. "
    "Analyze the following content for any signs of gender-based discrimination "
    "and suggest actionable advice."
)
# Persona and style rules placed in front of the analysed content when a
# podcast script is requested.
podcast_prompt = """You are Eva, a solo podcast host focusing on gender equality topics.
- Discuss real-life scenarios involving gender-based discrimination, provide insights, and offer solutions in a conversational, storytelling style.
- Based on the analyzed text, create an engaging solo podcast as if reading stories from different victims who send you their story.
- Introduce yourself as Eva.
- Keep the conversation within 30000 characters, with a lot of emotion.
- Use short sentences suitable for speech synthesis.
- Maintain an empathetic tone.
- Include filler words like 'äh' for a natural flow.
- Avoid background music or extra words.
"""
# Model generation configuration
# Sampling parameters handed to the model alongside each prompt.
generation_config = genai.GenerationConfig(
    **{
        "candidate_count": 1,
        "max_output_tokens": 8192,
        "temperature": 0.9,
        "top_p": 1.0,
        "top_k": 32,
    }
)
# Safety settings
# Every listed harm category is mapped to the same BLOCK_LOW_AND_ABOVE threshold.
safety_settings = dict.fromkeys(
    (
        genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT,
        genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    ),
    genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
)
# Analyze text
def analyze_text(text):
    """Ask the Gemini model to analyse *text* for gender-based discrimination.

    Args:
        text: Free-form text to analyse (the UI passes the textbox contents).

    Returns:
        The model's analysis as a string, or a short human-readable message
        when there is nothing to analyse or the model returned no usable text.
    """
    # Skip the API round trip when there is nothing to analyse.
    if not text or not text.strip():
        return "Please enter some text to analyze."

    prompt = f"{analysis_system_prompt}\nContent:\n{text}"
    response = model.generate_content(
        [prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    if not response:
        return "No response generated."
    # The ``.text`` accessor raises ValueError when the response carries no
    # text part (for example when generation was blocked by the safety
    # settings), so the access itself has to be guarded.
    try:
        return response.text
    except ValueError:
        return "No response generated."
# Analyze image
def analyze_image(image: Image.Image) -> str:
    """Ask the Gemini model to analyse an image for gender-based discrimination.

    Args:
        image: The uploaded picture as a PIL image, or ``None`` when the UI
            image component is empty.

    Returns:
        The model's analysis as a string, or a short human-readable message
        when no image was supplied or the model returned no usable text.
    """
    # The image component yields None when nothing has been uploaded.
    if image is None:
        return "Please upload an image to analyze."

    prompt = f"{analysis_system_prompt}\nAnalyze this image for any instances of gender-based discrimination."
    resized_image = preprocess_image(image)
    response = model.generate_content(
        [prompt, resized_image],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    if not response:
        return "No response generated."
    # The ``.text`` accessor raises ValueError when the response carries no
    # text part (for example when generation was blocked by the safety
    # settings), so the access itself has to be guarded.
    try:
        return response.text
    except ValueError:
        return "No response generated."
# Preprocess image by resizing
def preprocess_image(image: Image.Image) -> Image.Image:
    """Scale *image* to a width of 512 pixels, keeping its aspect ratio.

    Args:
        image: The PIL image to shrink or enlarge.

    Returns:
        A new PIL image that is 512 pixels wide, with the height scaled by the
        same factor (never less than 1 pixel).
    """
    target_width = 512
    # Clamp to 1 so extremely wide images do not produce a zero-height size.
    target_height = max(1, int(image.height * target_width / image.width))
    # Return the resized picture itself: the caller forwards this value to the
    # model as the visual part of the request.
    return image.resize((target_width, target_height))
# Generate podcast script
def generate_podcast_script(content):
    """Turn analysed content into a solo podcast script narrated by "Eva".

    Args:
        content: The analysis text the script should be based on.

    Returns:
        The generated script, or a fixed fallback sentence when there is no
        content to work from or the model returned no usable text.
    """
    fallback = "Eva has no commentary at this time."
    # Nothing to narrate (e.g. no analysis has been produced yet): skip the
    # model call.
    if not content or not content.strip():
        return fallback

    prompt = f"{podcast_prompt}\nAnalyzed content:\n{content}"
    response = model.generate_content([prompt], generation_config=generation_config)
    if not response:
        return fallback
    # The ``.text`` accessor raises ValueError when the response carries no
    # text part (for example when generation was blocked), so guard it.
    try:
        return response.text
    except ValueError:
        return fallback
# Convert script to audio using gTTS
def text_to_speech(script):
    """Synthesize *script* sentence by sentence and return the MP3 file path.

    The script is split on "." so each sentence becomes its own gTTS request,
    and half a second of silence is appended after every sentence. Sentences
    that fail to synthesize are reported on stdout and skipped.

    Args:
        script: The text to read aloud.

    Returns:
        Path of a temporary ``.mp3`` file holding the combined audio. The file
        is not deleted by this function.
    """
    pause = AudioSegment.silent(duration=500)  # 0.5-second gap after each sentence
    sentences = [part.strip() for part in script.split(".") if part.strip()]

    combined_audio = AudioSegment.empty()
    for sentence in sentences:
        # Only the path is needed (gTTS writes to it), so release our own
        # handle immediately instead of leaving it open.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
            segment_path = handle.name
        try:
            gTTS(text=sentence, lang="en", tld="com").save(segment_path)  # 'com' for American accent
            # Append the decoded segment directly; re-encoding it to MP3 and
            # reading it back is unnecessary.
            combined_audio += AudioSegment.from_mp3(segment_path) + pause
        except Exception as e:
            # Best effort: one failing sentence must not abort the whole podcast.
            print(f"Error generating audio for line '{sentence}': {e}")
        finally:
            # Remove the scratch file on success and on failure alike.
            if os.path.exists(segment_path):
                os.remove(segment_path)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
        output_path = handle.name
    # export() returns the file object it opened for the path; close it so the
    # descriptor is not left dangling.
    combined_audio.export(output_path, format="mp3").close()
    return output_path
# Generate the podcast audio for playback in the UI
def generate_and_play_podcast(content, content_type='text'):
    """Produce podcast audio for *content* and return the audio file path.

    The content is first turned into a script by ``generate_podcast_script``
    and that script is then voiced by ``text_to_speech``.

    Args:
        content: Analysis text the podcast should be based on.
        content_type: Accepted for callers that pass it; not used in the body.

    Returns:
        Whatever ``text_to_speech`` returns for the generated script.
    """
    return text_to_speech(generate_podcast_script(content))
# Stylesheet handed to gr.Blocks: dark page background plus the animated,
# per-letter coloured logo (each letter fades in 0.1s after the previous one).
css_style = """
body, .gradio-container {
background-color: #020308; /* Replace with your preferred color */
}
#logo {
display: flex;
justify-content: center;
font-size: 3em;
font-weight: bold;
letter-spacing: 3px;
}
.letter {
opacity: 0;
animation: fadeIn 0.1s forwards;
}
.letter.j { animation-delay: 0s; color: #4285F4; } /* Blue */
.letter.u { animation-delay: 0.1s; color: #3A9CF1; }
.letter.s { animation-delay: 0.2s; color: #32B3EE; }
.letter.t { animation-delay: 0.3s; color: #2BC9EA; }
.letter.e { animation-delay: 0.4s; color: #23E0E7; }
.letter.v { animation-delay: 0.5s; color: #1BF7E4; }
.letter.a { animation-delay: 0.6s; color: #14F0B5; } /* Greenish */
@keyframes fadeIn {
0% { opacity: 0; transform: translateY(-20px); }
100% { opacity: 1; transform: translateY(0); }
}
"""
# Gradio interface setup
# Two tabs (text and image), each with an "analyze" button that fills the
# analysis textbox and a "listen" button that voices that analysis.
with gr.Blocks(css=css_style) as app:
    gr.HTML("""
<div id="logo">
<span class="letter j">J</span>
<span class="letter u">u</span>
<span class="letter s">s</span>
<span class="letter t">t</span>
<span class="letter e">E</span>
<span class="letter v">v</span>
<span class="letter a">a</span>
</div>
""")
    gr.Markdown("<h1 style='text-align: center; color:#f0f0f0;'>Promotes Gender Equality in Every Conversation</h1>")

    with gr.Tab("Text Analysis"):
        text_input = gr.Textbox(
            label="Enter Text or Select an Example",
            placeholder="Type here or select an example...",
            lines=4,
        )
        text_output = gr.Textbox(label="Analysis Output", lines=6)
        analyze_text_btn = gr.Button("Analyze Text")
        listen_podcast_btn = gr.Button("Listen to Eva")
        # Audio player created after the buttons so it renders below them.
        text_audio = gr.Audio()
        analyze_text_btn.click(fn=analyze_text, inputs=text_input, outputs=text_output)
        listen_podcast_btn.click(fn=generate_and_play_podcast, inputs=text_output, outputs=text_audio)

    with gr.Tab("Image Analysis"):
        image_input = gr.Image(
            label="Upload Image (e.g., screenshot, photos, etc.)",
            type="pil",
        )
        image_output = gr.Textbox(label="Analysis Output", lines=6)
        analyze_image_btn = gr.Button("Analyze Image")
        listen_podcast_image_btn = gr.Button("Listen to Eva")
        # Audio player created after the buttons so it renders below them.
        image_audio = gr.Audio()
        analyze_image_btn.click(fn=analyze_image, inputs=image_input, outputs=image_output)
        listen_podcast_image_btn.click(fn=generate_and_play_podcast, inputs=image_output, outputs=image_audio)

app.launch()