# build_with_gemini / withaudapp.py
# (Hugging Face Space header — file was renamed from app.py to withaudapp.py;
# kept here as a comment so the module remains valid Python.)
import os
from PIL import Image
import google.generativeai as genai
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import tempfile
# Configure the Google Generative AI client from the environment.
# NOTE(review): GOOGLE_API_KEY is None when the env var is unset; genai.configure
# accepts that silently and every request fails later — consider failing fast here.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
MODEL_ID = "gemini-1.5-pro-latest"
model = genai.GenerativeModel(MODEL_ID)

# System prompts: one for the discrimination analysis, one for the "Eva" podcast persona.
analysis_system_prompt = "You are an expert in gender studies. Analyze the following content for any signs of gender-based discrimination and suggest actionable advice."
podcast_prompt = """You are Eva, a solo podcast host focusing on gender equality topics.
- Discuss real-life scenarios involving gender-based discrimination, provide insights, and offer solutions in a conversational, storytelling style.
- Based on the analyzed text, create an engaging solo podcast as if reading stories from different victims who send you their story.
- Introduce yourself as Eva.
- Keep the conversation within 30000 characters, with a lot of emotion.
- Use short sentences suitable for speech synthesis.
- Maintain an empathetic tone.
- Include filler words like 'äh' for a natural flow.
- Avoid background music or extra words.
"""

# Model generation configuration: high temperature for creative, story-like output;
# single candidate keeps responses deterministic in count.
generation_config = genai.GenerationConfig(
    temperature=0.9,
    top_p=1.0,
    top_k=32,
    candidate_count=1,
    max_output_tokens=8192,
)

# Safety settings: block even low-probability harmful content in every category,
# since the app deliberately discusses sensitive discrimination scenarios.
safety_settings = {
    genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
}
# Analyze text
def analyze_text(text):
    """Run the gender-discrimination analysis prompt over *text* and return the model's reply."""
    full_prompt = f"{analysis_system_prompt}\nContent:\n{text}"
    response = model.generate_content(
        [full_prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    if not response:
        return "No response generated."
    return response.text
# Analyze image
def analyze_image(image: Image.Image) -> str:
    """Ask the model to inspect *image* for gender-based discrimination and return its reply."""
    instruction = f"{analysis_system_prompt}\nAnalyze this image for any instances of gender-based discrimination."
    prepared = preprocess_image(image)
    response = model.generate_content(
        [instruction, prepared],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    if not response:
        return "No response generated."
    return response.text
# Preprocess image by resizing
def preprocess_image(image: "Image.Image") -> "Image.Image":
    """Downscale *image* to 512 px wide, preserving aspect ratio, and return it.

    Bug fix: the original resized the image, discarded the result, and returned
    a hard-coded description STRING — so analyze_image sent text instead of the
    actual picture to the model. Now the resized image itself is returned.
    (The return annotation was `str` for the same reason; corrected. String
    annotations keep the module importable without evaluating PIL names.)
    """
    # Scale height proportionally to the fixed 512-px target width.
    new_height = int(image.height * 512 / image.width)
    return image.resize((512, new_height))
# Generate podcast script
def generate_podcast_script(content):
    """Have the model write Eva's solo-podcast script from the analyzed *content*."""
    request = f"{podcast_prompt}\nAnalyzed content:\n{content}"
    response = model.generate_content([request], generation_config=generation_config)
    if response:
        return response.text
    return "Eva has no commentary at this time."
# Convert script to audio using gTTS
def text_to_speech(script):
    """Synthesize *script* into a single MP3 file and return its path.

    The script is split on '.' into sentence-sized chunks (gTTS handles short
    text best), each chunk is synthesized with a 0.5-second trailing pause,
    and all segments are concatenated into one file.

    Fixes over the original:
    - temp files are created with mkstemp and their descriptors closed at once,
      instead of NamedTemporaryFile(delete=False) handles that were never
      closed (locks the file on Windows, leaks descriptors elsewhere);
    - a segment's temp file is removed when its synthesis fails, rather than
      being leaked on disk.
    """
    lines = [line.strip() for line in script.split(".") if line.strip()]
    segment_paths = []
    for line in lines:
        fd, path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)  # gTTS/pydub open the path themselves; keep no handle of our own
        try:
            tts = gTTS(text=line, lang='en', tld='com')  # 'com' -> American accent
            tts.save(path)
            sound = AudioSegment.from_mp3(path)
            sound += AudioSegment.silent(duration=500)  # 0.5-second pause after each sentence
            sound.export(path, format="mp3")
            segment_paths.append(path)
        except Exception as e:
            print(f"Error generating audio for line '{line}': {e}")
            os.remove(path)  # don't leak the temp file for a failed segment
    combined_audio = AudioSegment.empty()
    for path in segment_paths:
        combined_audio += AudioSegment.from_mp3(path)
        os.remove(path)  # clean up per-segment temp files as we go
    out_fd, output_path = tempfile.mkstemp(suffix=".mp3")
    os.close(out_fd)
    combined_audio.export(output_path, format="mp3")
    return output_path
# Generate and play podcast
def generate_and_play_podcast(content, content_type='text'):
    """Turn analyzed *content* into Eva's narrated podcast; returns an MP3 path.

    `content_type` is accepted for interface compatibility but is not used.
    """
    return text_to_speech(generate_podcast_script(content))
# CSS for the Gradio UI: dark page background plus a per-letter fade-in
# animation that spells "JustEva" in a blue-to-green color gradient.
css_style = """
body, .gradio-container {
background-color: #020308; /* Replace with your preferred color */
}
#logo {
display: flex;
justify-content: center;
font-size: 3em;
font-weight: bold;
letter-spacing: 3px;
}
.letter {
opacity: 0;
animation: fadeIn 0.1s forwards;
}
.letter.j { animation-delay: 0s; color: #4285F4; } /* Blue */
.letter.u { animation-delay: 0.1s; color: #3A9CF1; }
.letter.s { animation-delay: 0.2s; color: #32B3EE; }
.letter.t { animation-delay: 0.3s; color: #2BC9EA; }
.letter.e { animation-delay: 0.4s; color: #23E0E7; }
.letter.v { animation-delay: 0.5s; color: #1BF7E4; }
.letter.a { animation-delay: 0.6s; color: #14F0B5; } /* Greenish */
@keyframes fadeIn {
0% { opacity: 0; transform: translateY(-20px); }
100% { opacity: 1; transform: translateY(0); }
}
"""
# Gradio interface setup: two tabs (text / image), each with an analysis button
# and a "Listen to Eva" button that converts the analysis text into audio.
with gr.Blocks(css=css_style) as app:
    # Animated "JustEva" word mark; colors and fade-in come from css_style.
    gr.HTML("""
<div id="logo">
<span class="letter j">J</span>
<span class="letter u">u</span>
<span class="letter s">s</span>
<span class="letter t">t</span>
<span class="letter e">E</span>
<span class="letter v">v</span>
<span class="letter a">a</span>
</div>
""")
    gr.Markdown("<h1 style='text-align: center; color:#f0f0f0;'>Promotes Gender Equality in Every Conversation</h1>")
    with gr.Tab("Text Analysis"):
        text_input = gr.Textbox(label="Enter Text or Select an Example", placeholder="Type here or select an example...", lines=4)
        text_output = gr.Textbox(label="Analysis Output", lines=6)
        analyze_text_btn = gr.Button("Analyze Text")
        listen_podcast_btn = gr.Button("Listen to Eva")
        # Analysis fills the output textbox; the podcast button reads that
        # textbox back as input and renders Eva's narration as audio.
        analyze_text_btn.click(analyze_text, inputs=text_input, outputs=text_output)
        listen_podcast_btn.click(generate_and_play_podcast, inputs=text_output, outputs=gr.Audio())
    with gr.Tab("Image Analysis"):
        image_input = gr.Image(label="Upload Image (e.g., screenshot, photos, etc.)", type="pil")
        image_output = gr.Textbox(label="Analysis Output", lines=6)
        analyze_image_btn = gr.Button("Analyze Image")
        listen_podcast_image_btn = gr.Button("Listen to Eva")
        # Same wiring as the text tab, but the analysis input is a PIL image.
        analyze_image_btn.click(analyze_image, inputs=image_input, outputs=image_output)
        listen_podcast_image_btn.click(generate_and_play_podcast, inputs=image_output, outputs=gr.Audio())

app.launch()