import os from PIL import Image import google.generativeai as genai import gradio as gr from gtts import gTTS from pydub import AudioSegment import tempfile # Configure Google API Key and model GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") genai.configure(api_key=GOOGLE_API_KEY) MODEL_ID = "gemini-1.5-pro-latest" model = genai.GenerativeModel(MODEL_ID) # System prompts analysis_system_prompt = "You are an expert in gender studies. Analyze the following content for any signs of gender-based discrimination and suggest actionable advice." podcast_prompt = """You are Eva, a solo podcast host focusing on gender equality topics. - Discuss real-life scenarios involving gender-based discrimination, provide insights, and offer solutions in a conversational, storytelling style. - Based on the analyzed text, create an engaging solo podcast as if reading stories from different victims who send you their story. - Introduce yourself as Eva. - Keep the conversation within 30000 characters, with a lot of emotion. - Use short sentences suitable for speech synthesis. - Maintain an empathetic tone. - Include filler words like 'äh' for a natural flow. - Avoid background music or extra words. """ # Model generation configuration generation_config = genai.GenerationConfig( temperature=0.9, top_p=1.0, top_k=32, candidate_count=1, max_output_tokens=8192, ) # Safety settings safety_settings = { genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE, genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE, genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE, genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE, } # Analyze text def analyze_text(text): prompt = f"{analysis_system_prompt}\nContent:\n{text}" response = model.generate_content( [prompt], generation_config=generation_config, safety_settings=safety_settings, ) return response.text if response else "No response generated." # Analyze image def analyze_image(image: Image.Image) -> str: prompt = f"{analysis_system_prompt}\nAnalyze this image for any instances of gender-based discrimination." resized_image = preprocess_image(image) response = model.generate_content( [prompt, resized_image], generation_config=generation_config, safety_settings=safety_settings, ) return response.text if response else "No response generated." # Preprocess image by resizing def preprocess_image(image: Image.Image) -> str: image = image.resize((512, int(image.height * 512 / image.width))) return "a detailed analysis of the visual content, focusing on gender-based discrimination aspects" # Generate podcast script def generate_podcast_script(content): prompt = f"{podcast_prompt}\nAnalyzed content:\n{content}" response = model.generate_content([prompt], generation_config=generation_config) script = response.text if response else "Eva has no commentary at this time." return script # Convert script to audio using gTTS def text_to_speech(script): lines = [line.strip() for line in script.split(".") if line.strip()] # Split by sentences for manageable TTS segments audio_files = [] for line in lines: temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') try: tts = gTTS(text=line, lang='en', tld='com') # Using 'com' for American accent tts.save(temp_file.name) sound = AudioSegment.from_mp3(temp_file.name) sound += AudioSegment.silent(duration=500) # Add a 0.5-second pause after each sentence sound.export(temp_file.name, format="mp3") audio_files.append(temp_file.name) except Exception as e: print(f"Error generating audio for line '{line}': {e}") combined_audio = AudioSegment.empty() for file in audio_files: sound = AudioSegment.from_mp3(file) combined_audio += sound os.remove(file) # Clean up temporary files output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") combined_audio.export(output_file.name, format="mp3") return output_file.name # Generate and play podcast def generate_and_play_podcast(content, content_type='text'): script = generate_podcast_script(content) return text_to_speech(script) css_style = """ body, .gradio-container { background-color: #020308; /* Replace with your preferred color */ } #logo { display: flex; justify-content: center; font-size: 3em; font-weight: bold; letter-spacing: 3px; } .letter { opacity: 0; animation: fadeIn 0.1s forwards; } .letter.j { animation-delay: 0s; color: #4285F4; } /* Blue */ .letter.u { animation-delay: 0.1s; color: #3A9CF1; } .letter.s { animation-delay: 0.2s; color: #32B3EE; } .letter.t { animation-delay: 0.3s; color: #2BC9EA; } .letter.e { animation-delay: 0.4s; color: #23E0E7; } .letter.v { animation-delay: 0.5s; color: #1BF7E4; } .letter.a { animation-delay: 0.6s; color: #14F0B5; } /* Greenish */ @keyframes fadeIn { 0% { opacity: 0; transform: translateY(-20px); } 100% { opacity: 1; transform: translateY(0); } } """ # Gradio interface setup with gr.Blocks(css=css_style) as app: gr.HTML("""