# build_with_gemini / withaudapp.py
# (Hugging Face Space header — file was renamed from app.py to withaudapp.py;
# kept here as a comment so the module remains valid Python.)
import os
from PIL import Image
import google.generativeai as genai
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import tempfile
# Configure the Google Generative AI client from the environment.
# NOTE(review): GOOGLE_API_KEY is None when the env var is unset; genai.configure
# accepts that silently and every request fails later — consider failing fast here.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
MODEL_ID = "gemini-1.5-pro-latest"
model = genai.GenerativeModel(MODEL_ID)

# System prompts: one for the discrimination analysis, one for the "Eva" podcast persona.
analysis_system_prompt = "You are an expert in gender studies. Analyze the following content for any signs of gender-based discrimination and suggest actionable advice."
podcast_prompt = """You are Eva, a solo podcast host focusing on gender equality topics.
- Discuss real-life scenarios involving gender-based discrimination, provide insights, and offer solutions in a conversational, storytelling style.
- Based on the analyzed text, create an engaging solo podcast as if reading stories from different victims who send you their story.
- Introduce yourself as Eva.
- Keep the conversation within 30000 characters, with a lot of emotion.
- Use short sentences suitable for speech synthesis.
- Maintain an empathetic tone.
- Include filler words like 'äh' for a natural flow.
- Avoid background music or extra words.
"""

# Model generation configuration: high temperature for creative, story-like output;
# single candidate keeps responses deterministic in count.
generation_config = genai.GenerationConfig(
    temperature=0.9,
    top_p=1.0,
    top_k=32,
    candidate_count=1,
    max_output_tokens=8192,
)

# Safety settings: block even low-probability harmful content in every category,
# since the app deliberately discusses sensitive discrimination scenarios.
safety_settings = {
    genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
}
# Analyze text
def analyze_text(text):
    """Run the gender-discrimination analysis prompt over *text* and return the model's reply."""
    full_prompt = f"{analysis_system_prompt}\nContent:\n{text}"
    response = model.generate_content(
        [full_prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    if not response:
        return "No response generated."
    return response.text
# Analyze image
def analyze_image(image: Image.Image) -> str:
    """Ask the model to inspect *image* for gender-based discrimination and return its reply."""
    instruction = f"{analysis_system_prompt}\nAnalyze this image for any instances of gender-based discrimination."
    prepared = preprocess_image(image)
    response = model.generate_content(
        [instruction, prepared],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    if not response:
        return "No response generated."
    return response.text
# Preprocess image by resizing
def preprocess_image(image: "Image.Image") -> "Image.Image":
    """Downscale *image* to 512 px wide, preserving aspect ratio, and return it.

    Bug fix: the original resized the image, discarded the result, and returned
    a hard-coded description STRING — so analyze_image sent text instead of the
    actual picture to the model. Now the resized image itself is returned.
    (The return annotation was `str` for the same reason; corrected. String
    annotations keep the module importable without evaluating PIL names.)
    """
    # Scale height proportionally to the fixed 512-px target width.
    new_height = int(image.height * 512 / image.width)
    return image.resize((512, new_height))
# Generate podcast script
def generate_podcast_script(content):
    """Have the model write Eva's solo-podcast script from the analyzed *content*."""
    request = f"{podcast_prompt}\nAnalyzed content:\n{content}"
    response = model.generate_content([request], generation_config=generation_config)
    if response:
        return response.text
    return "Eva has no commentary at this time."
# Convert script to audio using gTTS
def text_to_speech(script):
    """Synthesize *script* into a single MP3 file and return its path.

    The script is split on '.' into sentence-sized chunks (gTTS handles short
    text best), each chunk is synthesized with a 0.5-second trailing pause,
    and all segments are concatenated into one file.

    Fixes over the original:
    - temp files are created with mkstemp and their descriptors closed at once,
      instead of NamedTemporaryFile(delete=False) handles that were never
      closed (locks the file on Windows, leaks descriptors elsewhere);
    - a segment's temp file is removed when its synthesis fails, rather than
      being leaked on disk.
    """
    lines = [line.strip() for line in script.split(".") if line.strip()]
    segment_paths = []
    for line in lines:
        fd, path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)  # gTTS/pydub open the path themselves; keep no handle of our own
        try:
            tts = gTTS(text=line, lang='en', tld='com')  # 'com' -> American accent
            tts.save(path)
            sound = AudioSegment.from_mp3(path)
            sound += AudioSegment.silent(duration=500)  # 0.5-second pause after each sentence
            sound.export(path, format="mp3")
            segment_paths.append(path)
        except Exception as e:
            print(f"Error generating audio for line '{line}': {e}")
            os.remove(path)  # don't leak the temp file for a failed segment
    combined_audio = AudioSegment.empty()
    for path in segment_paths:
        combined_audio += AudioSegment.from_mp3(path)
        os.remove(path)  # clean up per-segment temp files as we go
    out_fd, output_path = tempfile.mkstemp(suffix=".mp3")
    os.close(out_fd)
    combined_audio.export(output_path, format="mp3")
    return output_path
# Generate and play podcast
def generate_and_play_podcast(content, content_type='text'):
    """Turn analyzed *content* into Eva's narrated podcast; returns an MP3 path.

    `content_type` is accepted for interface compatibility but is not used.
    """
    return text_to_speech(generate_podcast_script(content))
# CSS for the Gradio UI: dark page background plus a per-letter fade-in
# animation that spells "JustEva" in a blue-to-green color gradient.
css_style = """
body, .gradio-container {
background-color: #020308; /* Replace with your preferred color */
}
#logo {
display: flex;
justify-content: center;
font-size: 3em;
font-weight: bold;
letter-spacing: 3px;
}
.letter {
opacity: 0;
animation: fadeIn 0.1s forwards;
}
.letter.j { animation-delay: 0s; color: #4285F4; } /* Blue */
.letter.u { animation-delay: 0.1s; color: #3A9CF1; }
.letter.s { animation-delay: 0.2s; color: #32B3EE; }
.letter.t { animation-delay: 0.3s; color: #2BC9EA; }
.letter.e { animation-delay: 0.4s; color: #23E0E7; }
.letter.v { animation-delay: 0.5s; color: #1BF7E4; }
.letter.a { animation-delay: 0.6s; color: #14F0B5; } /* Greenish */
@keyframes fadeIn {
0% { opacity: 0; transform: translateY(-20px); }
100% { opacity: 1; transform: translateY(0); }
}
"""
# Gradio interface setup: two tabs (text / image), each with an analysis button
# and a "Listen to Eva" button that converts the analysis text into audio.
with gr.Blocks(css=css_style) as app:
    # Animated "JustEva" word mark; colors and fade-in come from css_style.
    gr.HTML("""
<div id="logo">
<span class="letter j">J</span>
<span class="letter u">u</span>
<span class="letter s">s</span>
<span class="letter t">t</span>
<span class="letter e">E</span>
<span class="letter v">v</span>
<span class="letter a">a</span>
</div>
""")
    gr.Markdown("<h1 style='text-align: center; color:#f0f0f0;'>Promotes Gender Equality in Every Conversation</h1>")
    with gr.Tab("Text Analysis"):
        text_input = gr.Textbox(label="Enter Text or Select an Example", placeholder="Type here or select an example...", lines=4)
        text_output = gr.Textbox(label="Analysis Output", lines=6)
        analyze_text_btn = gr.Button("Analyze Text")
        listen_podcast_btn = gr.Button("Listen to Eva")
        # Analysis fills the output textbox; the podcast button reads that
        # textbox back as input and renders Eva's narration as audio.
        analyze_text_btn.click(analyze_text, inputs=text_input, outputs=text_output)
        listen_podcast_btn.click(generate_and_play_podcast, inputs=text_output, outputs=gr.Audio())
    with gr.Tab("Image Analysis"):
        image_input = gr.Image(label="Upload Image (e.g., screenshot, photos, etc.)", type="pil")
        image_output = gr.Textbox(label="Analysis Output", lines=6)
        analyze_image_btn = gr.Button("Analyze Image")
        listen_podcast_image_btn = gr.Button("Listen to Eva")
        # Same wiring as the text tab, but the analysis input is a PIL image.
        analyze_image_btn.click(analyze_image, inputs=image_input, outputs=image_output)
        listen_podcast_image_btn.click(generate_and_play_podcast, inputs=image_output, outputs=gr.Audio())

app.launch()