"""Gradio app: extract the visible text of a web page and read it aloud.

The text is scraped with requests + BeautifulSoup and synthesized with
``kokoro_tts.generate_audio``.
"""

import logging

# NOTE(review): `spaces` was the first import in the original file; it is kept
# ahead of the other third-party imports in case that ordering matters for the
# GPU decorator — confirm before letting an import sorter move it.
import spaces

import gradio as gr
import requests
from bs4 import BeautifulSoup
from kokoro_tts import generate_audio

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Seconds to wait for the remote server before giving up. Without a timeout
# requests waits indefinitely and a stalled server would hang the UI handler.
REQUEST_TIMEOUT_SECONDS = 15

# Sample rate reported to the gr.Audio component together with the audio data.
SAMPLE_RATE = 24000


class Voices:
    """Static lookup tables for the languages and voices offered in the UI.

    All three tables are keyed by the same one-letter language code.
    """

    # Language code -> flag emoji shown in the language dropdown.
    flags = {
        "a": "🇺🇸",
        "b": "🇬🇧",
        "e": "🇪🇸",
        "f": "🇫🇷",
        "h": "🇮🇳",
        "i": "🇮🇹",
        "j": "🇯🇵",
        "p": "🇧🇷",
        "z": "🇨🇳",
    }

    # Language code -> plain-text language name, shown next to the flag.
    # NOTE(review): the "_win" suffix presumably refers to platforms that do
    # not render flag emoji — confirm.
    flags_win = {
        "a": "american",
        "b": "british",
        "e": "spanish",
        "f": "french",
        "h": "hindi",
        "i": "italian",
        "j": "japanese",
        "p": "portuguese",
        "z": "chinese",
    }

    # Language code -> voice identifiers available for that language.
    # Every identifier starts with its language code.
    voices = {
        "a": [
            "af_alloy",
            "af_aoede",
            "af_bella",
            "af_heart",
            "af_jessica",
            "af_kore",
            "af_nicole",
            "af_nova",
            "af_river",
            "af_sarah",
            "af_sky",
            "am_adam",
            "am_echo",
            "am_eric",
            "am_fenrir",
            "am_liam",
            "am_michael",
            "am_onyx",
            "am_puck",
            "am_santa",
        ],
        "b": [
            "bf_alice",
            "bf_emma",
            "bf_isabella",
            "bf_lily",
            "bm_daniel",
            "bm_fable",
            "bm_george",
            "bm_lewis",
        ],
        "e": ["ef_dora", "em_alex", "em_santa"],
        "f": ["ff_siwis"],
        "h": ["hf_alpha", "hf_beta", "hm_omega", "hm_psi"],
        "i": ["if_sara", "im_nicola"],
        "j": ["jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro", "jm_kumo"],
        "p": ["pf_dora", "pm_alex", "pm_santa"],
        "z": [
            "zf_xiaobei",
            "zf_xiaoni",
            "zf_xiaoxiao",
            "zf_xiaoyi",
            "zm_yunjian",
            "zm_yunxi",
            "zm_yunxia",
            "zm_yunyang",
        ],
    }


def extract_text_from_url(url: str) -> str:
    """Download *url* and return its visible text, one non-empty line per row.

    Args:
        url: Address of the page to scrape.

    Returns:
        The page text with ``<script>``/``<style>`` content removed and blank
        lines dropped. On any failure (invalid URL, network error, timeout,
        HTTP error status, parse error) the string ``"Error: <message>"`` is
        returned instead of raising, so the message appears in the output
        textbox.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()  # Treat 4xx/5xx responses as failures.

        soup = BeautifulSoup(response.text, "html.parser")

        # Script and style bodies are not human-readable page content.
        for tag in soup(["script", "style"]):
            tag.decompose()

        text = soup.get_text(separator="\n", strip=True)

        # Collapse runs of blank or whitespace-only lines.
        stripped_lines = (line.strip() for line in text.splitlines())
        return "\n".join(line for line in stripped_lines if line)
    except Exception as e:
        # Deliberately broad: this is the UI boundary and the caller expects
        # an error string rather than an exception.
        logger.warning("Failed to extract text from %r: %s", url, e)
        return f"Error: {str(e)}"


def get_language_choices():
    """Return ``(label, code)`` pairs for the language dropdown.

    Each label is the flag emoji followed by the title-cased language name;
    the value is the one-letter language code.
    """
    return [
        (f"{Voices.flags[code]} {Voices.flags_win[code].title()}", code)
        for code in Voices.voices
    ]


def get_voice_choices(lang_code):
    """Return the voice identifiers for *lang_code*, or ``[]`` if unknown."""
    return Voices.voices.get(lang_code, [])


@spaces.GPU(duration=60)
def text_to_audio(text, lang_code, voice, progress=gr.Progress()):
    """Synthesize *text* with the selected voice.

    Args:
        text: Text to read aloud.
        lang_code: Selected language code. Accepted because the UI passes it,
            but not used: only *voice* is forwarded to ``generate_audio``.
        voice: Voice identifier forwarded to ``generate_audio``.
        progress: Gradio progress tracker forwarded to ``generate_audio``.
            The ``gr.Progress()`` default is how the UI wiring supplies it.

    Returns:
        ``(sample_rate, audio_data)`` on success, or ``None`` when there is
        nothing to synthesize or generation fails.
    """
    if not text or not text.strip():
        logger.warning("No text provided; skipping audio generation")
        return None

    try:
        audio_data = generate_audio(text, voice=voice, progress=progress)
    except Exception:
        # Best-effort: keep the UI alive and leave the audio output empty.
        logger.exception("Error generating audio")
        return None
    return (SAMPLE_RATE, audio_data)


# Create Gradio interface
with gr.Blocks(title="Web Page Text Extractor & Audio Generator") as demo:
    gr.Markdown("# Web Page Text Extractor & Audio Generator")
    gr.Markdown(
        "Scrape a website and generate audio using [hexgrad/Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M)"
    )

    with gr.Row():
        url_input = gr.Textbox(
            label="Enter URL", value="https://paulgraham.com/words.html"
        )
        extract_btn = gr.Button("Extract Text")

    text_output = gr.Textbox(label="Extracted Text", lines=10, interactive=True)

    with gr.Row():
        lang_dropdown = gr.Dropdown(
            choices=get_language_choices(),
            label="Language",
            value="a",  # Default to English
        )
        voice_dropdown = gr.Dropdown(
            choices=Voices.voices["a"],  # Default to English voices
            label="Voice",
            value="am_onyx",  # Default voice
        )
        generate_btn = gr.Button("Generate Audio")

    audio_output = gr.Audio(label="Generated Audio")

    def update_voices(lang_code):
        """Refresh the voice dropdown after the language changes.

        The selected value is reset to the first voice of the new language;
        otherwise the dropdown would keep a voice that is no longer among its
        choices.
        """
        voice_choices = get_voice_choices(lang_code)
        selected = voice_choices[0] if voice_choices else None
        return gr.Dropdown(choices=voice_choices, value=selected)

    extract_btn.click(fn=extract_text_from_url, inputs=url_input, outputs=text_output)
    lang_dropdown.change(fn=update_voices, inputs=lang_dropdown, outputs=voice_dropdown)
    generate_btn.click(
        fn=text_to_audio,
        inputs=[text_output, lang_dropdown, voice_dropdown],
        outputs=audio_output,
    )

if __name__ == "__main__":
    demo.launch()