Spaces:

lmah
/

PROYECTO1

Runtime error

App Files Files Community

lmah committed on Nov 16, 2024

Commit

fa0577d

1 Parent(s): 01b574c

Add application file and dependencies

Browse files

Files changed (2) hide show

app.py +86 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,86 @@

+from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
+import torch
+import soundfile as sf
+import gradio as gr
+from datasets import load_dataset
+from runware import Runware, IImageInference
+import asyncio
+from dotenv import load_dotenv
+import os
+# Cargar las variables de entorno desde el archivo .env
+load_dotenv()
+RUNWARE_API_KEY = os.getenv("RUNWARE_API_KEY")
+if not RUNWARE_API_KEY:
+    raise ValueError("API key no encontrada. Asegúrate de configurarla en la variable de entorno 'RUNWARE_API_KEY'.")
+# Cargar modelos de texto a voz
+processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
+vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+# Función para generar imagen desde texto usando la API de Runware
+async def generar_imagen_desde_texto(texto):
+    if not (3 <= len(texto) <= 2000):
+        return "Error: El texto debe tener entre 3 y 2000 caracteres."
+    runware = Runware(api_key=RUNWARE_API_KEY)
+    await runware.connect()
+    request_image = IImageInference(
+        positivePrompt=texto,
+        model="civitai:36520@76907",
+        numberResults=1,
+        negativePrompt="cloudy, rainy",
+        height=512,
+        width=512,
+    )
+    images = await runware.imageInference(requestImage=request_image)
+    if images:
+        return images[0].imageURL
+    else:
+        return "No se generó ninguna imagen."
+# Función de texto a voz
+def text_to_speech(text):
+    if not (3 <= len(text) <= 2000):
+        return "Error: El texto debe tener entre 3 y 2000 caracteres.", None
+    # Procesar el texto
+    inputs = processor(text=text, return_tensors="pt")
+    # Obtener el embedding de voz
+    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+    speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
+    # Generar el discurso
+    with torch.no_grad():
+        speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
+    # Guardar el archivo de audio
+    audio_path = "speech.wav"
+    sf.write(audio_path, speech.numpy(), samplerate=16000)
+    # Generar la imagen usando la API de Runware
+    imagen_url = asyncio.run(generar_imagen_desde_texto(text))
+    # Imprimir la URL de la imagen generada
+    print(f"URL de la imagen generada: {imagen_url}")
+    return audio_path, imagen_url
+# Interfaz de Gradio
+iface = gr.Interface(
+    fn=text_to_speech,
+    inputs=gr.Textbox(label="Escribe tu texto aquí"),
+    outputs=[
+        gr.Audio(label="Escucha el audio generado"),
+        gr.Image(label="Imagen generada")
+    ],
+    title="Generación de texto a voz e imagen según texto",
+    live=True
+)
+iface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+transformers
+torch
+soundfile
+gradio
+requests
+datasets
+runware
+python-dotenv