Spaces:

Woziii
/

datasetTTS

Sleeping

App Files Files Community

Woziii committed 30 days ago

Commit

075f0bd

verified ·

1 Parent(s): a8ecd5f

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -6

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
-# Version: Corrected After Test 3 (V2 - Improved with Scribe-based Timestamp Handling, Debugging Logs Added)
-# Description: Cette version intègre l'affichage des timestamps pour chaque mot,
-# permet une correction manuelle des erreurs, et ajoute une étape intermédiaire
-# avant la validation finale avec des logs détaillés pour le débogage.
 import os
 import shutil
@@ -14,11 +14,12 @@ from pydub import AudioSegment
 from transformers import pipeline
 # -------------------------------------------------
-# Configuration
 # -------------------------------------------------
 MODEL_NAME = "openai/whisper-large-v3"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 pipe = pipeline(
     task="automatic-speech-recognition",
     model=MODEL_NAME,
@@ -26,12 +27,16 @@ pipe = pipeline(
     model_kwargs={"low_cpu_mem_usage": True},
 )
 TEMP_DIR = "./temp_audio"
 os.makedirs(TEMP_DIR, exist_ok=True)
 def init_metadata_state():  # Return a new, empty list (the initial value of the metadata state).
     return []
 def transcribe_audio(audio_path):
     if not audio_path:
         print("[LOG] Aucun fichier audio fourni.")
@@ -54,6 +59,9 @@ def transcribe_audio(audio_path):
     print(f"[LOG DETAIL] Timestamps associés : {word_timestamps}")
     return raw_transcription, [], audio_path, word_timestamps, transcription_with_timestamps
 def preprocess_segments(table_data, word_timestamps):
     print("[LOG] Début du prétraitement des segments...")
     formatted_data = []
@@ -82,6 +90,9 @@ def preprocess_segments(table_data, word_timestamps):
     return formatted_data
 def validate_segments(audio_path, table_data, metadata_state, word_timestamps):
     print("[LOG] Début de la validation des segments...")
     if not audio_path or not word_timestamps:
@@ -125,7 +136,35 @@ def validate_segments(audio_path, table_data, metadata_state, word_timestamps):
     return segment_paths, updated_metadata
 # -------------------------------------------------
-# Interface Gradio
 # -------------------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("# Application de Découpe Audio")

+# Version: Corrected After Test 3 (V2.2.0 - Structured, Commented, and ZIP Generation Restored)
+# Description: Cette version structure le script selon l'ordre des étapes du processus.
+# La génération du fichier ZIP a été réintégrée après avoir été omise dans la version précédente.
+# Chaque section est commentée pour assurer une meilleure lisibilité et une logique claire.
 import os
 import shutil
 from transformers import pipeline
 # -------------------------------------------------
+# 1. Configuration et Initialisation
 # -------------------------------------------------
 MODEL_NAME = "openai/whisper-large-v3"
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# Initialisation du modèle Whisper
 pipe = pipeline(
     task="automatic-speech-recognition",
     model=MODEL_NAME,
     model_kwargs={"low_cpu_mem_usage": True},
 )
+# Création du répertoire temporaire pour stocker les extraits audio
 TEMP_DIR = "./temp_audio"
 os.makedirs(TEMP_DIR, exist_ok=True)
 def init_metadata_state():  # Return a new, empty list (the initial value of the metadata state).
     return []
+# -------------------------------------------------
+# 2. Transcription de l'audio avec Whisper
+# -------------------------------------------------
 def transcribe_audio(audio_path):
     if not audio_path:
         print("[LOG] Aucun fichier audio fourni.")
     print(f"[LOG DETAIL] Timestamps associés : {word_timestamps}")
     return raw_transcription, [], audio_path, word_timestamps, transcription_with_timestamps
+# -------------------------------------------------
+# 3. Prétraitement des segments : Associer les timestamps aux phrases sélectionnées
+# -------------------------------------------------
 def preprocess_segments(table_data, word_timestamps):
     print("[LOG] Début du prétraitement des segments...")
     formatted_data = []
     return formatted_data
+# -------------------------------------------------
+# 4. Validation et découpage des extraits audio
+# -------------------------------------------------
 def validate_segments(audio_path, table_data, metadata_state, word_timestamps):
     print("[LOG] Début de la validation des segments...")
     if not audio_path or not word_timestamps:
     return segment_paths, updated_metadata
 # -------------------------------------------------
+# 5. Génération du fichier ZIP
+# -------------------------------------------------
+def generate_zip(metadata_state):  # Write metadata.csv plus the audio files named in metadata_state into TEMP_DIR/dataset.zip; returns the zip path, or None when metadata_state is empty.
+    if not metadata_state:  # nothing to package: log and bail out
+        print("[LOG ERROR] Aucun segment valide trouvé pour la génération du ZIP.")
+        return None
+    zip_path = os.path.join(TEMP_DIR, "dataset.zip")
+    if os.path.exists(zip_path):  # remove any existing file at zip_path before writing the new archive
+        os.remove(zip_path)
+    metadata_csv_path = os.path.join(TEMP_DIR, "metadata.csv")
+    with open(metadata_csv_path, "w", encoding="utf-8") as f:
+        f.write("audio_file|text|speaker_name|API\n")  # pipe-delimited header row
+        for seg in metadata_state:  # each seg is indexed by "audio_file" and "text"
+            f.write(f"{seg['audio_file']}|{seg['text']}|projectname|/API_PHONETIC/\n")  # NOTE(review): values are written unescaped, so a "|" or newline inside seg['text'] produces a malformed row; the last two columns are fixed literals
+    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:  # NOTE(review): no `import zipfile` is visible in this excerpt - confirm it exists in app.py
+        zf.write(metadata_csv_path, "metadata.csv")  # stored at the archive root
+        for seg in metadata_state:
+            file_path = os.path.join(TEMP_DIR, seg["audio_file"])
+            if os.path.exists(file_path):  # a missing audio file is skipped without a log, yet its row is already in metadata.csv
+                zf.write(file_path, seg["audio_file"])
+    print("[LOG] Fichier ZIP généré avec succès.")
+    return zip_path
+# -------------------------------------------------
+# 6. Interface utilisateur Gradio
 # -------------------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("# Application de Découpe Audio")