Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import os
|
2 |
import re
|
3 |
-
import spaces
|
4 |
import shutil
|
5 |
import zipfile
|
6 |
import torch
|
@@ -32,130 +31,78 @@ os.makedirs(TEMP_DIR, exist_ok=True)
|
|
32 |
def init_metadata_state():
|
33 |
return []
|
34 |
|
35 |
-
#
|
36 |
-
# 2. Transcription de l'audio avec Whisper (Timestamps de fin + Marge de Sécurité)
|
37 |
-
# -------------------------------------------------
|
38 |
def correct_typography(text):
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
-
|
|
|
|
|
43 |
def transcribe_audio(audio_path):
|
44 |
if not audio_path:
|
45 |
-
print("[LOG] Aucun fichier audio fourni.")
|
46 |
return "Aucun fichier audio fourni", None, [], ""
|
47 |
|
48 |
-
print(f"[LOG] Début de la transcription de {audio_path}...")
|
49 |
result = pipe(audio_path, return_timestamps="word")
|
50 |
words = result.get("chunks", [])
|
51 |
|
52 |
if not words:
|
53 |
-
print("[LOG ERROR] Erreur : Aucun timestamp détecté.")
|
54 |
return "Erreur : Aucun timestamp détecté.", None, [], ""
|
55 |
|
56 |
raw_transcription = " ".join([w["text"] for w in words])
|
57 |
-
|
58 |
-
# 🔄 Correction typographique AVANT affichage
|
59 |
-
raw_transcription = correct_typography(raw_transcription)
|
60 |
-
|
61 |
-
# 🔄 Ajout des timestamps de fin avec marge de sécurité
|
62 |
-
MARGIN = 0.06 # 60ms
|
63 |
-
word_timestamps = []
|
64 |
-
|
65 |
-
for i, w in enumerate(words):
|
66 |
-
start_time = w["timestamp"][0]
|
67 |
-
end_time = w["timestamp"][1] if w["timestamp"][1] is not None else start_time + 0.5
|
68 |
-
|
69 |
-
# Vérifier qu'on ne dépasse pas le début du mot suivant
|
70 |
-
if i < len(words) - 1:
|
71 |
-
next_start_time = words[i + 1]["timestamp"][0]
|
72 |
-
end_time = min(end_time + MARGIN, next_start_time - 0.01) # On laisse 10ms de sécurité
|
73 |
-
|
74 |
-
word_timestamps.append((w["text"], start_time, end_time))
|
75 |
-
|
76 |
transcription_with_timestamps = " ".join([f"{w[0]}[{w[1]:.2f}-{w[2]:.2f}]" for w in word_timestamps])
|
77 |
-
|
78 |
-
print(f"[LOG] Transcription brute corrigée : {raw_transcription}")
|
79 |
return raw_transcription, word_timestamps, transcription_with_timestamps, audio_path
|
80 |
|
81 |
# -------------------------------------------------
|
82 |
-
# 3. Enregistrement des segments définis par l'utilisateur
|
83 |
# -------------------------------------------------
|
84 |
def save_segments(table_data):
|
85 |
-
print("[LOG] Enregistrement des segments définis par l'utilisateur...")
|
86 |
formatted_data = []
|
87 |
-
confirmation_message = "**📌 Segments enregistrés :**\n"
|
88 |
-
|
89 |
for i, row in table_data.iterrows():
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
try:
|
94 |
-
start_time = str(start_time).replace(",", ".")
|
95 |
-
end_time = str(end_time).replace(",", ".")
|
96 |
-
|
97 |
-
if not start_time.replace(".", "").isdigit() or not end_time.replace(".", "").isdigit():
|
98 |
-
raise ValueError("Valeurs de timestamps invalides")
|
99 |
-
|
100 |
-
start_time = float(start_time)
|
101 |
-
end_time = float(end_time)
|
102 |
-
|
103 |
-
if start_time < 0 or end_time <= start_time:
|
104 |
-
raise ValueError("Valeurs incohérentes")
|
105 |
-
|
106 |
-
formatted_data.append([text, start_time, end_time, segment_id])
|
107 |
-
log_message = f"- `{segment_id}` | **Texte** : {text} | ⏱ **{start_time:.2f}s - {end_time:.2f}s**"
|
108 |
-
confirmation_message += log_message + "\n"
|
109 |
-
print(f"[LOG] {log_message}")
|
110 |
-
|
111 |
-
except ValueError as e:
|
112 |
-
print(f"[LOG ERROR] Erreur de conversion des timestamps : {e}")
|
113 |
-
return pd.DataFrame(), "❌ **Erreur** : Vérifiez que les valeurs sont bien des nombres valides."
|
114 |
-
|
115 |
-
return pd.DataFrame(formatted_data, columns=["Texte", "Début (s)", "Fin (s)", "ID"]), confirmation_message
|
116 |
|
117 |
# -------------------------------------------------
|
118 |
-
# 4. Génération du fichier ZIP
|
119 |
# -------------------------------------------------
|
120 |
-
def generate_zip(metadata_state, audio_path):
|
121 |
-
if isinstance(metadata_state, tuple):
|
122 |
-
metadata_state = metadata_state[0] # Extraire le DataFrame si c'est un tuple
|
123 |
-
|
124 |
if metadata_state is None or metadata_state.empty:
|
125 |
-
print("[LOG ERROR] Aucun segment valide trouvé pour la génération du ZIP.")
|
126 |
return None
|
127 |
|
128 |
-
|
129 |
-
|
130 |
-
os.remove(zip_path)
|
131 |
|
|
|
|
|
132 |
metadata_csv_path = os.path.join(TEMP_DIR, "metadata.csv")
|
133 |
-
|
134 |
-
|
135 |
-
metadata_state["
|
136 |
-
|
137 |
-
# Ajouter une colonne "Commentaires" vide
|
138 |
metadata_state["Commentaires"] = ""
|
139 |
-
|
140 |
-
# Réorganiser l’ordre des colonnes
|
141 |
-
metadata_state = metadata_state[["ID", "Texte", "Début (s)", "Fin (s)", "Commentaires"]]
|
142 |
-
|
143 |
-
# Sauvegarde du fichier CSV
|
144 |
metadata_state.to_csv(metadata_csv_path, sep="|", index=False)
|
145 |
-
|
146 |
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
|
147 |
zf.write(metadata_csv_path, "metadata.csv")
|
148 |
original_audio = AudioSegment.from_file(audio_path)
|
149 |
|
150 |
-
for
|
151 |
start_ms, end_ms = int(row["Début (s)"] * 1000), int(row["Fin (s)"] * 1000)
|
152 |
segment_audio = original_audio[start_ms:end_ms]
|
153 |
-
segment_filename = f"{
|
154 |
segment_path = os.path.join(TEMP_DIR, segment_filename)
|
155 |
segment_audio.export(segment_path, format="wav")
|
156 |
zf.write(segment_path, segment_filename)
|
157 |
|
158 |
-
print("[LOG] Fichier ZIP généré avec succès.")
|
159 |
return zip_path
|
160 |
|
161 |
# -------------------------------------------------
|
@@ -166,17 +113,17 @@ with gr.Blocks() as demo:
|
|
166 |
metadata_state = gr.State(init_metadata_state())
|
167 |
|
168 |
audio_input = gr.Audio(type="filepath", label="Fichier audio")
|
|
|
169 |
raw_transcription = gr.Textbox(label="Transcription", interactive=True)
|
170 |
transcription_timestamps = gr.Textbox(label="Transcription avec Timestamps", interactive=True)
|
171 |
-
table = gr.Dataframe(headers=["Texte", "Début (s)", "Fin (s)"], datatype=["str", "str", "str"], row_count=(
|
172 |
save_button = gr.Button("Enregistrer les segments")
|
173 |
-
save_message = gr.Markdown(label="📢 **Message de confirmation**")
|
174 |
generate_button = gr.Button("Générer ZIP")
|
175 |
zip_file = gr.File(label="Télécharger le ZIP")
|
176 |
word_timestamps = gr.State()
|
177 |
|
178 |
audio_input.change(transcribe_audio, inputs=audio_input, outputs=[raw_transcription, word_timestamps, transcription_timestamps, audio_input])
|
179 |
-
save_button.click(save_segments, inputs=table, outputs=
|
180 |
-
generate_button.click(generate_zip, inputs=[metadata_state, audio_input], outputs=zip_file)
|
181 |
|
182 |
-
demo.queue().launch()
|
|
|
1 |
import os
|
2 |
import re
|
|
|
3 |
import shutil
|
4 |
import zipfile
|
5 |
import torch
|
|
|
31 |
def init_metadata_state():
|
32 |
return []
|
33 |
|
34 |
+
# Fonction de correction typographique complète avec détection préalable
|
|
|
|
|
35 |
def correct_typography(text):
    """Normalise spacing around punctuation and apostrophes in *text*.

    The rules are applied in order:

    1. collapse every run of whitespace into a single space;
    2. glue spaced-out elisions back together (``l ' eau`` -> ``l'eau``),
       normalising the typographic apostrophe to ``'``;
    3. remove the space before ``? ! : ;``;
    4. add the missing space after ``? ! : ;`` when a word follows directly;
    5. remove the space before a period, unless a digit precedes the space;
    6. add the missing space after a period when a letter follows directly.

    Args:
        text: The string to clean up. Non-string values (for example an empty
            table cell delivered as ``None`` or ``NaN``) are returned as-is.

    Returns:
        The corrected string with leading and trailing whitespace removed.
    """
    if not isinstance(text, str):
        return text

    substitutions = (
        (r"\s+", " "),
        (r"\b([lLdDmMcCjJnNsStT]) ['’] (\w)", r"\1'\2"),
        (r"(?<=\w) ([?!:;])", r"\1"),
        (r"([?!:;])(?=\w)", r"\1 "),
        (r"(?<!\d) (\.)", r"\1"),
        # The previous template referenced a non-existent group 2, which made
        # re.sub raise on every call. The lookahead only accepts letters so
        # that decimal numbers such as "3.14" are left untouched.
        (r"(\.)(?=[^\W\d_])", r"\1 "),
    )

    corrected_text = text
    for pattern, replacement in substitutions:
        corrected_text = re.sub(pattern, replacement, corrected_text)

    return corrected_text.strip()
|
46 |
|
47 |
+
# -------------------------------------------------
|
48 |
+
# 2. Transcription de l'audio avec Whisper
|
49 |
+
# -------------------------------------------------
|
50 |
def transcribe_audio(audio_path):
    """Transcribe an audio file and collect word-level timestamps.

    Args:
        audio_path: Path of the audio file to transcribe. A falsy value
            (``None`` or an empty string) means that no file was provided.

    Returns:
        A 4-tuple ``(raw_transcription, word_timestamps,
        transcription_with_timestamps, audio_path)`` where
        ``word_timestamps`` is a list of ``(text, start, end)`` tuples and
        ``transcription_with_timestamps`` renders every word as
        ``text[start-end]``. When no audio is given or no word chunk is
        produced, the first element carries the message and the remaining
        elements are the placeholders ``None, [], ""``.
    """
    # NOTE(review): the placeholder order (None, [], "") does not mirror the
    # types of the success path (list, str, path); kept unchanged because the
    # UI wiring may rely on it -- confirm before reordering.
    if not audio_path:
        return "Aucun fichier audio fourni", None, [], ""

    result = pipe(audio_path, return_timestamps="word")
    words = result.get("chunks", [])

    if not words:
        return "Erreur : Aucun timestamp détecté.", None, [], ""

    raw_transcription = " ".join(word["text"] for word in words)

    # A chunk may come back without an end timestamp (None); formatting it
    # with ":.2f" would raise TypeError, so give such a word a nominal length.
    fallback_duration = 0.5
    word_timestamps = []
    for word in words:
        start_time = word["timestamp"][0]
        end_time = word["timestamp"][1]
        if end_time is None:
            end_time = start_time + fallback_duration
        word_timestamps.append((word["text"], start_time, end_time))

    transcription_with_timestamps = " ".join(
        f"{text}[{start_time:.2f}-{end_time:.2f}]"
        for text, start_time, end_time in word_timestamps
    )

    return raw_transcription, word_timestamps, transcription_with_timestamps, audio_path
|
65 |
|
66 |
# -------------------------------------------------
|
67 |
+
# 3. Enregistrement des segments définis par l'utilisateur
|
68 |
# -------------------------------------------------
|
69 |
def _is_blank_cell(value):
    """Return True when a table cell holds no usable value."""
    if value is None:
        return True
    if isinstance(value, float) and value != value:  # NaN never equals itself
        return True
    return isinstance(value, str) and not value.strip()


def _parse_seconds(value):
    """Convert a table cell to float seconds, accepting a decimal comma."""
    if isinstance(value, str):
        value = value.strip().replace(",", ".")
    return float(value)


def save_segments(table_data):
    """Turn the user-edited segment table into a typed DataFrame.

    Args:
        table_data: DataFrame with the columns ``"Texte"``, ``"Début (s)"``
            and ``"Fin (s)"``. Timestamps may be numbers or strings and may
            use either ``.`` or ``,`` as the decimal separator.

    Returns:
        A new DataFrame with the same three columns in which both timestamp
        columns are floats. Rows whose three cells are all blank (the unused
        placeholder rows of the table) are dropped.

    Raises:
        ValueError: If a non-blank row holds a timestamp that cannot be
            converted to a number.
    """
    columns = ["Texte", "Début (s)", "Fin (s)"]
    if table_data is None:
        return pd.DataFrame([], columns=columns)

    formatted_data = []
    for _, row in table_data.iterrows():
        text, start, end = (row[name] for name in columns)
        # Unused rows would otherwise crash the conversion with float("").
        if _is_blank_cell(text) and _is_blank_cell(start) and _is_blank_cell(end):
            continue
        formatted_data.append([text, _parse_seconds(start), _parse_seconds(end)])

    return pd.DataFrame(formatted_data, columns=columns)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
# -------------------------------------------------
|
76 |
+
# 4. Génération du fichier ZIP avec correction typographique
|
77 |
# -------------------------------------------------
|
78 |
+
def _safe_zip_stem(zip_name, default="processed_audio"):
    """Return a file-system-safe base name derived from the user's input."""
    # Path separators, reserved characters and control characters are replaced
    # so the name cannot escape the temp directory; "|" is also the CSV
    # separator used for the metadata file.
    stem = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", str(zip_name or ""))
    stem = stem.strip(" ._")
    return stem or default


def generate_zip(metadata_state, audio_path, zip_name):
    """Build a ZIP archive holding the metadata CSV and one WAV per segment.

    Args:
        metadata_state: DataFrame with the columns ``"Texte"``,
            ``"Début (s)"`` and ``"Fin (s)"``. Anything that is ``None``, is
            empty, or is not a DataFrame (such as the initial list state)
            means there is nothing to export.
        audio_path: Path of the source audio file the segments are cut from.
        zip_name: User-chosen base name for the archive and the segments;
            falls back to ``"processed_audio"`` when blank.

    Returns:
        The path of the generated ZIP file, or ``None`` when there is no
        segment or no audio to export.
    """
    # The state starts out as a plain list, which has no ".empty" attribute.
    if metadata_state is None or getattr(metadata_state, "empty", True):
        return None
    if not audio_path:
        return None

    zip_name = _safe_zip_stem(zip_name)
    zip_folder_name = f"{zip_name}_dataset"
    zip_path = os.path.join(TEMP_DIR, f"{zip_folder_name}.zip")
    metadata_csv_path = os.path.join(TEMP_DIR, "metadata.csv")

    # Work on a copy so the shared UI state is not mutated, and number the
    # segments once so that CSV ids and WAV file names always agree.
    metadata = metadata_state.copy().reset_index(drop=True)
    segment_ids = [f"{zip_name}_seg_{n:02d}" for n in range(1, len(metadata) + 1)]
    metadata["ID"] = segment_ids
    metadata["Texte"] = metadata["Texte"].apply(correct_typography)
    metadata["Commentaires"] = ""
    metadata.to_csv(metadata_csv_path, sep="|", index=False)

    # Decode the audio before opening the archive so that a decoding failure
    # does not leave a partially written ZIP behind.
    original_audio = AudioSegment.from_file(audio_path)

    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.write(metadata_csv_path, "metadata.csv")

        for segment_id, (_, row) in zip(segment_ids, metadata.iterrows()):
            start_ms = int(row["Début (s)"] * 1000)
            end_ms = int(row["Fin (s)"] * 1000)
            segment_audio = original_audio[start_ms:end_ms]
            segment_filename = f"{segment_id}.wav"
            segment_path = os.path.join(TEMP_DIR, segment_filename)
            segment_audio.export(segment_path, format="wav")
            zf.write(segment_path, segment_filename)

    return zip_path
|
107 |
|
108 |
# -------------------------------------------------
|
|
|
113 |
metadata_state = gr.State(init_metadata_state())
|
114 |
|
115 |
audio_input = gr.Audio(type="filepath", label="Fichier audio")
|
116 |
+
zip_name = gr.Textbox(label="Nom du fichier ZIP", placeholder="Nom personnalisé")
|
117 |
raw_transcription = gr.Textbox(label="Transcription", interactive=True)
|
118 |
transcription_timestamps = gr.Textbox(label="Transcription avec Timestamps", interactive=True)
|
119 |
+
table = gr.Dataframe(headers=["Texte", "Début (s)", "Fin (s)"], datatype=["str", "str", "str"], row_count=(5, "dynamic"))
|
120 |
save_button = gr.Button("Enregistrer les segments")
|
|
|
121 |
generate_button = gr.Button("Générer ZIP")
|
122 |
zip_file = gr.File(label="Télécharger le ZIP")
|
123 |
word_timestamps = gr.State()
|
124 |
|
125 |
audio_input.change(transcribe_audio, inputs=audio_input, outputs=[raw_transcription, word_timestamps, transcription_timestamps, audio_input])
|
126 |
+
save_button.click(save_segments, inputs=table, outputs=metadata_state)
|
127 |
+
generate_button.click(generate_zip, inputs=[metadata_state, audio_input, zip_name], outputs=zip_file)
|
128 |
|
129 |
+
demo.queue().launch()
|