Woziii commited on
Commit
eb72b93
·
verified ·
1 Parent(s): dde191c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -129
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import shutil
3
  import zipfile
 
4
  from pathlib import Path
5
 
6
  import gradio as gr
@@ -10,7 +11,6 @@ from transformers import pipeline
10
  # -------------------------------------------------
11
  # Configuration
12
  # -------------------------------------------------
13
-
14
  MODEL_NAME = "openai/whisper-large-v3"
15
  device = 0 if torch.cuda.is_available() else "cpu"
16
 
@@ -24,80 +24,49 @@ pipe = pipeline(
24
  TEMP_DIR = "./temp_audio"
25
  os.makedirs(TEMP_DIR, exist_ok=True)
26
 
27
- # -------------------------------------------------
28
- # Gestion de l'état
29
- # -------------------------------------------------
30
  def init_metadata_state():
31
- """
32
- Initialise l'état pour stocker les informations des segments validés.
33
- """
34
  return []
35
 
36
- # -------------------------------------------------
37
- # Étape 2 : Transcription avec Whisper
38
- # -------------------------------------------------
39
  def transcribe_audio(audio_path):
40
- """
41
- Retourne la transcription brute, un tableau de segments vides et le chemin de l'audio.
42
- """
43
  if not audio_path:
44
- return "Aucun fichier audio fourni", [["", None, None, ""] for _ in range(20)], None
45
-
46
- # Transcrire
47
- result = pipe(audio_path, return_timestamps="word")
48
- text = result["text"]
49
-
50
- # Transcription brute
51
- raw_transcription = " ".join([chunk["text"] for chunk in result["chunks"]])
52
-
53
- # Tableau de 20 lignes vides pour l'édition
54
- table_init = [["", None, None, ""] for _ in range(20)]
55
-
56
- return raw_transcription, table_init, audio_path
57
-
58
- # -------------------------------------------------
59
- # Étape 5 : Validation + découpe
60
- # -------------------------------------------------
61
- def validate_segments(audio_path, table_data, metadata_state):
62
- """
63
- Découpe l'audio en fonction des segments validés et met à jour l'état.
64
- """
65
- if not audio_path:
66
- return [None] * 20, metadata_state
67
-
68
- # Nettoyer le dossier temporaire
69
  if os.path.exists(TEMP_DIR):
70
  shutil.rmtree(TEMP_DIR)
71
  os.makedirs(TEMP_DIR, exist_ok=True)
72
 
73
  original_audio = AudioSegment.from_file(audio_path)
74
-
75
  segment_paths = []
76
  updated_metadata = []
77
 
78
  for i, row in enumerate(table_data):
79
- if len(row) < 4:
80
- continue # Ligne incomplète
81
  text, start_time, end_time, segment_id = row
82
-
83
  if not text or start_time is None or end_time is None:
84
- continue # Ligne vide ou incomplète
85
- if not segment_id:
86
- segment_id = f"seg_{i+1:02d}"
87
-
88
- start_ms = int(float(start_time) * 1000)
89
- end_ms = int(float(end_time) * 1000)
90
  if start_ms < 0 or end_ms <= start_ms:
91
  continue
92
-
93
  segment_filename = f"{Path(audio_path).stem}_{segment_id}.wav"
94
  segment_path = os.path.join(TEMP_DIR, segment_filename)
95
-
96
- # Découpe et export
97
  extract = original_audio[start_ms:end_ms]
98
  extract.export(segment_path, format="wav")
99
-
100
- # Stocker les informations
101
  segment_paths.append(segment_path)
102
  updated_metadata.append({
103
  "audio_file": segment_filename,
@@ -106,107 +75,51 @@ def validate_segments(audio_path, table_data, metadata_state):
106
  "end_time": end_time,
107
  "id": segment_id,
108
  })
 
 
109
 
110
- # Remplir les sorties audio (20 max)
111
- output_paths = segment_paths + [None] * (20 - len(segment_paths))
112
-
113
- return output_paths, updated_metadata
114
-
115
- # -------------------------------------------------
116
- # Étape 8 : Génération du ZIP
117
- # -------------------------------------------------
118
  def generate_zip(metadata_state):
119
- """
120
- Génère un fichier ZIP contenant les segments audio et un fichier metadata.csv.
121
- """
122
  if not metadata_state:
123
  return None
124
-
125
  zip_path = os.path.join(TEMP_DIR, "dataset.zip")
126
  if os.path.exists(zip_path):
127
  os.remove(zip_path)
128
-
129
- # Créer le fichier metadata.csv
130
  metadata_csv_path = os.path.join(TEMP_DIR, "metadata.csv")
131
  with open(metadata_csv_path, "w", encoding="utf-8") as f:
132
  f.write("audio_file|text|speaker_name|API\n")
133
  for seg in metadata_state:
134
- line = f"{seg['audio_file']}|{seg['text']}|projectname|/API_PHONETIC/\n"
135
- f.write(line)
136
-
137
- # Ajouter les fichiers au ZIP
138
  with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
139
  zf.write(metadata_csv_path, "metadata.csv")
140
  for seg in metadata_state:
141
  file_path = os.path.join(TEMP_DIR, seg["audio_file"])
142
  if os.path.exists(file_path):
143
  zf.write(file_path, seg["audio_file"])
144
-
145
  return zip_path
146
 
147
  # -------------------------------------------------
148
- # Construction de l'interface Gradio
149
  # -------------------------------------------------
150
- with gr.Blocks(css="style.css") as demo:
151
- gr.Markdown("# Application de Découpe Audio (jusqu'à 20 segments)")
152
-
153
- # State global
154
  metadata_state = gr.State(init_metadata_state())
155
 
156
- # Étape 1 : Téléversement audio
157
- with gr.Column():
158
- gr.Markdown("### 1. Téléversez un fichier audio (MP3/WAV)")
159
- audio_input = gr.Audio(source="upload", type="filepath", label="Fichier audio")
160
-
161
- # Étape 2 : Transcription brute
162
- raw_transcription = gr.Textbox(
163
- label="Transcription (Whisper)",
164
- placeholder="Le texte apparaîtra ici après chargement.",
165
- interactive=False,
166
- )
167
-
168
- # Étape 3 : Table des segments
169
- gr.Markdown("### 2. Définissez vos segments")
170
- table = gr.Dataframe(
171
- headers=["Texte", "Début (s)", "Fin (s)", "ID"],
172
- datatype=["str", "number", "number", "str"],
173
- row_count=20,
174
- col_count=4,
175
- )
176
-
177
- # Bouton de validation
178
- validate_button = gr.Button("Valider et générer les extraits")
179
-
180
- # 20 lecteurs audio
181
  audio_players = [gr.Audio(label=f"Extrait {i+1}", interactive=False) for i in range(20)]
182
-
183
- # Bouton pour générer le fichier ZIP
184
- generate_button = gr.Button("Générer le fichier ZIP")
185
  zip_file = gr.File(label="Télécharger le ZIP")
186
 
187
- # ----------------
188
- # Callbacks
189
- # ----------------
190
-
191
- # Étape 1 : Transcription audio
192
- audio_input.change(
193
- fn=transcribe_audio,
194
- inputs=audio_input,
195
- outputs=[raw_transcription, table, audio_input],
196
- )
197
-
198
- # Étape 5 : Validation des segments
199
- validate_button.click(
200
- fn=validate_segments,
201
- inputs=[audio_input, table, metadata_state],
202
- outputs=audio_players + [metadata_state],
203
- )
204
 
205
- # Étape 8 : Génération du fichier ZIP
206
- generate_button.click(
207
- fn=generate_zip,
208
- inputs=metadata_state,
209
- outputs=zip_file,
210
- )
211
 
212
- demo.queue().launch()
 
1
  import os
2
  import shutil
3
  import zipfile
4
+ import torch
5
  from pathlib import Path
6
 
7
  import gradio as gr
 
11
  # -------------------------------------------------
12
  # Configuration
13
  # -------------------------------------------------
 
14
  MODEL_NAME = "openai/whisper-large-v3"
15
  device = 0 if torch.cuda.is_available() else "cpu"
16
 
 
24
  TEMP_DIR = "./temp_audio"
25
  os.makedirs(TEMP_DIR, exist_ok=True)
26
 
 
 
 
def init_metadata_state() -> list:
    """Return a new empty list to seed the segment-metadata state.

    A fresh list is created on every call so that callers never share
    (and accidentally mutate) the same object.
    """
    return []
29
 
 
 
 
def transcribe_audio(audio_path):
    """Transcribe an audio file with the module-level ASR pipeline.

    Args:
        audio_path: Path of the audio file to transcribe; falsy when no
            file is provided.

    Returns:
        A 4-tuple ``(raw_transcription, table_rows, audio_path,
        word_timestamps)``. ``table_rows`` is always an empty list.
        ``word_timestamps`` is a list of ``(text, timestamp)`` pairs, one
        per chunk returned by the pipeline. When ``audio_path`` is falsy the
        tuple is ``("Aucun fichier audio fourni", [], None, [])``.
    """
    if not audio_path:
        # Always return four values: the event handler is wired to four
        # output components, and a shorter tuple makes Gradio raise.
        return "Aucun fichier audio fourni", [], None, []

    # NOTE(review): the variable names suggest word-level timestamps, but
    # `return_timestamps=True` is what the code requests - confirm whether
    # `"word"` was intended.
    result = pipe(audio_path, return_timestamps=True)
    chunks = result.get("chunks", [])

    raw_transcription = " ".join(chunk["text"] for chunk in chunks)
    word_timestamps = [(chunk["text"], chunk["timestamp"]) for chunk in chunks]

    return raw_transcription, [], audio_path, word_timestamps
41
+
# Number of audio preview slots returned by validate_segments. It must match
# the number of gr.Audio players wired as outputs of the validation button.
_MAX_AUDIO_OUTPUTS = 20


def _clean_cell(value):
    """Return a table cell as a stripped string ('' for None or NaN)."""
    if value is None or value != value:  # NaN is the only value != itself
        return ""
    return str(value).strip()


def _seconds_to_ms(value):
    """Convert a cell holding seconds to whole milliseconds, or None if unusable."""
    try:
        seconds = float(value)
    except (TypeError, ValueError):
        return None
    if seconds != seconds or seconds in (float("inf"), float("-inf")):
        return None
    return int(seconds * 1000)


def _safe_segment_id(value):
    """Return the cell as an identifier that is safe to embed in a file name."""
    return "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in _clean_cell(value)
    )


def validate_segments(audio_path, table_data, metadata_state, word_timestamps):
    """Cut the source audio into the segments described by the table.

    Each usable row ``(text, start_seconds, end_seconds, segment_id)`` is
    exported as a WAV file in ``TEMP_DIR``. Rows with missing text, missing
    or non-numeric times, a negative start, or an end not after the start
    are skipped. A missing ID defaults to ``seg_NN`` (1-based row number).

    Args:
        audio_path: Path of the source audio file.
        table_data: Segment rows, either a list of rows or a pandas
            DataFrame.
        metadata_state: Current metadata list; returned unchanged when
            nothing can be processed.
        word_timestamps: Result of the last transcription; validation is
            refused while it is empty.

    Returns:
        A list of ``_MAX_AUDIO_OUTPUTS + 1`` items: the exported file paths
        padded with ``None`` (only the first ``_MAX_AUDIO_OUTPUTS`` are
        previewed), followed by the metadata list. Every exported segment
        is recorded in the metadata even when it has no preview slot.
    """
    if not audio_path or not word_timestamps:
        return [None] * _MAX_AUDIO_OUTPUTS + [metadata_state]

    # Start from an empty working directory so that clips from a previous
    # validation never end up in the next dataset.
    if os.path.exists(TEMP_DIR):
        shutil.rmtree(TEMP_DIR)
    os.makedirs(TEMP_DIR, exist_ok=True)

    original_audio = AudioSegment.from_file(audio_path)

    # Iterating a DataFrame yields column labels, not rows, so convert it
    # to a plain list of rows first.
    if hasattr(table_data, "values") and hasattr(table_data.values, "tolist"):
        rows = table_data.values.tolist()
    else:
        rows = list(table_data) if table_data is not None else []

    stem = Path(audio_path).stem
    segment_paths = []
    updated_metadata = []

    for i, row in enumerate(rows):
        if len(row) < 4:
            continue
        text, start_time, end_time, segment_id = row[:4]

        text = _clean_cell(text)
        start_ms = _seconds_to_ms(start_time)
        end_ms = _seconds_to_ms(end_time)
        if not text or start_ms is None or end_ms is None:
            continue
        if start_ms < 0 or end_ms <= start_ms:
            continue

        # The ID comes from user input and is embedded in a path: keep only
        # characters that cannot escape TEMP_DIR.
        segment_id = _safe_segment_id(segment_id) or f"seg_{i+1:02d}"

        segment_filename = f"{stem}_{segment_id}.wav"
        segment_path = os.path.join(TEMP_DIR, segment_filename)

        extract = original_audio[start_ms:end_ms]
        extract.export(segment_path, format="wav")

        segment_paths.append(segment_path)
        updated_metadata.append({
            "audio_file": segment_filename,
            "text": text,
            "start_time": start_time,
            "end_time": end_time,
            "id": segment_id,
        })

    # One value per preview slot, then the metadata state.
    previews = (segment_paths + [None] * _MAX_AUDIO_OUTPUTS)[:_MAX_AUDIO_OUTPUTS]
    return previews + [updated_metadata]
80
 
 
 
 
 
 
 
 
 
def generate_zip(metadata_state):
    """Bundle the exported segments and a ``metadata.csv`` into a ZIP file.

    The CSV is pipe-delimited with the header
    ``audio_file|text|speaker_name|API`` and one line per segment. Segment
    files that no longer exist in ``TEMP_DIR`` are left out of the archive.

    Args:
        metadata_state: List of segment dicts providing at least the
            ``"audio_file"`` and ``"text"`` keys.

    Returns:
        The path of the created ``dataset.zip``, or ``None`` when
        ``metadata_state`` is empty.
    """
    if not metadata_state:
        return None

    os.makedirs(TEMP_DIR, exist_ok=True)

    zip_path = os.path.join(TEMP_DIR, "dataset.zip")
    if os.path.exists(zip_path):
        os.remove(zip_path)

    metadata_csv_path = os.path.join(TEMP_DIR, "metadata.csv")
    with open(metadata_csv_path, "w", encoding="utf-8") as f:
        f.write("audio_file|text|speaker_name|API\n")
        for seg in metadata_state:
            # A "|" or a line break inside the text would shift the columns
            # or split the record, so flatten the text to a single line
            # without the delimiter.
            text = " ".join(str(seg["text"]).replace("|", " ").split())
            f.write(f"{seg['audio_file']}|{text}|projectname|/API_PHONETIC/\n")

    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.write(metadata_csv_path, "metadata.csv")
        for seg in metadata_state:
            file_path = os.path.join(TEMP_DIR, seg["audio_file"])
            if os.path.exists(file_path):
                zf.write(file_path, seg["audio_file"])

    return zip_path
103
 
# -------------------------------------------------
# Gradio interface
# -------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Application de Découpe Audio")

    # Metadata of the validated segments, shared between the validation
    # and ZIP-generation callbacks.
    metadata_state = gr.State(init_metadata_state())

    audio_input = gr.Audio(type="filepath", label="Fichier audio")
    raw_transcription = gr.Textbox(label="Transcription", interactive=False)
    table = gr.Dataframe(
        headers=["Texte", "Début (s)", "Fin (s)", "ID"],
        datatype=["str", "number", "number", "str"],
        row_count="dynamic",
    )
    validate_button = gr.Button("Valider")
    # Fixed pool of 20 preview players: the validation callback must return
    # one value per player, plus the metadata state.
    audio_players = [
        gr.Audio(label=f"Extrait {i+1}", interactive=False) for i in range(20)
    ]
    generate_button = gr.Button("Générer ZIP")
    zip_file = gr.File(label="Télécharger le ZIP")

    # Timestamps produced by the last transcription.
    word_timestamps = gr.State()

    # NOTE(review): `audio_input` is both the trigger and an output of this
    # event; confirm that writing it back does not re-fire `.change`.
    audio_input.change(
        transcribe_audio,
        inputs=audio_input,
        outputs=[raw_transcription, table, audio_input, word_timestamps],
    )
    validate_button.click(
        validate_segments,
        inputs=[audio_input, table, metadata_state, word_timestamps],
        outputs=audio_players + [metadata_state],
    )
    generate_button.click(generate_zip, inputs=metadata_state, outputs=zip_file)

# Only start the server when executed as a script, so importing this module
# (for example from a test) does not launch the app.
if __name__ == "__main__":
    demo.queue().launch()