"""Split an audio file into non-silent chunks and merge such chunks back."""

import os
import re
import unicodedata

import numpy as np
from pydub import AudioSegment
from pydub.silence import detect_nonsilent

# Matches one line of the timestamps file written by process_audio, e.g.
# ".../chunk3.wav starts at 1200 ms".
_TIMESTAMP_LINE = re.compile(r"(chunk\d+\.wav) starts at (\d+) ms")


def _sanitize(text):
    """Drop box-drawing and non-word characters; turn whitespace runs into '_'."""
    text = re.sub(r"[\u2500-\u257F]+", "", text)
    text = re.sub(r"[^\w\s-]", "", text)
    return re.sub(r"\s+", "_", text)


def format_title(title: str) -> str:
    """Return a file-system friendly version of *title*.

    The title is NFKD-normalised and folded to ASCII (so accented letters
    lose their accents), box-drawing characters and anything that is not a
    word character, whitespace or '-' are removed, and each run of
    whitespace becomes a single underscore.

    If ASCII folding leaves no word characters at all (for example a title
    written entirely in a non-Latin script), the original Unicode text is
    sanitised instead so the result does not collapse to an empty name.
    """
    ascii_title = (
        unicodedata.normalize("NFKD", title)
        .encode("ascii", "ignore")
        .decode("utf-8")
    )
    # Each step must feed the next one; previously the later substitutions
    # restarted from the raw title and silently discarded the folding.
    if re.search(r"\w", ascii_title):
        return _sanitize(ascii_title)
    return _sanitize(title)


def process_audio(file_path: str, silence_thresh=-70, min_silence_len=750):
    """Split the audio file at *file_path* into its non-silent chunks.

    Chunks are exported as ``chunk<i>.wav`` into a directory created next
    to the input file and named after the sanitised file name. The start
    time of every chunk is recorded in ``<name>_timestamps.txt`` in the
    input file's directory; any previous timestamps file is removed first.

    Args:
        file_path: Path of the audio file to split.
        silence_thresh: Silence threshold in dB passed to detect_nonsilent.
        min_silence_len: Minimum silence length in ms passed to
            detect_nonsilent.

    Returns:
        ``("Finish", chunk_directory)`` on success, ``("Error", None)`` if
        any exception occurred (the exception is printed, not raised).
    """
    try:
        song = AudioSegment.from_file(file_path)
        print("Ignore the warning if you saw any...")

        nonsilent_parts = detect_nonsilent(
            song,
            min_silence_len=min_silence_len,
            silence_thresh=silence_thresh,
        )

        # Create a new directory to store chunks.
        file_dir = os.path.dirname(file_path)
        # NOTE: only the text before the first '.' of the file name is kept.
        file_name = format_title(os.path.basename(file_path).split(".")[0])
        new_dir_path = os.path.join(file_dir, file_name)
        os.makedirs(new_dir_path, exist_ok=True)

        # Remove a stale timestamps file so old entries never leak through.
        timestamps_file = os.path.join(file_dir, f"{file_name}_timestamps.txt")
        if os.path.isfile(timestamps_file):
            os.remove(timestamps_file)

        # Export chunks and remember where each one starts.
        timestamp_lines = []
        for i, (start_i, end_i) in enumerate(nonsilent_parts):
            chunk_file_path = os.path.join(new_dir_path, f"chunk{i}.wav")
            song[start_i:end_i].export(chunk_file_path, format="wav")
            print(f"Segment {i} created!")
            timestamp_lines.append(
                f"{chunk_file_path} starts at {start_i} ms\n"
            )

        # Write all start times in one go instead of reopening per chunk;
        # as before, no file is created when there are no segments.
        if timestamp_lines:
            with open(timestamps_file, "w", encoding="utf-8") as f:
                f.writelines(timestamp_lines)

        print(f"Total segments created: {len(timestamp_lines)}")
        print(f"Split all chunks for {file_path} successfully!")
        return "Finish", new_dir_path

    except Exception as e:
        print(f"An error occurred: {e}")
        return "Error", None


def merge_audio(timestamps_file: str):
    """Rebuild one audio stream from the chunks listed in *timestamps_file*.

    The chunk directory is expected next to the timestamps file and named
    after the file's ``_timestamps.txt`` prefix. Chunks are concatenated in
    file order, with silence inserted so each chunk begins at its recorded
    start time (no silence is inserted when a chunk would overlap the
    previous one).

    Returns:
        ``(frame_rate, samples)`` where *samples* is a NumPy array built
        from the merged audio's sample array, or ``None`` if any exception
        occurred (the exception is printed, not raised).
    """
    try:
        # Extract prefix from the timestamps filename.
        prefix = os.path.basename(timestamps_file).replace(
            "_timestamps.txt", ""
        )
        timestamps_dir = os.path.dirname(timestamps_file)

        with open(timestamps_file, "r", encoding="utf-8") as f:
            lines = f.readlines()

        audio_segments = []
        last_end_time = 0

        print(f"Processing file: {timestamps_file}")

        for line in lines:
            match = _TIMESTAMP_LINE.search(line)
            if not match:
                continue

            filename, start_text = match.groups()
            start_time = int(start_text)
            chunk_file = os.path.join(timestamps_dir, prefix, filename)

            # Pad with silence from the end of the previous chunk up to
            # this chunk's start time.
            silence_duration = max(start_time - last_end_time, 0)
            audio_segments.append(
                AudioSegment.silent(duration=silence_duration)
            )

            audio = AudioSegment.from_wav(chunk_file)
            audio_segments.append(audio)

            last_end_time = start_time + len(audio)
            print(f"Processed chunk: {chunk_file}")

        if not audio_segments:
            # sum([]) would yield the int 0 and fail obscurely below.
            raise ValueError(f"No chunk entries found in {timestamps_file}")

        # Concatenate all segments and expose them as raw samples.
        merged_audio = sum(audio_segments)
        merged_audio_np = np.array(merged_audio.get_array_of_samples())
        return merged_audio.frame_rate, merged_audio_np

    except Exception as e:
        print(f"An error occurred: {e}")
        return None