"""Gradio app that extracts a pitch track and marks pitch rises and peaks.

The uploaded audio is analysed with Praat (through Parselmouth); every pitch
frame is returned as a JSON record holding its time, pitch, and two 0/1
flags: ``rise_point`` (start of a sustained rise) and ``peak_point`` (end of
that rise).
"""

import gradio as gr
import numpy as np
import pandas as pd
import parselmouth
from parselmouth.praat import call


def find_rises_and_peaks_gradient(data, threshold=4, min_successive_rise=3):
    """Flag the start and the end of sustained pitch rises.

    A rise is a run of at least ``min_successive_rise`` consecutive frames
    whose numerical gradient (``np.gradient`` of the ``pitch`` column) is
    greater than or equal to ``threshold``. The frame just before the run is
    flagged in ``rise_point`` and the last frame of the run in
    ``peak_point``.

    NaN pitch values split the track into independent segments: a NaN frame
    resets the detector, and the first non-NaN frame seen afterwards (or at
    the start of the scan, which begins at index 1) only re-arms it.

    Args:
        data: DataFrame with a numeric ``pitch`` column; NaN marks frames
            without a pitch value. The frame is modified in place.
        threshold: Minimum per-frame gradient that counts as rising.
        min_successive_rise: Minimum number of consecutive rising frames
            required for a run to be reported.

    Returns:
        The same ``data`` object with integer ``rise_point`` and
        ``peak_point`` columns added (or overwritten).
    """
    pitch_values = data['pitch'].to_numpy(dtype=float)
    n_frames = len(pitch_values)

    # Flags are collected positionally and assigned as whole columns, so the
    # result does not depend on the DataFrame's index labels.
    rise_flags = np.zeros(n_frames, dtype=np.int64)
    peak_flags = np.zeros(n_frames, dtype=np.int64)

    # np.gradient needs at least two samples; shorter tracks have no rises.
    if n_frames >= 2:
        gradients = np.gradient(pitch_values)

        in_rise = False
        rise_start = 0
        successive_rise_count = 0
        checking_rise = False  # Armed by the first non-NaN frame after a gap.

        for i in range(1, n_frames):
            if np.isnan(pitch_values[i]):
                # A NaN frame ends the current segment: drop all rise state.
                checking_rise = False
                in_rise = False
                successive_rise_count = 0
                continue

            if not checking_rise:
                # First non-NaN frame of a segment only arms the detector.
                checking_rise = True
                continue

            # A NaN gradient (frame next to a NaN pitch) compares False and
            # therefore closes any rise in progress.
            if gradients[i] >= threshold:
                if not in_rise:
                    in_rise = True
                    rise_start = i - 1
                successive_rise_count += 1
                continue

            if in_rise and successive_rise_count >= min_successive_rise:
                rise_flags[rise_start] = 1
                peak_flags[i - 1] = 1
            in_rise = False
            successive_rise_count = 0

        # A rise that is still in progress on the final frame would otherwise
        # be lost; close it with the peak on the last frame.
        if in_rise and successive_rise_count >= min_successive_rise:
            rise_flags[rise_start] = 1
            peak_flags[n_frames - 1] = 1

    data['rise_point'] = rise_flags
    data['peak_point'] = peak_flags
    return data


def get_pitch(audio_data):
    """Extract the pitch track of a recording and return it as JSON text.

    Args:
        audio_data: ``(rate, data)`` pair where ``rate`` is the sampling
            frequency and ``data`` is a NumPy array of samples; arrays with
            more than one dimension are averaged over axis 1 to get a single
            channel. ``None`` is treated as "no audio supplied".

    Returns:
        A JSON string (``orient='records'``) with one record per pitch frame
        containing ``time``, ``pitch``, ``rise_point`` and ``peak_point``,
        or a message starting with ``"Error in pitch extraction: "`` when
        the analysis fails.
    """
    if audio_data is None:
        return "Error in pitch extraction: no audio provided"

    rate, data = audio_data
    if data.ndim > 1:
        # Multi-channel audio: average the channels down to mono.
        data = np.mean(data, axis=1)

    # Parselmouth expects floating-point samples.
    data = data.astype('float64')
    sound = parselmouth.Sound(values=data, sampling_frequency=rate)

    try:
        # Praat "To Pitch" arguments: time step, pitch floor, pitch ceiling.
        pitch = call(sound, "To Pitch", 0.0, 75, 500)
        pitch_values = pitch.selected_array['frequency']
        # A frequency of 0 is replaced by NaN so those frames act as gaps
        # for the rise detector.
        pitch_values[pitch_values == 0] = np.nan

        df_pitch = pd.DataFrame(
            np.column_stack([pitch.xs(), pitch_values]),
            columns=['time', 'pitch'],
        )
        df_pitch = find_rises_and_peaks_gradient(df_pitch)
        return df_pitch.to_json(orient='records')
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return "Error in pitch extraction: " + str(e)


demo = gr.Interface(fn=get_pitch, inputs="audio", outputs="text")
demo.launch()