Spaces:
Build error
Build error
import os
import tempfile

import gradio as gr
import pandas as pd
import whisper
from pyannote.audio import Pipeline
from pydub import AudioSegment
# Whisper speech-to-text model ("medium" checkpoint), loaded once at import
# time and shared by every transcription request.
model = whisper.load_model("medium")

# pyannote speaker-diarization pipeline. The Hugging Face access token is read
# from the HF_TOKEN environment variable (e.g. a Space secret) so that no
# credential is stored in the source file. When the variable is unset, None is
# passed and pyannote receives no explicit token.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization",
    use_auth_token=os.environ.get("HF_TOKEN"),
)
def diarization(inp_audio):
    """Diarize an audio file and transcribe every speaker turn.

    The module-level ``pipeline`` splits the recording into speaker turns;
    each turn is then transcribed with ``generatetext``. The speaker/text
    lines are also written to ``my_file.txt`` and echoed to stdout.

    Args:
        inp_audio: Path to the audio file to process.

    Returns:
        A string holding one ``start=... stop=... speaker_...`` line per
        turn, followed by one ``<speaker>: <text>`` line per turn.

    Raises:
        ValueError: If ``inp_audio`` is empty or ``None`` (for example the
            button was pressed before any audio was supplied).
    """
    if not inp_audio:
        raise ValueError("No audio file was provided.")

    annotation = pipeline(inp_audio)

    # Collect (start, end, speaker) for every turn and the timing summary.
    segments = []
    timeline_lines = []
    for turn, _, speaker in annotation.itertracks(yield_label=True):
        segments.append((turn.start, turn.end, speaker))
        line = f"start={turn.start:.1f}s stop={turn.end:.1f}s speaker_{speaker}"
        print(line)
        timeline_lines.append(line + "\n")

    # Transcribe each turn separately so the text can be attributed to its
    # speaker.
    dialogue_lines = [
        f"{speaker}: {generatetext(inp_audio, start, end)}\n"
        for start, end, speaker in segments
    ]
    transcript = "".join(dialogue_lines)

    # Explicit UTF-8: transcribed text may contain non-ASCII characters that
    # the platform default encoding cannot represent.
    with open("my_file.txt", "w", encoding="utf-8") as my_file:
        my_file.write(transcript)
    print(transcript)

    return "".join(timeline_lines) + transcript
def generatetext(filename, starttime, endtime):
    """Transcribe the part of an audio file between two timestamps.

    Args:
        filename: Path to the source audio file.
        starttime: Start of the excerpt, in seconds.
        endtime: End of the excerpt, in seconds.

    Returns:
        The value stored under ``"text"`` in the result of
        ``model.transcribe`` for the excerpt.
    """
    # pydub slices in milliseconds.
    start_ms = starttime * 1000
    end_ms = endtime * 1000

    # from_file detects the container format, so inputs that are not WAV
    # (e.g. uploaded MP3 files) are accepted as well.
    excerpt = AudioSegment.from_file(filename)[start_ms:end_ms]

    # Write the excerpt into a private temporary directory instead of a fixed
    # "audio.wav" in the working directory: concurrent requests no longer
    # overwrite each other's clip and nothing is left behind afterwards.
    with tempfile.TemporaryDirectory() as scratch_dir:
        excerpt_path = os.path.join(scratch_dir, "audio.wav")
        # export() returns the open output file; close it so the handle is
        # not leaked and the directory can be removed.
        excerpt.export(excerpt_path, format="wav").close()
        audio = whisper.load_audio(excerpt_path)

    result = model.transcribe(audio)
    print(result)
    transcript = result.get("text")
    print(transcript)
    return transcript
# Gradio UI: an audio input (delivered to the callback as a file path), a
# textbox for the result, and a button that runs `diarization`.
block = gr.Blocks()
with block:
    with gr.Group():
        with gr.Row():
            # `mirror_webcam` was dropped: it is not an Audio option.
            inp_audio = gr.Audio(label="Input Audio", type="filepath")
            outputdialogs = gr.Textbox()
        btn = gr.Button("Generate Text")
        btn.click(
            diarization,
            inputs=[inp_audio],
            outputs=[outputdialogs],
            api_name="view_api",
        )

# Queueing is enabled through Blocks.queue() rather than the deprecated
# `enable_queue` argument of launch().
block.queue()
block.launch(debug=True)