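# Gradio demo: voice activity detection with pyannote.
# Upload an audio clip and the app lists the start/stop timestamps of
# every detected speech segment.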
import gradio as gr
import ast

# Load the pyannote voice-activity-detection model through the
# Hugging Face Inference API; calling `model` runs remote inference.
model = gr.Interface.load("huggingface/pyannote/voice-activity-detection")

# Turn the list of detected segments into a readable, one-per-line string.
def format_inference(output):
    if output:
        timestamps = []
        for out in output:
            timestamps.append(f"Start: {out['start']}s; Stop: {out['stop']}s")
        return "\n".join(timestamps)
    else:
        return "No voice activity detected."

def inference(audio_file):
    # The remotely hosted model returns its prediction as a string;
    # parse it back into a list of {'start': ..., 'stop': ...} dicts.
    output = model(audio_file)
    output_list = ast.literal_eval(output)
    return format_inference(output_list)
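# Illustrative (not captured from a real run) shape of the parsed output
# this code expects:
#   [{'start': 0.6, 'stop': 2.2}, {'start': 3.0, 'stop': 5.9}]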

# Input/output components (Gradio 2.x-era gr.inputs / gr.outputs API).
inputs = gr.inputs.Audio(label="Input Audio", type="filepath", source="upload")
outputs = gr.outputs.Textbox(label="Voice timestamps", type="auto")
title = "Voice Activity Detection"
description = "<p style='text-align: center'>Upload an audio file and detected voices will be timestamped.</p>"
article = "<p style='text-align: center'>Model by pyannote: <a href='https://github.com/pyannote/pyannote-audio' target='_blank'>pyannote/pyannote-audio</a></p>"
examples = [["talk.wav"],
            ["talk2.wav"],
            ["silence.wav"]]

# Assemble and launch the interface; debug=True prints stack traces
# for any runtime errors to the console.
gr.Interface(inference,
             inputs,
             outputs,
             title=title,
             description=description,
             article=article,
             examples=examples,
             theme="grass",
             allow_flagging=False,
             ).launch(debug=True)
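# To run locally (assumes a Gradio 2.x install matching this API, with the
# example .wav files placed alongside this script):
#   pip install "gradio<3"
#   python app.py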