File size: 2,535 Bytes
d147382
 
 
 
16a3aec
 
d147382
16a3aec
 
 
 
d147382
 
16a3aec
 
d147382
 
 
16a3aec
 
 
d147382
16a3aec
d147382
 
 
 
 
16a3aec
 
 
d147382
 
 
 
 
 
 
 
 
16a3aec
d147382
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16a3aec
 
d147382
16a3aec
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from cProfile import label
from email.policy import default

from altair import value
import gradio as gr
import os
from lang_list import TEXT_SOURCE_LANGUAGE_NAMES

# Deployment configuration read from the environment; each is None when unset.
HF_API = os.getenv("HF_API")  # NOTE(review): presumably a Hugging Face access token - confirm
API_URL = os.getenv("API_URL")  # path to Seamlessm4t API endpoint

# Language pre-selected in the "Output Language" dropdown.
DEFAULT_TARGET_LANGUAGE = "Western Persian"

# Markdown rendered at the top of the page.
DESCRIPTION = """
# Seamlessm4t + Speaker Diarization + Voice Activity Detection
Here we use seamlessm4t to generate captions for full audios. Audio can be of arbitrary length. 

"""

# Markdown rendered at the bottom of the page: the gated model repositories a
# user has to be granted access to before duplicating this app.
DUPLICATE = """
To duplicate this repo, you have to give permission from three repositories and accept all user conditions: 

1- https://huggingface.co/pyannote/voice-activity-detection

2- https://hf.co/pyannote/segmentation

3- https://hf.co/pyannote/speaker-diarization

"""


def update_audio_ui(audio_source: str) -> tuple[dict, dict]:
    """Build the visibility updates for the two audio input widgets.

    Args:
        audio_source: Selected source. ``"microphone"`` shows the microphone
            widget; any other value shows the file-upload widget.

    Returns:
        A pair of ``gr.update`` results ordered as (microphone input,
        file input). Each one sets the widget's visibility and resets its
        value to ``None``.
    """
    use_microphone = audio_source == "microphone"
    mic_update = gr.update(visible=use_microphone, value=None)
    file_update = gr.update(visible=not use_microphone, value=None)
    return mic_update, file_update


# Page layout: description, input controls, then the duplication notes.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Group():
        # The initial selection is set through `value=` here; no separate
        # update call is needed (calling `.update()` on the component while
        # building the layout only creates a dict that is thrown away).
        target_language = gr.Dropdown(
            choices=TEXT_SOURCE_LANGUAGE_NAMES,
            label="Output Language",
            value=DEFAULT_TARGET_LANGUAGE,
            interactive=True,
        )
        with gr.Row() as audio_box:
            audio_source = gr.Radio(
                choices=["file", "microphone"], value="file", interactive=True
            )
            # Two audio inputs exist side by side; only the one matching the
            # selected source is visible (file upload by default).
            input_audio_mic = gr.Audio(
                label="Input speech",
                type="filepath",
                source="microphone",
                visible=False,
            )
            input_audio_file = gr.Audio(
                label="Input speech",
                type="filepath",
                source="upload",
                visible=True,
            )
            # Hidden component that receives a copy of whichever input changed.
            output = gr.Audio(label="Output", visible=False)
        # Switch the visible input widget whenever the source radio changes.
        audio_source.change(
            fn=update_audio_ui,
            inputs=audio_source,
            outputs=[input_audio_mic, input_audio_file],
            queue=False,
            api_name=False,
        )
        # Mirror the active input's value into the hidden `output` component.
        input_audio_mic.change(lambda x: x, input_audio_mic, output)
        input_audio_file.change(lambda x: x, input_audio_file, output)
        # NOTE(review): no click handler is attached to `submit` in this file,
        # so `text_output` is never filled - confirm whether that is intended.
        submit = gr.Button("Submit")
        text_output = gr.Textbox(label="Transcribed Text", value="", interactive=False)

    gr.Markdown(DUPLICATE)


# Enable the request queue (at most 50 pending requests), then start the app.
demo.queue(max_size=50)
demo.launch()