File size: 4,637 Bytes
a8cda10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81b3a7a
a8cda10
 
 
81b3a7a
a8cda10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os

import gradio as gr
import soundfile as sf
import torch
from gradio_client import Client
from huggingface_hub import Repository
from pandas import read_csv

from transformers import pipeline


# load the results file from the private repo
USERNAMES_DATASET_ID = "huggingface-course/audio-course-u7-hands-on"
# access token read from the environment; None when the variable is unset
HF_TOKEN = os.environ.get("HF_TOKEN")

# URLs always use "/" as their separator, whereas os.path.join inserts the
# platform's path separator, so the clone URL is built with string formatting.
# NOTE(review): the host is spelled with a trailing dot ("huggingface.co.") - confirm this is intended.
usernames_url = f"https://huggingface.co./datasets/{USERNAMES_DATASET_ID}"

# clone the dataset repo into ./usernames and pull so the local copy is current
usernames_repo = Repository(local_dir="usernames", clone_from=usernames_url, use_auth_token=HF_TOKEN)
usernames_repo.git_pull()

# table of users that already passed; verify_demo appends to it and pushes it back
CSV_RESULTS_FILE = os.path.join("usernames", "usernames.csv")
all_results = read_csv(CSV_RESULTS_FILE)

# load the LID (language identification) checkpoint, on the first CUDA device
# when one is available and on the CPU otherwise
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
pipe = pipeline(
    "audio-classification",
    model="facebook/mms-lid-126",
    device=device,
)

# define some constants

# status text returned by verify_demo when a demo passes
PASS_MESSAGE: str = "Congratulations! Your demo passed the assessment!"
# minimum score the top language prediction must reach to be trusted
THRESHOLD: float = 0.5

# heading and explanatory text handed to the Gradio interface
TITLE: str = "🤗 Audio Transformers Course: Unit 7 Assessment"
DESCRIPTION: str = """
Check that you have successfully completed the hands-on exercise for Unit 7 of the 🤗 Audio Transformers Course by submitting your demo to this Space.

As a reminder, you should start with the template Space provided at [`course-demos/speech-to-speech-translation`](https://huggingface.co./spaces/course-demos/speech-to-speech-translation),
and update the Space to translate from any language X to a **non-English** language Y. Your demo should take as input an audio file, and return as output another audio file, matching the signature of the 
[`speech_to_speech_translation`](https://huggingface.co./spaces/course-demos/speech-to-speech-translation/blob/3946ba6705a6632a63de8672ac52a482ab74b3fc/app.py#L35)
function in the template demo.

To submit your demo for assessment, give the repo id or URL to your demo. For the template demo, this would be `course-demos/speech-to-speech-translation`. This Space will submit a test file to your demo, and check that the output is non-English audio. If your demo successfully
returns an audio file, and this audio file is classified as being non-English, you will pass the demo and get a green 
tick next to your name! ✅

If you experience any issues with using this checker, [open an issue](https://huggingface.co./spaces/huggingface-course/audio-course-u7-assessment/discussions/new)
on this Space and tag [`@sanchit-gandhi`](https://huggingface.co./sanchit-gandhi).
"""


def verify_demo(repo_id):
    """Check that a submitted Space returns non-English speech and record the pass.

    The Space is queried with ``test.wav`` through its ``/predict`` endpoint, the
    returned audio file is classified by the language-identification pipeline,
    and, when the top prediction is confident and not English, the user name is
    appended to the results CSV and pushed to the Hub.

    Args:
        repo_id: Repo id (``user/space``) or URL of the Space to assess. Only
            the last two ``/``-separated segments are used, so a full URL is
            reduced to ``user/space``.

    Returns:
        A tuple ``(PASS_MESSAGE, (sampling_rate, audio), label_outputs)``, where
        ``label_outputs`` maps each predicted language label to its score.

    Raises:
        gr.Error: If the repo id is malformed, the user has already passed, the
            Space cannot be loaded or queried, the top prediction scores below
            ``THRESHOLD``, or the top prediction is English.
    """
    # Tolerate surrounding whitespace and a trailing slash from pasted URLs;
    # otherwise the Space name would be mistaken for the user name.
    split_repo_id = (repo_id or "").strip().rstrip("/").split("/")

    # A usable id needs both a non-empty owner and a non-empty Space name.
    if len(split_repo_id) < 2 or not all(split_repo_id[-2:]):
        raise gr.Error(f"Ensure you pass a valid repo id to the assessor, got `{repo_id}`")

    user_name = split_repo_id[-2]
    # Reduce a full URL to the trailing `user/space` pair.
    repo_id = "/".join(split_repo_id[-2:])

    # `in` applied to a Series tests its index labels, not the stored values,
    # so the membership check has to be made against the values explicitly.
    if all_results["username"].isin([user_name]).any():
        raise gr.Error(f"Username {user_name} has already passed the assessment!")

    try:
        client = Client(repo_id, hf_token=HF_TOKEN)
    except Exception as e:
        raise gr.Error(f"Error with loading Space: {e}") from e

    try:
        audio_file = client.predict("test.wav", api_name="/predict")
    except Exception as e:
        raise gr.Error(
            f"Error with querying Space, ensure your Space takes an audio file as input and returns an audio as output: {e}"
        ) from e

    audio, sampling_rate = sf.read(audio_file)

    language_prediction = pipe({"array": audio, "sampling_rate": sampling_rate})

    # label -> score mapping for the Label output component
    label_outputs = {pred["label"]: pred["score"] for pred in language_prediction}

    # the first entry is treated as the top prediction
    top_prediction = language_prediction[0]

    if top_prediction["score"] < THRESHOLD:
        raise gr.Error(
            f"Model made random predictions - predicted {top_prediction['label']} with probability {top_prediction['score']}"
        )
    if top_prediction["label"] == "eng":
        raise gr.Error(
            "Model generated an English audio - ensure the model is set to generate audio in a non-English langauge, e.g. Dutch"
        )

    # save and upload new evaluated usernames
    all_results.loc[len(all_results)] = {"username": user_name}
    all_results.to_csv(CSV_RESULTS_FILE, index=False)
    usernames_repo.push_to_hub()

    return PASS_MESSAGE, (sampling_rate, audio), label_outputs


# Input: a single text field for the repo id or URL of the demo to assess.
repo_id_box = gr.Textbox(placeholder="course-demos/speech-to-speech-translation", label="Repo id or URL of your demo")

# Outputs, in the order verify_demo returns them: status text, the audio
# returned by the demo, and the language scores.
status_box = gr.Textbox(label="Status")
speech_player = gr.Audio(label="Generated Speech", type="numpy")
language_scores = gr.Label(label="Language prediction")

demo = gr.Interface(
    fn=verify_demo,
    inputs=repo_id_box,
    outputs=[status_box, speech_player, language_scores],
    title=TITLE,
    description=DESCRIPTION,
)
demo.launch()