File size: 4,959 Bytes
a8cda10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ccb728
 
a8cda10
 
6ccb728
 
 
 
a8cda10
 
 
 
 
936805b
a8cda10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c90fcd7
a8cda10
 
6ccb728
a8cda10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
936805b
 
054adf6
a8cda10
 
 
 
 
 
 
054adf6
a8cda10
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import os

import gradio as gr
import soundfile as sf
import torch
from gradio_client import Client
from huggingface_hub import Repository
from pandas import read_csv

from transformers import pipeline


# load the results file from the private repo
USERNAMES_DATASET_ID = "huggingface-course/audio-course-u7-hands-on"
# Token with write access to the private usernames dataset (set as a Space secret).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Fix: os.path.join builds paths with the OS separator ("\" on Windows),
# which corrupts URLs — assemble the dataset URL with an f-string instead.
usernames_url = f"https://huggingface.co./datasets/{USERNAMES_DATASET_ID}"

# Clone (or reuse) a local checkout of the results dataset and pull latest.
usernames_repo = Repository(local_dir="usernames", clone_from=usernames_url, use_auth_token=HF_TOKEN)
usernames_repo.git_pull()

# CSV of usernames that have already passed the assessment.
CSV_RESULTS_FILE = os.path.join("usernames", "usernames.csv")
all_results = read_csv(CSV_RESULTS_FILE)

# load the LID checkpoint (MMS language identification, 126 languages)
device = "cuda:0" if torch.cuda.is_available() else "cpu"
pipe = pipeline("audio-classification", model="facebook/mms-lid-126", device=device)

# define some constants
# Title rendered at the top of the Gradio interface.
TITLE = "🤗 Audio Transformers Course: Unit 7 Assessment"
# Markdown shown above the interface: explains the exercise, how to submit,
# and where to get help. Rendered verbatim by gr.Interface(description=...).
DESCRIPTION = """
Check that you have successfully completed the hands-on exercise for Unit 7 of the 🤗 Audio Transformers Course by submitting your demo to this Space.

As a reminder, you should start with the template Space provided at [`course-demos/speech-to-speech-translation`](https://huggingface.co./spaces/course-demos/speech-to-speech-translation),
and update the Space to translate from any language X to a **non-English** language Y. Your demo should take as input an audio file, and return as output another audio file, 
matching the signature of the [`speech_to_speech_translation`](https://huggingface.co./spaces/course-demos/speech-to-speech-translation/blob/3946ba6705a6632a63de8672ac52a482ab74b3fc/app.py#L35)
function in the template demo.

To submit your demo for assessment, give the repo id or URL to your demo. For the template demo, this would be `course-demos/speech-to-speech-translation`.
You should ensure that the visibility of your demo is set to **public**. This Space will submit a test file to your demo, and check that the output is 
non-English audio. If your demo successfully returns an audio file, and this audio file is classified as being non-English, you will pass the Unit and 
get a green tick next to your name on the overall [course progress space](https://huggingface.co./spaces/MariaK/Check-my-progress-Audio-Course) ✅

If you experience any issues with using this checker, [open an issue](https://huggingface.co./spaces/huggingface-course/audio-course-u7-assessment/discussions/new)
on this Space and tag [`@sanchit-gandhi`](https://huggingface.co./sanchit-gandhi).
"""
# Minimum confidence the top language prediction must reach to be trusted.
THRESHOLD = 0.5
# Success message template; "USER" is substituted with the submitter's username.
PASS_MESSAGE = "Congratulations USER! Your demo passed the assessment!"


def verify_demo(repo_id):
    """Run the Unit 7 assessment against a user's demo Space.

    Sends a test audio file to the candidate Space, classifies the language
    of the audio it returns with the MMS-LID pipeline, and on success records
    the username in the shared results CSV and pushes it to the Hub.

    Args:
        repo_id: Repo id (``user/space``) or the full URL of the demo Space.

    Returns:
        Tuple of (status message, source audio filepath,
        (sampling_rate, generated audio array), label -> score dict).

    Raises:
        gr.Error: For an invalid repo id, a repeat submission, a Space that
            cannot be loaded or queried, or generated audio that is either
            low-confidence or classified as English.
    """
    if "/" not in repo_id:
        raise gr.Error(f"Ensure you pass a valid repo id to the assessor, got `{repo_id}`")

    split_repo_id = repo_id.split("/")
    user_name = split_repo_id[-2]

    # A full URL (https://huggingface.co./user/space) splits into more than two
    # parts; normalise it down to the trailing "user/space" repo id.
    if len(split_repo_id) > 2:
        repo_id = "/".join(split_repo_id[-2:])

    # BUG FIX: `in` on a pandas Series tests the *index labels*, not the
    # values, so repeat submissions were never detected. Compare against the
    # column values instead.
    if user_name in all_results["username"].values:
        raise gr.Error(f"Username {user_name} has already passed the assessment!")

    try:
        client = Client(repo_id, hf_token=HF_TOKEN)
    except Exception as e:
        raise gr.Error(f"Error with loading Space: {e}") from e

    try:
        audio_file = client.predict("test_short.wav", api_name="/predict")
    except Exception as e:
        raise gr.Error(
            f"Error with querying Space, check that your Space takes an audio file as input and returns an audio as output: {e}"
        ) from e

    audio, sampling_rate = sf.read(audio_file)

    # Classify the language of the generated speech; predictions are
    # returned sorted by score, best first.
    language_prediction = pipe({"array": audio, "sampling_rate": sampling_rate})

    label_outputs = {pred["label"]: pred["score"] for pred in language_prediction}

    top_prediction = language_prediction[0]

    if top_prediction["score"] < THRESHOLD:
        raise gr.Error(
            f"Model made random predictions - predicted {top_prediction['label']} with probability {top_prediction['score']}"
        )
    elif top_prediction["label"] == "eng":
        # Typo fixed in the user-facing message ("langauge" -> "language").
        raise gr.Error(
            "Model generated an English audio - ensure the model is set to generate audio in a non-English language, e.g. Dutch"
        )

    # Record the pass and push the updated results CSV to the private repo.
    all_results.loc[len(all_results)] = {"username": user_name}
    all_results.to_csv(CSV_RESULTS_FILE, index=False)
    usernames_repo.push_to_hub()

    message = PASS_MESSAGE.replace("USER", user_name)

    return message, "test_short.wav", (sampling_rate, audio), label_outputs


# Wire the checker into a simple Gradio interface: one textbox in; status
# text, both audio clips, and the per-language scores out.
repo_box = gr.Textbox(
    placeholder="course-demos/speech-to-speech-translation",
    label="Repo id or URL of your demo",
)
output_components = [
    gr.Textbox(label="Status"),
    gr.Audio(label="Source Speech", type="filepath"),
    gr.Audio(label="Generated Speech", type="numpy"),
    gr.Label(label="Language prediction"),
]
demo = gr.Interface(
    fn=verify_demo,
    inputs=repo_box,
    outputs=output_components,
    title=TITLE,
    description=DESCRIPTION,
)
demo.launch()