File size: 1,560 Bytes
b98062a
 
d7f4671
abce496
b98062a
 
 
 
 
 
 
 
 
 
 
 
 
 
d7f4671
 
 
 
 
 
 
 
4d11716
 
 
 
d7f4671
c122898
b98062a
 
 
 
 
 
 
 
963ddcb
abce496
b98062a
abce496
b98062a
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51




# import module
import streamlit as st

import datasets

import pandas as pd

# Module-level defaults read by load(). Each name is reassigned from a
# Streamlit widget further down the script before load() is invoked.
access_token = dataset = split = ""  # Hub token, dataset id, split name
skip = 0  # index of the first example to display


# Column names per supported dataset, in the order:
# (audio column, English text column, source-language text column).
_DATASET_COLUMNS = {
    "nlewins/onetalk_questions_full_audio": ("audio_transcription", "en", "transcription"),
    "nlewins/LSK_full_with_audio": ("audio_transcription", "en", "transcription"),
    "nlewins/fleurs_ceb_to_en": ("audio", "transcription_en", "transcription"),
}

# Number of examples fetched and rendered per call to load().
_PAGE_SIZE = 50


def load():
    """Fetch one page of examples and render each as a text table plus audio.

    Reads the module-level ``dataset``, ``split``, ``skip`` and
    ``access_token`` values. The slice ``[skip, skip + _PAGE_SIZE)`` of the
    requested split is loaded; an empty ``split`` falls back to ``"test"``.
    When ``access_token`` is empty, the token is taken from
    ``st.secrets["hf_token"]`` instead.

    If ``dataset`` is not one of the keys of ``_DATASET_COLUMNS``, an error
    message is shown and nothing is downloaded.
    """
    columns = _DATASET_COLUMNS.get(dataset)
    if columns is None:
        # Bail out before downloading anything: without a column mapping the
        # examples cannot be rendered.
        st.error(
            f"Unsupported dataset {dataset!r}; expected one of: "
            f"{', '.join(_DATASET_COLUMNS)}"
        )
        return
    column_with_audio, column_with_english_text, column_with_other_text = columns

    # Honour the split chosen in the UI instead of always reading "test".
    split_name = split or "test"
    first = int(skip)
    token = access_token if access_token != "" else st.secrets["hf_token"]

    ds = datasets.load_dataset(
        dataset,
        token=token,
        split=datasets.ReadInstruction(split_name, from_=first, to=first + _PAGE_SIZE),
    )
    for example in ds:
        # Two rows: source-language text first, English text second.
        df = pd.DataFrame(
            [example[column_with_other_text], example[column_with_english_text]]
        )
        st.table(df.values)
        audio = example[column_with_audio]
        st.audio(audio["array"], sample_rate=audio["sampling_rate"])

# Page title
st.title("One Talk dataset explorer")

# Input widgets. Their values overwrite the module-level defaults that load()
# reads.
access_token = st.text_input("Access token", value="", type="password")
dataset = st.text_input("Dataset", value="nlewins/LSK_full_with_audio")
split = st.text_input("Split", value="test")
# min_value=0 keeps the offset from going negative.
skip = st.number_input("Skip", min_value=0, value=250)

# Clicking the button triggers a script rerun, which reaches the load() call
# below. load is deliberately NOT registered as an on_click callback: a
# callback runs before the script body, so the examples would be fetched and
# rendered twice (once above the title, once below the divider).
st.button("Go")

st.divider()

load()