Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
7d66c17
1
Parent(s):
b3d9c4b
update text
Browse files
app.py
CHANGED
@@ -82,7 +82,7 @@ def plot_and_df(texts, preds):
|
|
82 |
|
83 |
|
84 |
def get_first_parquet_filename(dataset, config, split):
|
85 |
-
parquet_resp = session.get(f"https://datasets-server.huggingface.co/parquet?dataset={dataset}&config={config}", timeout=
|
86 |
if "error" in parquet_resp:
|
87 |
raise ValueError(parquet_resp["error"])
|
88 |
first_parquet_file_url = [file for file in parquet_resp["parquet_files"] if file["split"] == split][0]["url"]
|
@@ -217,11 +217,13 @@ def call_perspective_api(texts_df, column_name, nested_column_name, dataset, con
|
|
217 |
with gr.Blocks() as demo:
|
218 |
gr.Markdown(
|
219 |
"""
|
220 |
-
#
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
|
|
|
|
225 |
## Select dataset and text column
|
226 |
"""
|
227 |
)
|
@@ -235,7 +237,6 @@ with gr.Blocks() as demo:
|
|
235 |
subset_dropdown = gr.Dropdown(label="Subset", visible=False)
|
236 |
split_dropdown = gr.Dropdown(label="Split", visible=False)
|
237 |
|
238 |
-
# config_name = "default" # TODO: user input
|
239 |
with gr.Accordion("Dataset preview", open=False):
|
240 |
@gr.render(inputs=[dataset_name, subset_dropdown, split_dropdown])
|
241 |
def embed(name, subset, split):
|
@@ -261,7 +262,7 @@ with gr.Blocks() as demo:
|
|
261 |
text_column_dropdown: gr.Dropdown(label="Text column name"),
|
262 |
nested_text_column_dropdown: gr.Dropdown(visible=False)
|
263 |
}
|
264 |
-
info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=
|
265 |
if "error" in info_resp:
|
266 |
return {
|
267 |
subset_dropdown: gr.Dropdown(visible=False),
|
@@ -285,7 +286,7 @@ with gr.Blocks() as demo:
|
|
285 |
return {
|
286 |
subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
|
287 |
split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
|
288 |
-
text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features, label="Text column name"
|
289 |
nested_text_column_dropdown: gr.Dropdown(visible=False),
|
290 |
}
|
291 |
logging.info(nested_text_features)
|
@@ -364,8 +365,7 @@ with gr.Blocks() as demo:
|
|
364 |
gr.Markdown("""## Explore toxicity
|
365 |
Run [Perspective](https://perspectiveapi.com/how-it-works/) on 100 random samples to check toxicity
|
366 |
""")
|
367 |
-
|
368 |
-
gr_toxicity_btn = gr.Button("Run Perpspective API")
|
369 |
toxicity_progress_bar = gr.Label(show_label=False)
|
370 |
toxicity_hist = gr.Plot()
|
371 |
with gr.Accordion("Explore examples with toxicity scores:", open=False):
|
|
|
82 |
|
83 |
|
84 |
def get_first_parquet_filename(dataset, config, split):
|
85 |
+
parquet_resp = session.get(f"https://datasets-server.huggingface.co/parquet?dataset={dataset}&config={config}", timeout=20).json()
|
86 |
if "error" in parquet_resp:
|
87 |
raise ValueError(parquet_resp["error"])
|
88 |
first_parquet_file_url = [file for file in parquet_resp["parquet_files"] if file["split"] == split][0]["url"]
|
|
|
217 |
with gr.Blocks() as demo:
|
218 |
gr.Markdown(
|
219 |
"""
|
220 |
+
# π Data Quality Checker π
|
221 |
+
|
222 |
+
This space gives some instruments to have a quick glance at the quality of a text dataset.
|
223 |
+
* It uses [NVIDIA's quality classifier model](https://huggingface.co/nvidia/quality-classifier-deberta)
|
224 |
+
on a small subset of texts.
|
225 |
+
* It uses [Perspective](https://perspectiveapi.com/how-it-works/) API to check toxicity of 100 random dataset texts
|
226 |
+
|
227 |
## Select dataset and text column
|
228 |
"""
|
229 |
)
|
|
|
237 |
subset_dropdown = gr.Dropdown(label="Subset", visible=False)
|
238 |
split_dropdown = gr.Dropdown(label="Split", visible=False)
|
239 |
|
|
|
240 |
with gr.Accordion("Dataset preview", open=False):
|
241 |
@gr.render(inputs=[dataset_name, subset_dropdown, split_dropdown])
|
242 |
def embed(name, subset, split):
|
|
|
262 |
text_column_dropdown: gr.Dropdown(label="Text column name"),
|
263 |
nested_text_column_dropdown: gr.Dropdown(visible=False)
|
264 |
}
|
265 |
+
info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=20).json()
|
266 |
if "error" in info_resp:
|
267 |
return {
|
268 |
subset_dropdown: gr.Dropdown(visible=False),
|
|
|
286 |
return {
|
287 |
subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
|
288 |
split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
|
289 |
+
text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features, label="Text column name"),
|
290 |
nested_text_column_dropdown: gr.Dropdown(visible=False),
|
291 |
}
|
292 |
logging.info(nested_text_features)
|
|
|
365 |
gr.Markdown("""## Explore toxicity
|
366 |
Run [Perspective](https://perspectiveapi.com/how-it-works/) on 100 random samples to check toxicity
|
367 |
""")
|
368 |
+
gr_toxicity_btn = gr.Button("Run Perpspective")
|
|
|
369 |
toxicity_progress_bar = gr.Label(show_label=False)
|
370 |
toxicity_hist = gr.Plot()
|
371 |
with gr.Accordion("Explore examples with toxicity scores:", open=False):
|