Spaces:
Sleeping
Sleeping
Commit
·
8092547
1
Parent(s):
8dc0e4d
fix-uid-bug-and-add-debug-section (#23)
Browse files
- fix uid reload and show log files (6e92592af2988e14cab1358ac455a313679bef52)
- fix typo (97c6f8e64fe737eb05234ccd746d062163c7fbe9)
- add closing files (a2810d675bbc1566b5a57b99774675ba1dc2d527)
- clean up code (94e80555ec10c4ab120d5abaa3f16508d55330ee)
Co-authored-by: zcy <[email protected]>
- app.py +3 -0
- app_debug.py +37 -0
- app_text_classification.py +19 -11
- io_utils.py +12 -1
- text_classification.py +0 -1
- text_classification_ui_helpers.py +1 -1
app.py
CHANGED
@@ -3,6 +3,7 @@ import gradio as gr
|
|
3 |
|
4 |
from app_leaderboard import get_demo as get_demo_leaderboard
|
5 |
from app_text_classification import get_demo as get_demo_text_classification
|
|
|
6 |
from run_jobs import start_process_run_job, stop_thread
|
7 |
|
8 |
try:
|
@@ -11,6 +12,8 @@ try:
|
|
11 |
get_demo_text_classification(demo)
|
12 |
with gr.Tab("Leaderboard"):
|
13 |
get_demo_leaderboard()
|
|
|
|
|
14 |
|
15 |
start_process_run_job()
|
16 |
|
|
|
3 |
|
4 |
from app_leaderboard import get_demo as get_demo_leaderboard
|
5 |
from app_text_classification import get_demo as get_demo_text_classification
|
6 |
+
from app_debug import get_demo as get_demo_debug
|
7 |
from run_jobs import start_process_run_job, stop_thread
|
8 |
|
9 |
try:
|
|
|
12 |
get_demo_text_classification(demo)
|
13 |
with gr.Tab("Leaderboard"):
|
14 |
get_demo_leaderboard()
|
15 |
+
with gr.Tab("Logs(Debug)"):
|
16 |
+
get_demo_debug(demo)
|
17 |
|
18 |
start_process_run_job()
|
19 |
|
app_debug.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pipe
|
3 |
+
from os import listdir
|
4 |
+
from os.path import isfile, join
|
5 |
+
LOG_PATH = "./tmp"
|
6 |
+
CONFIG_PATH = "./cicd/configs"
|
7 |
+
|
8 |
+
def get_accordions_of_files(path, files):
|
9 |
+
components = []
|
10 |
+
for file in files:
|
11 |
+
with gr.Row():
|
12 |
+
with gr.Accordion(label=file, open=False):
|
13 |
+
with gr.Row():
|
14 |
+
with open(join(path, file), "r") as f:
|
15 |
+
gr.Markdown(f.read())
|
16 |
+
return components
|
17 |
+
|
18 |
+
def get_accordions_of_log_files():
|
19 |
+
log_files = [f for f in listdir(LOG_PATH) if isfile(join(LOG_PATH, f)) and f.endswith("_log")]
|
20 |
+
return get_accordions_of_files(LOG_PATH, log_files)
|
21 |
+
|
22 |
+
def get_accordions_of_config_files():
|
23 |
+
config_files = [f for f in listdir(CONFIG_PATH) if isfile(join(CONFIG_PATH, f)) and f.endswith(".yaml")]
|
24 |
+
return get_accordions_of_files(CONFIG_PATH, config_files)
|
25 |
+
|
26 |
+
def get_demo(demo):
|
27 |
+
with gr.Row():
|
28 |
+
# check if jobs is an attribute of pipe
|
29 |
+
if hasattr(pipe, "jobs"):
|
30 |
+
gr.Markdown(f"current jobs in queue: {len(pipe.jobs)}")
|
31 |
+
with gr.Accordion(label="Config Files", open=False):
|
32 |
+
config_accordion = get_accordions_of_config_files()
|
33 |
+
demo.load(get_accordions_of_config_files, outputs=config_accordion, every=1)
|
34 |
+
with gr.Accordion(label="Log Files", open=False):
|
35 |
+
log_accordions = get_accordions_of_log_files()
|
36 |
+
demo.load(get_accordions_of_log_files, outputs=log_accordions, every=1)
|
37 |
+
|
app_text_classification.py
CHANGED
@@ -27,11 +27,10 @@ CONFIG_PATH = "./config.yaml"
|
|
27 |
|
28 |
|
29 |
def get_demo(demo):
|
30 |
-
uid = uuid.uuid4()
|
31 |
with gr.Row():
|
32 |
gr.Markdown(INTRODUCTION_MD)
|
33 |
uid_label = gr.Textbox(
|
34 |
-
label="Evaluation ID:", value=
|
35 |
)
|
36 |
with gr.Row():
|
37 |
model_id_input = gr.Textbox(
|
@@ -70,19 +69,28 @@ def get_demo(demo):
|
|
70 |
|
71 |
with gr.Accordion(label="Model Wrap Advance Config (optional)", open=False):
|
72 |
run_local = gr.Checkbox(value=True, label="Run in this Space")
|
73 |
-
|
74 |
-
|
|
|
|
|
|
|
|
|
75 |
inference_token = gr.Textbox(value="", label="HF Token for Inference API", visible=False, interactive=True)
|
76 |
|
77 |
with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
|
78 |
-
selected = read_scanners(uid)
|
79 |
-
# currently we remove data_leakage from the default scanners
|
80 |
-
# Reason: data_leakage barely raises any issues and takes too many requests
|
81 |
-
# when using inference API, causing rate limit error
|
82 |
-
scan_config = selected + ["data_leakage"]
|
83 |
scanners = gr.CheckboxGroup(
|
84 |
-
|
85 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
with gr.Row():
|
88 |
run_btn = gr.Button(
|
@@ -98,7 +106,7 @@ def get_demo(demo):
|
|
98 |
|
99 |
dataset_id_input.change(
|
100 |
check_dataset_and_get_config,
|
101 |
-
inputs=[dataset_id_input
|
102 |
)
|
103 |
|
104 |
dataset_config_input.change(
|
|
|
27 |
|
28 |
|
29 |
def get_demo(demo):
|
|
|
30 |
with gr.Row():
|
31 |
gr.Markdown(INTRODUCTION_MD)
|
32 |
uid_label = gr.Textbox(
|
33 |
+
label="Evaluation ID:", value=uuid.uuid4, visible=False, interactive=False
|
34 |
)
|
35 |
with gr.Row():
|
36 |
model_id_input = gr.Textbox(
|
|
|
69 |
|
70 |
with gr.Accordion(label="Model Wrap Advance Config (optional)", open=False):
|
71 |
run_local = gr.Checkbox(value=True, label="Run in this Space")
|
72 |
+
run_inference = gr.Checkbox(value="False", label="Run with Inference API")
|
73 |
+
@gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[run_inference])
|
74 |
+
def get_run_mode(uid):
|
75 |
+
return (
|
76 |
+
gr.update(value=read_inference_type(uid) == "hf_inference_api" and not run_local.value)
|
77 |
+
)
|
78 |
inference_token = gr.Textbox(value="", label="HF Token for Inference API", visible=False, interactive=True)
|
79 |
|
80 |
with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
|
|
|
|
|
|
|
|
|
|
|
81 |
scanners = gr.CheckboxGroup(
|
82 |
+
label="Scan Settings", visible=True
|
83 |
)
|
84 |
+
@gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
|
85 |
+
def get_scanners(uid):
|
86 |
+
selected = read_scanners(uid)
|
87 |
+
# currently we remove data_leakage from the default scanners
|
88 |
+
# Reason: data_leakage barely raises any issues and takes too many requests
|
89 |
+
# when using inference API, causing rate limit error
|
90 |
+
scan_config = selected + ["data_leakage"]
|
91 |
+
return (gr.update(
|
92 |
+
choices=scan_config, value=selected, label="Scan Settings", visible=True
|
93 |
+
))
|
94 |
|
95 |
with gr.Row():
|
96 |
run_btn = gr.Button(
|
|
|
106 |
|
107 |
dataset_id_input.change(
|
108 |
check_dataset_and_get_config,
|
109 |
+
inputs=[dataset_id_input], outputs=[dataset_config_input]
|
110 |
)
|
111 |
|
112 |
dataset_config_input.change(
|
io_utils.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import os
|
2 |
import subprocess
|
3 |
-
import gradio as gr
|
4 |
import yaml
|
5 |
|
6 |
import pipe
|
@@ -28,6 +27,7 @@ def read_scanners(uid):
|
|
28 |
with open(get_yaml_path(uid), "r") as f:
|
29 |
config = yaml.load(f, Loader=yaml.FullLoader)
|
30 |
scanners = config.get("detectors", [])
|
|
|
31 |
return scanners
|
32 |
|
33 |
|
@@ -37,9 +37,12 @@ def write_scanners(scanners, uid):
|
|
37 |
config = yaml.load(f, Loader=yaml.FullLoader)
|
38 |
if config:
|
39 |
config["detectors"] = scanners
|
|
|
40 |
# save scanners to detectors in yaml
|
41 |
with open(get_yaml_path(uid), "w") as f:
|
42 |
yaml.dump(config, f, Dumper=Dumper)
|
|
|
|
|
43 |
|
44 |
|
45 |
# read model_type from yaml file
|
@@ -48,6 +51,7 @@ def read_inference_type(uid):
|
|
48 |
with open(get_yaml_path(uid), "r") as f:
|
49 |
config = yaml.load(f, Loader=yaml.FullLoader)
|
50 |
inference_type = config.get("inference_type", "")
|
|
|
51 |
return inference_type
|
52 |
|
53 |
|
@@ -62,9 +66,11 @@ def write_inference_type(use_inference, inference_token, uid):
|
|
62 |
config["inference_type"] = "hf_pipeline"
|
63 |
# FIXME: A quick and temp fix for missing token
|
64 |
config["inference_token"] = ""
|
|
|
65 |
# save inference_type to inference_type in yaml
|
66 |
with open(get_yaml_path(uid), "w") as f:
|
67 |
yaml.dump(config, f, Dumper=Dumper)
|
|
|
68 |
|
69 |
|
70 |
|
@@ -75,6 +81,7 @@ def read_column_mapping(uid):
|
|
75 |
config = yaml.load(f, Loader=yaml.FullLoader)
|
76 |
if config:
|
77 |
column_mapping = config.get("column_mapping", dict())
|
|
|
78 |
return column_mapping
|
79 |
|
80 |
|
@@ -82,6 +89,7 @@ def read_column_mapping(uid):
|
|
82 |
def write_column_mapping(mapping, uid):
|
83 |
with open(get_yaml_path(uid), "r") as f:
|
84 |
config = yaml.load(f, Loader=yaml.FullLoader)
|
|
|
85 |
|
86 |
if config is None:
|
87 |
return
|
@@ -92,6 +100,8 @@ def write_column_mapping(mapping, uid):
|
|
92 |
|
93 |
with open(get_yaml_path(uid), "w") as f:
|
94 |
yaml.dump(config, f, Dumper=Dumper)
|
|
|
|
|
95 |
|
96 |
|
97 |
# convert column mapping dataframe to json
|
@@ -114,6 +124,7 @@ def get_logs_file(uid):
|
|
114 |
def write_log_to_user_file(id, log):
|
115 |
with open(f"./tmp/{id}_log", "a") as f:
|
116 |
f.write(log)
|
|
|
117 |
|
118 |
|
119 |
def save_job_to_pipe(id, job, lock):
|
|
|
1 |
import os
|
2 |
import subprocess
|
|
|
3 |
import yaml
|
4 |
|
5 |
import pipe
|
|
|
27 |
with open(get_yaml_path(uid), "r") as f:
|
28 |
config = yaml.load(f, Loader=yaml.FullLoader)
|
29 |
scanners = config.get("detectors", [])
|
30 |
+
f.close()
|
31 |
return scanners
|
32 |
|
33 |
|
|
|
37 |
config = yaml.load(f, Loader=yaml.FullLoader)
|
38 |
if config:
|
39 |
config["detectors"] = scanners
|
40 |
+
f.close()
|
41 |
# save scanners to detectors in yaml
|
42 |
with open(get_yaml_path(uid), "w") as f:
|
43 |
yaml.dump(config, f, Dumper=Dumper)
|
44 |
+
f.close()
|
45 |
+
|
46 |
|
47 |
|
48 |
# read model_type from yaml file
|
|
|
51 |
with open(get_yaml_path(uid), "r") as f:
|
52 |
config = yaml.load(f, Loader=yaml.FullLoader)
|
53 |
inference_type = config.get("inference_type", "")
|
54 |
+
f.close()
|
55 |
return inference_type
|
56 |
|
57 |
|
|
|
66 |
config["inference_type"] = "hf_pipeline"
|
67 |
# FIXME: A quick and temp fix for missing token
|
68 |
config["inference_token"] = ""
|
69 |
+
f.close()
|
70 |
# save inference_type to inference_type in yaml
|
71 |
with open(get_yaml_path(uid), "w") as f:
|
72 |
yaml.dump(config, f, Dumper=Dumper)
|
73 |
+
f.close()
|
74 |
|
75 |
|
76 |
|
|
|
81 |
config = yaml.load(f, Loader=yaml.FullLoader)
|
82 |
if config:
|
83 |
column_mapping = config.get("column_mapping", dict())
|
84 |
+
f.close()
|
85 |
return column_mapping
|
86 |
|
87 |
|
|
|
89 |
def write_column_mapping(mapping, uid):
|
90 |
with open(get_yaml_path(uid), "r") as f:
|
91 |
config = yaml.load(f, Loader=yaml.FullLoader)
|
92 |
+
f.close()
|
93 |
|
94 |
if config is None:
|
95 |
return
|
|
|
100 |
|
101 |
with open(get_yaml_path(uid), "w") as f:
|
102 |
yaml.dump(config, f, Dumper=Dumper)
|
103 |
+
f.close()
|
104 |
+
|
105 |
|
106 |
|
107 |
# convert column mapping dataframe to json
|
|
|
124 |
def write_log_to_user_file(id, log):
|
125 |
with open(f"./tmp/{id}_log", "a") as f:
|
126 |
f.write(log)
|
127 |
+
f.close()
|
128 |
|
129 |
|
130 |
def save_job_to_pipe(id, job, lock):
|
text_classification.py
CHANGED
@@ -171,7 +171,6 @@ def infer_output_label_column(
|
|
171 |
str(i): id2label_mapping[label]
|
172 |
for i, label in zip(id2label.keys(), dataset_labels)
|
173 |
}
|
174 |
-
# print('>>>>> column_mapping >>>>>', column_mapping)
|
175 |
|
176 |
id2label_df = pd.DataFrame(
|
177 |
{
|
|
|
171 |
str(i): id2label_mapping[label]
|
172 |
for i, label in zip(id2label.keys(), dataset_labels)
|
173 |
}
|
|
|
174 |
|
175 |
id2label_df = pd.DataFrame(
|
176 |
{
|
text_classification_ui_helpers.py
CHANGED
@@ -23,7 +23,7 @@ HF_SPACE_ID = "SPACE_ID"
|
|
23 |
HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
|
24 |
|
25 |
|
26 |
-
def check_dataset_and_get_config(dataset_id
|
27 |
try:
|
28 |
# write_column_mapping(None, uid) # reset column mapping
|
29 |
configs = datasets.get_dataset_config_names(dataset_id)
|
|
|
23 |
HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
|
24 |
|
25 |
|
26 |
+
def check_dataset_and_get_config(dataset_id):
|
27 |
try:
|
28 |
# write_column_mapping(None, uid) # reset column mapping
|
29 |
configs = datasets.get_dataset_config_names(dataset_id)
|