Spaces:
Sleeping
Sleeping
chan030609
committed on
Commit
·
b826bd5
1
Parent(s):
fcbfc14
relabel
Browse files- .gitignore +0 -12
- app.py +2 -2
- asset/p1.md +2 -2
- asset/p2.md +18 -1
- submission/{hp.csv → books.csv} +1 -1
- submission/{bbc.csv → news.csv} +1 -1
- uploads.py +8 -3
- utils.py +12 -6
.gitignore
CHANGED
@@ -1,13 +1 @@
|
|
1 |
-
auto_evals/
|
2 |
-
venv/
|
3 |
-
__pycache__/
|
4 |
-
.env
|
5 |
-
.ipynb_checkpoints
|
6 |
-
*ipynb
|
7 |
-
.vscode/
|
8 |
|
9 |
-
eval-queue/
|
10 |
-
eval-results/
|
11 |
-
eval-queue-bk/
|
12 |
-
eval-results-bk/
|
13 |
-
logs/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -30,7 +30,7 @@ with demo:
|
|
30 |
with open("asset/citation_button_text.txt", 'r') as f:
|
31 |
citation_button = gr.Textbox(
|
32 |
value=f.read(),
|
33 |
-
label="Copy the following snippet to cite these results",
|
34 |
elem_id="citation-button",
|
35 |
show_copy_button=True,
|
36 |
)
|
@@ -59,7 +59,7 @@ with demo:
|
|
59 |
with gr.Accordion("Submit a new model for evaluation"):
|
60 |
with gr.Row():
|
61 |
with gr.Column():
|
62 |
-
corpus_radio = gr.Radio(['
|
63 |
organization_textbox = gr.Textbox(label="Organization")
|
64 |
mail_textbox = gr.Textbox(label="Contact email")
|
65 |
with gr.Column():
|
|
|
30 |
with open("asset/citation_button_text.txt", 'r') as f:
|
31 |
citation_button = gr.Textbox(
|
32 |
value=f.read(),
|
33 |
+
label="Copy the following snippet to cite these results:",
|
34 |
elem_id="citation-button",
|
35 |
show_copy_button=True,
|
36 |
)
|
|
|
59 |
with gr.Accordion("Submit a new model for evaluation"):
|
60 |
with gr.Row():
|
61 |
with gr.Column():
|
62 |
+
# The default value must be one of the listed choices; the previous default
# "llama" matched neither option, so start on the first corpus instead.
corpus_radio = gr.Radio(['news', 'books'], value="news", label="Corpus")
|
63 |
organization_textbox = gr.Textbox(label="Organization")
|
64 |
mail_textbox = gr.Textbox(label="Contact email")
|
65 |
with gr.Column():
|
asset/p1.md
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
-
## 🥇
|
2 |
|
3 |
-
|
|
|
1 |
+
## 🥇 MUSE Leaderboard
|
2 |
|
3 |
+
MUSE is a comprehensive machine unlearning evaluation benchmark that assesses six desirable properties for unlearned models: (1) no verbatim memorization, (2) no knowledge memorization, (3) no privacy leakage, (4) utility preservation for non-removed data, (5) scalability with respect to removal requests, and (6) sustainability over sequential unlearning requests.
|
asset/p2.md
CHANGED
@@ -1 +1,18 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Expected File Format
|
2 |
+
|
3 |
+
Your submitted file must be a CSV file containing the following columns:
|
4 |
+
- `name`: Name of the evaluated unlearning method. Must be unique for each row.
|
5 |
+
- `verbmem_f`, `privleak`, `knowmem_f`, `knowmem_r`: Evaluation scores.
|
6 |
+
|
7 |
+
Following the evaluation instructions in our [GitHub Repository](https://github.com/jaechan-repo/muse_bench) yields an output file in exactly this format.
|
8 |
+
|
9 |
+
## Quick Links
|
10 |
+
|
11 |
+
- [Website](https://muse-bench.github.io): Landing page for MUSE.
|
12 |
+
- [arXiv Paper](https://arxiv.org/abs/2407.06460): Detailed information about MUSE and analysis on the baseline unlearning methods.
|
13 |
+
- [GitHub Repository](https://github.com/jaechan-repo/muse_bench): Evaluation scripts, implementations of the baseline unlearning methods.
|
14 |
+
- [News Dataset](https://huggingface.co/datasets/muse-bench/MUSE-News), [Books Dataset](https://huggingface.co/datasets/muse-bench/MUSE-Books): Two evaluation corpora used by MUSE.
|
15 |
+
- [Leaderboard](https://huggingface.co/spaces/muse-bench/muse_leaderboard) (You are here): Current rankings of unlearning algorithms for MUSE.
|
16 |
+
- [Summary on Twitter](): A concise summary and key takeaways from the project.
|
17 |
+
|
18 |
+
|
submission/{hp.csv → books.csv}
RENAMED
@@ -1,4 +1,4 @@
|
|
1 |
-
Method,Submitted By,VerbMem Forget,KnowMem Forget,PrivLeak,KnowMem Retain,Id
|
2 |
target,Baseline,99.8,59.4,-57.5,66.9,"-"
|
3 |
retrain,Baseline,14.3,28.9,0.0,74.5,"-"
|
4 |
"ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-25.0,0.0,"-"
|
|
|
1 |
+
Method Name,Submitted By,VerbMem Forget,KnowMem Forget,PrivLeak,KnowMem Retain,Id
|
2 |
target,Baseline,99.8,59.4,-57.5,66.9,"-"
|
3 |
retrain,Baseline,14.3,28.9,0.0,74.5,"-"
|
4 |
"ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-25.0,0.0,"-"
|
submission/{bbc.csv → news.csv}
RENAMED
@@ -1,4 +1,4 @@
|
|
1 |
-
Method,Submitted By,VerbMem Forget,KnowMem Forget,PrivLeak,KnowMem Retain,Id
|
2 |
target,Baseline,58.4,63.9,-99.8,55.2,"-"
|
3 |
retrain,Baseline,20.8,33.1,0.0,55.0,"-"
|
4 |
"ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,5.2,0.0,"-"
|
|
|
1 |
+
Method Name,Submitted By,VerbMem Forget,KnowMem Forget,PrivLeak,KnowMem Retain,Id
|
2 |
target,Baseline,58.4,63.9,-99.8,55.2,"-"
|
3 |
retrain,Baseline,20.8,33.1,0.0,55.0,"-"
|
4 |
"ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,5.2,0.0,"-"
|
uploads.py
CHANGED
@@ -4,7 +4,7 @@ import io
|
|
4 |
import os
|
5 |
import base64
|
6 |
import pandas as pd
|
7 |
-
from utils import DEFAULT_METRICS, LEADERBOARD_PATH
|
8 |
|
9 |
|
10 |
api = HfApi()
|
@@ -46,13 +46,18 @@ def add_new_eval(
|
|
46 |
df = pd.read_csv(io_path)
|
47 |
df_new = pd.read_csv(fpath)
|
48 |
|
49 |
-
for col in ['
|
50 |
if col not in df_new.columns:
|
51 |
return format_warning(f"Missing column in the submitted file: {col}")
|
52 |
|
53 |
df_new['Submitted By'] = organization
|
54 |
df_new['Id'] = base64.b64encode(os.urandom(6)).decode('ascii')
|
55 |
-
df_new = df_new
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
df = pd.concat([df, df_new]).reset_index(drop=True)
|
58 |
buffer = io.BytesIO()
|
|
|
4 |
import os
|
5 |
import base64
|
6 |
import pandas as pd
|
7 |
+
from utils import DEFAULT_METRICS, LEADERBOARD_PATH, DEFAULT_METRIC_LABELS
|
8 |
|
9 |
|
10 |
api = HfApi()
|
|
|
46 |
df = pd.read_csv(io_path)
|
47 |
df_new = pd.read_csv(fpath)
|
48 |
|
49 |
+
for col in ['name'] + DEFAULT_METRICS:
|
50 |
if col not in df_new.columns:
|
51 |
return format_warning(f"Missing column in the submitted file: {col}")
|
52 |
|
53 |
df_new['Submitted By'] = organization
|
54 |
df_new['Id'] = base64.b64encode(os.urandom(6)).decode('ascii')
|
55 |
+
df_new = df_new.rename(
|
56 |
+
columns={
|
57 |
+
k: v for k, v in zip(DEFAULT_METRICS, DEFAULT_METRIC_LABELS)
|
58 |
+
} | {'name': 'Method Name'}
|
59 |
+
)
|
60 |
+
df_new = df_new[['Method Name', 'Submitted By'] + DEFAULT_METRIC_LABELS + ['Id']]
|
61 |
|
62 |
df = pd.concat([df, df_new]).reset_index(drop=True)
|
63 |
buffer = io.BytesIO()
|
utils.py
CHANGED
@@ -1,20 +1,26 @@
|
|
1 |
import pandas as pd
|
2 |
|
3 |
-
|
4 |
LEADERBOARD_PATH = f"chan0369/muse_leaderboard"
|
5 |
|
6 |
DEFAULT_METRICS = [
|
7 |
-
'
|
8 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
'PrivLeak',
|
10 |
-
'KnowMem Retain'
|
11 |
]
|
12 |
|
13 |
-
CORPORA = ['
|
14 |
|
15 |
|
16 |
def load_data(corpus):
|
17 |
assert corpus in CORPORA
|
18 |
df = pd.read_csv(f"submission/{corpus}.csv")
|
19 |
-
df = df[['Method', 'Submitted By'] +
|
20 |
return df
|
|
|
1 |
import pandas as pd
|
2 |
|
|
|
3 |
# Identifier of the leaderboard location imported by uploads.py.
# NOTE(review): presumably a Hugging Face repo id ("<owner>/<name>") -- confirm.
LEADERBOARD_PATH = "chan0369/muse_leaderboard"

# Metric column names that a submitted CSV must contain (uploads.py checks
# for each of them, plus 'name', before accepting a submission).
DEFAULT_METRICS = [
    'verbmem_f',
    'knowmem_f',
    'privleak',
    'knowmem_r',
]

# Column headers used in the stored leaderboard tables. uploads.py renames
# the submitted columns by zipping DEFAULT_METRICS with this list, so the
# two lists must stay the same length and in the same order.
DEFAULT_METRIC_LABELS = [
    'VerbMem ⬇️',
    'KnowMem Forget ⬇️',
    'PrivLeak',
    'KnowMem Retain (Utility) ⬆',
]

# Corpora with a leaderboard; each one is backed by submission/<corpus>.csv.
CORPORA = ['news', 'books']
|
20 |
|
21 |
|
22 |
def load_data(corpus):
    """Load the leaderboard table for one corpus.

    Reads ``submission/{corpus}.csv`` and returns only the leaderboard
    columns, in display order.

    Args:
        corpus: Name of the corpus; must be one of ``CORPORA``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``'Method Name'``,
        ``'Submitted By'``, every entry of ``DEFAULT_METRIC_LABELS`` and
        ``'Id'``, in that order.

    Raises:
        AssertionError: If ``corpus`` is not listed in ``CORPORA``.
        KeyError: If the CSV lacks any of the selected columns.
    """
    # Raised explicitly rather than via ``assert`` so the check still runs
    # under ``python -O``; the exception type is kept for existing callers.
    if corpus not in CORPORA:
        raise AssertionError(
            f"Unknown corpus {corpus!r}; expected one of {CORPORA}"
        )

    df = pd.read_csv(f"submission/{corpus}.csv")

    # NOTE(review): the CSV headers must match DEFAULT_METRIC_LABELS exactly,
    # otherwise this selection raises KeyError -- confirm that the files in
    # submission/ use the relabelled headers.
    columns = ['Method Name', 'Submitted By'] + DEFAULT_METRIC_LABELS + ['Id']
    return df[columns]
|