Spaces:

chan030609
/

muse_leaderboard

Sleeping

chan030609 committed on Jul 10, 2024

Commit

b826bd5

1 Parent(s): fcbfc14

relabel

Files changed (8) hide show

.gitignore CHANGED Viewed

@@ -1,13 +1 @@
-auto_evals/
-venv/
-__pycache__/
-.env
-.ipynb_checkpoints
-*ipynb
-.vscode/
-eval-queue/
-eval-results/
-eval-queue-bk/
-eval-results-bk/
-logs/

app.py CHANGED Viewed

@@ -30,7 +30,7 @@ with demo:
             with open("asset/citation_button_text.txt", 'r') as f:
                 citation_button = gr.Textbox(
                     value=f.read(),
-                    label="Copy the following snippet to cite these results",
                     elem_id="citation-button",
                     show_copy_button=True,
                 )
@@ -59,7 +59,7 @@ with demo:
     with gr.Accordion("Submit a new model for evaluation"):
         with gr.Row():
             with gr.Column():
-                corpus_radio = gr.Radio(['bbc', 'hp'], value="llama", label="Corpus")
                 organization_textbox = gr.Textbox(label="Organization")
                 mail_textbox = gr.Textbox(label="Contact email")
             with gr.Column():

             with open("asset/citation_button_text.txt", 'r') as f:
                 citation_button = gr.Textbox(
                     value=f.read(),
+                    label="Copy the following snippet to cite these results:",
                     elem_id="citation-button",
                     show_copy_button=True,
                 )
     with gr.Accordion("Submit a new model for evaluation"):
         with gr.Row():
             with gr.Column():
+                corpus_radio = gr.Radio(['news', 'books'], value="llama", label="Corpus")
                 organization_textbox = gr.Textbox(label="Organization")
                 mail_textbox = gr.Textbox(label="Contact email")
             with gr.Column():

asset/p1.md CHANGED Viewed

@@ -1,3 +1,3 @@
-## 🥇 Some text
-Some text is going to go here.


+## 🥇 MUSE Leaderboard

+MUSE is a comprehensive machine unlearning evaluation benchmark that assesses six desirable properties for unlearned models: (1) no verbatim memorization, (2) no knowledge memorization, (3) no privacy leakage, (4) utility preservation for non-removed data, (5) scalability with respect to removal requests, and (6) sustainability over sequential unlearning requests.

asset/p2.md CHANGED Viewed

+## Expected File Format
+We expect your submitted file to be in CSV format and to contain the following columns:
+- `name`: Name of the evaluated unlearning method. Must be unique for each row.
+- `verbmem_f`, `privleak`, `knowmem_f`, `knowmem_r`: Evaluation scores.
+Following the evaluation instructions in our [GitHub Repository](https://github.com/jaechan-repo/muse_bench) yields an output file in exactly this format.
+## Quick Links
+- [Website](https://muse-bench.github.io): Landing page for MUSE.
+- [arXiv Paper](https://arxiv.org/abs/2407.06460): Detailed information about MUSE and analysis of the baseline unlearning methods.
+- [GitHub Repository](https://github.com/jaechan-repo/muse_bench): Evaluation scripts, implementations of the baseline unlearning methods.
+- [News Dataset](https://huggingface.co/datasets/muse-bench/MUSE-News), [Books Dataset](https://huggingface.co/datasets/muse-bench/MUSE-Books): Two evaluation corpora used by MUSE.
+- [Leaderboard](https://huggingface.co/spaces/muse-bench/muse_leaderboard) (You are here): Current rankings of unlearning algorithms for MUSE.
+- Summary on Twitter (link to be added): A concise summary and key takeaways from the project.

submission/{hp.csv → books.csv} RENAMED Viewed

@@ -1,4 +1,4 @@
-Method,Submitted By,VerbMem Forget,KnowMem Forget,PrivLeak,KnowMem Retain,Id
 target,Baseline,99.8,59.4,-57.5,66.9,"-"
 retrain,Baseline,14.3,28.9,0.0,74.5,"-"
 "ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-25.0,0.0,"-"

+Method Name,Submitted By,VerbMem Forget,KnowMem Forget,PrivLeak,KnowMem Retain,Id
 target,Baseline,99.8,59.4,-57.5,66.9,"-"
 retrain,Baseline,14.3,28.9,0.0,74.5,"-"
 "ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-25.0,0.0,"-"

submission/{bbc.csv → news.csv} RENAMED Viewed

@@ -1,4 +1,4 @@
-Method,Submitted By,VerbMem Forget,KnowMem Forget,PrivLeak,KnowMem Retain,Id
 target,Baseline,58.4,63.9,-99.8,55.2,"-"
 retrain,Baseline,20.8,33.1,0.0,55.0,"-"
 "ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,5.2,0.0,"-"

+Method Name,Submitted By,VerbMem Forget,KnowMem Forget,PrivLeak,KnowMem Retain,Id
 target,Baseline,58.4,63.9,-99.8,55.2,"-"
 retrain,Baseline,20.8,33.1,0.0,55.0,"-"
 "ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,5.2,0.0,"-"

uploads.py CHANGED Viewed

@@ -4,7 +4,7 @@ import io
 import os
 import base64
 import pandas as pd
-from utils import DEFAULT_METRICS, LEADERBOARD_PATH
 api = HfApi()
@@ -46,13 +46,18 @@ def add_new_eval(
     df = pd.read_csv(io_path)
     df_new = pd.read_csv(fpath)
-    for col in ['Method'] + DEFAULT_METRICS:
         if col not in df_new.columns:
             return format_warning(f"Missing column in the submitted file: {col}")
     df_new['Submitted By'] = organization
     df_new['Id'] = base64.b64encode(os.urandom(6)).decode('ascii')
-    df_new = df_new[['Method', 'Submitted By'] + DEFAULT_METRICS + ['Id']]
     df = pd.concat([df, df_new]).reset_index(drop=True)
     buffer = io.BytesIO()

 import os
 import base64
 import pandas as pd
+from utils import DEFAULT_METRICS, LEADERBOARD_PATH, DEFAULT_METRIC_LABELS
 api = HfApi()
     df = pd.read_csv(io_path)
     df_new = pd.read_csv(fpath)
+    for col in ['name'] + DEFAULT_METRICS:
         if col not in df_new.columns:
             return format_warning(f"Missing column in the submitted file: {col}")
     df_new['Submitted By'] = organization
     df_new['Id'] = base64.b64encode(os.urandom(6)).decode('ascii')
+    df_new = df_new.rename(
+        columns={
+            k: v for k, v in zip(DEFAULT_METRICS, DEFAULT_METRIC_LABELS)
+        } | {'name': 'Method Name'}
+    )
+    df_new = df_new[['Method Name', 'Submitted By'] + DEFAULT_METRIC_LABELS + ['Id']]
     df = pd.concat([df, df_new]).reset_index(drop=True)
     buffer = io.BytesIO()

utils.py CHANGED Viewed

@@ -1,20 +1,26 @@
 import pandas as pd
 LEADERBOARD_PATH = f"chan0369/muse_leaderboard"
 DEFAULT_METRICS = [
-    'VerbMem Forget',
-    'KnowMem Forget',
     'PrivLeak',
-    'KnowMem Retain'
 ]
-CORPORA = ['bbc', 'hp']
 def load_data(corpus):
     assert corpus in CORPORA
     df = pd.read_csv(f"submission/{corpus}.csv")
-    df = df[['Method', 'Submitted By'] + DEFAULT_METRICS + ['Id']]
     return df

 import pandas as pd
 LEADERBOARD_PATH = f"chan0369/muse_leaderboard"
 DEFAULT_METRICS = [
+    'verbmem_f',
+    'knowmem_f',
+    'privleak',
+    'knowmem_r'
+]
+DEFAULT_METRIC_LABELS = [
+    'VerbMem ⬇️',
+    'KnowMem Forget ⬇️',
     'PrivLeak',
+    'KnowMem Retain (Utility) ⬆'
 ]
+CORPORA = ['news', 'books']
 def load_data(corpus):
     assert corpus in CORPORA
     df = pd.read_csv(f"submission/{corpus}.csv")
+    df = df[['Method Name', 'Submitted By'] + DEFAULT_METRIC_LABELS + ['Id']]
     return df