Spaces:

emilylearning
/

choose_your_own_spurious

Sleeping

App Files Files Community

emilylearning committed on Jul 23, 2023

Commit

704ce7b

1 Parent(s): 58e928c

functional w dags

Browse files

Files changed (4) hide show

.gitignore +1 -0
app.py +372 -0
non_well_spec.png +0 -0
well_spec.png +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ venv*

app.py ADDED Viewed

	@@ -0,0 +1,372 @@

+# !pip install gradio -q
+# !pip install transformers -q
+# %%
+import gradio as gr
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import random
+from matplotlib.ticker import MaxNLocator
+from transformers import pipeline
+# %%
+# Checkpoints that get a fill-mask pipeline loaded at start-up and that are
+# offered as radio choices in the UI.
+MODEL_NAMES = [
+    "bert-base-uncased",
+    "roberta-base",
+    "bert-large-uncased",
+    "roberta-large",
+]
+# Extra radio choice; any selection outside MODEL_NAMES makes
+# predict_masked_tokens load the model named in the free-text box instead.
+OWN_MODEL_NAME = "add-a-model"
+# Number of decimals kept when rounding the reported percentages.
+DECIMAL_PLACES = 1
+EPS = 1e-5  # to avoid /0 errors
+# %%
+# Fire up the models
+models = dict()
+for bert_like in MODEL_NAMES:
+    models[bert_like] = pipeline("fill-mask", model=bert_like)
+# %%
+def clean_tokens(tokens):
+    return [token.strip() for token in tokens]
+def prepare_text_for_masking(input_text, mask_token, gendered_tokens, split_key):
+    text_w_masks_list = [
+        mask_token if word.lower() in gendered_tokens else word
+        for word in input_text.split()
+    ]
+    num_masks = len([m for m in text_w_masks_list if m == mask_token])
+    text_portions = " ".join(text_w_masks_list).split(split_key)
+    return text_portions, num_masks
+def get_avg_prob_from_pipeline_outputs(mask_filled_text, gendered_token, num_preds):
+    pronoun_preds = [
+        sum(
+            [
+                pronoun["score"]
+                if pronoun["token_str"].strip().lower() in gendered_token
+                else 0.0
+                for pronoun in top_preds
+            ]
+        )
+        for top_preds in mask_filled_text
+    ]
+    return round(sum(pronoun_preds) / (EPS + num_preds) * 100, DECIMAL_PLACES)
+def get_figure(df, gender, n_fit=1):
+    df = df.set_index("x-axis")
+    cols = df.columns
+    xs = list(range(len(df)))
+    ys = df[cols[0]]
+    fig, ax = plt.subplots()
+    # Trying small fig due to rendering issues on HF, not on VS Code
+    fig.set_figheight(3)
+    fig.set_figwidth(9)
+    # find stackoverflow reference
+    p, C_p = np.polyfit(xs, ys, n_fit, cov=1)
+    t = np.linspace(min(xs)-1, max(xs)+1,  10*len(xs))
+    TT = np.vstack([t**(n_fit-i) for i in range(n_fit+1)]).T
+    # matrix multiplication calculates the polynomial values
+    yi = np.dot(TT, p)
+    C_yi = np.dot(TT, np.dot(C_p, TT.T))  # C_y = TT*C_z*TT.T
+    sig_yi = np.sqrt(np.diag(C_yi))  # Standard deviations are sqrt of diagonal
+    ax.fill_between(t, yi+sig_yi, yi-sig_yi, alpha=.25)
+    ax.plot(t, yi, '-')
+    ax.plot(df, "ro")
+    ax.legend(list(df.columns))
+    ax.axis("tight")
+    ax.set_xlabel("Value injected into input text")
+    ax.set_title(f"Probability of predicting {gender} tokens.")
+    ax.set_ylabel(f"Softmax prob")
+    ax.tick_params(axis="x", labelrotation=5)
+    ax.set_ylim(0, 100)
+    return fig
+# %%
+def predict_masked_tokens(
+    model_name,
+    own_model_name,
+    group_a_tokens,
+    group_b_tokens,
+    indie_vars,
+    split_key,
+    normalizing,
+    n_fit,
+    input_text,
+):
+    """Run inference on input_text for each model type, returning df and plots of percentage
+    of gender pronouns predicted as female and male in each target text.
+    """
+    if model_name not in MODEL_NAMES:
+        model = pipeline("fill-mask", model=own_model_name)
+    else:
+        model = models[model_name]
+    mask_token = model.tokenizer.mask_token
+    indie_vars_list = indie_vars.split(",")
+    group_a_tokens = clean_tokens(group_a_tokens.split(","))
+    group_b_tokens = clean_tokens(group_b_tokens.split(","))
+    text_segments, num_preds = prepare_text_for_masking(
+        input_text, mask_token, group_b_tokens + group_a_tokens, split_key
+    )
+    male_pronoun_preds = []
+    female_pronoun_preds = []
+    for indie_var in indie_vars_list:
+        target_text = f"{indie_var}".join(text_segments)
+        mask_filled_text = model(target_text)
+        # Quick hack as realized return type based on how many MASKs in text.
+        if type(mask_filled_text[0]) is not list:
+            mask_filled_text = [mask_filled_text]
+        female_pronoun_preds.append(
+            get_avg_prob_from_pipeline_outputs(
+                mask_filled_text, group_a_tokens, num_preds
+            )
+        )
+        male_pronoun_preds.append(
+            get_avg_prob_from_pipeline_outputs(
+                mask_filled_text, group_b_tokens, num_preds
+            )
+        )
+    if normalizing:
+        total_gendered_probs = np.add(female_pronoun_preds, male_pronoun_preds)
+        female_pronoun_preds = np.around(
+            np.divide(female_pronoun_preds, total_gendered_probs + EPS) * 100,
+            decimals=DECIMAL_PLACES,
+        )
+        male_pronoun_preds = np.around(
+            np.divide(male_pronoun_preds, total_gendered_probs + EPS) * 100,
+            decimals=DECIMAL_PLACES,
+        )
+    results_df = pd.DataFrame({"x-axis": indie_vars_list})
+    results_df["group_a"] = female_pronoun_preds
+    results_df["group_b"] = male_pronoun_preds
+    female_fig = get_figure(
+        results_df.drop("group_b", axis=1),
+        "group_a",
+        n_fit,
+    )
+    male_fig = get_figure(
+        results_df.drop("group_a", axis=1),
+        "group_b",
+        n_fit,
+    )
+    display_text = f"{random.choice(indie_vars_list)}".join(text_segments)
+    return (
+        display_text,
+        female_fig,
+        male_fig,
+        results_df,
+    )
+truck_fn_example = [
+    MODEL_NAMES[2],
+    '',
+    ', '.join(['truck', 'pickup']),
+    ', '.join(['car', 'sedan']),
+    ', '.join(['city','neighborhood','farm']),
+    'PLACE',
+    "True",
+    1,
+]
+def truck_1_fn():
+    return truck_fn_example + [
+        'He loaded up his truck and drove to the PLACE.'
+    ]
+def truck_2_fn():
+    return truck_fn_example + [
+        'He loaded up the bed of his truck and drove to the PLACE.'
+    ]
+# # %%
+demo = gr.Blocks()
+with demo:
+    gr.Markdown("# Spurious Correlation Evaluation for Pre-trained LLMs")
+    gr.Markdown("## Instructions for this Demo")
+    gr.Markdown(
+        "1) Click on one of the examples below to pre-populate the input fields."
+    )
+    gr.Markdown(
+        "2) Check out the pre-populated fields as you scroll down to the ['Hit Submit...'] button!"
+    )
+    gr.Markdown(
+        "3) Repeat steps (1) and (2) with more pre-populated inputs or with your own values in the input fields!"
+    )
+    gr.Markdown("""The pre-populated inputs below are for a demo example of a location-vs-vehicle-type spurious correlation.
+        We can see this spurious correlation largely disappears in the well-specified example text.
+        <p align="center">
+        <img src="file/non_well_spec.png" alt="results" width="300"/>
+        </p>
+        <p align="center">
+        <img src="file/well_spec.png" alt="results" width="300"/>
+        </p>
+    """)
+    gr.Markdown("## Example inputs")
+    gr.Markdown(
+        "Click a button below to pre-populate input fields with example values. Then scroll down to Hit Submit to generate predictions."
+    )
+    with gr.Row():
+        truck_1_gen = gr.Button("Click for non-well-specified(?) vehicle-type example inputs")
+        gr.Markdown("<-- Multiple solutions with low training error. LLM sensitive to spurious(?) correlations.")
+        truck_2_gen = gr.Button("Click for well-specified vehicle-type example inputs")
+        gr.Markdown("<-- Fewer solutions with low training error. LLM less sensitive to spurious(?) correlations.")
+    gr.Markdown("## Input fields")
+    gr.Markdown(
+        f"A) Pick a spectrum of comma separated values for text injection and x-axis."
+    )
+    with gr.Row():
+        group_a_tokens = gr.Textbox(
+            type="text",
+            lines=3,
+            label="A) To-MASK tokens A: Comma separated words that account for accumulated group A softmax probs",
+        )
+        group_b_tokens = gr.Textbox(
+            type="text",
+            lines=3,
+            label="B) To-MASK tokens B: Comma separated words that account for accumulated group B softmax probs",
+        )
+    with gr.Row():
+        x_axis = gr.Textbox(
+            type="text",
+            lines=3,
+            label="C) Comma separated values for text injection and x-axis",
+        )
+    gr.Markdown("D) Pick a pre-loaded BERT-family model of interest on the right.")
+    gr.Markdown(
+        f"Or E) select `{OWN_MODEL_NAME}`, then add the mame of any other Hugging Face model that supports the [fill-mask](https://huggingface.co/models?pipeline_tag=fill-mask) task on the right (note: this may take some time to load)."
+    )
+    with gr.Row():
+        model_name = gr.Radio(
+            MODEL_NAMES + [OWN_MODEL_NAME],
+            type="value",
+            label="D) BERT-like model.",
+        )
+        own_model_name = gr.Textbox(
+            label="E) If you selected an 'add-a-model' model, put any Hugging Face pipeline model name (that supports the fill-mask task) here.",
+        )
+    gr.Markdown(
+        "F) Pick if you want to the predictions normalied to only those from group A or B."
+    )
+    gr.Markdown(
+        "G) Also tell the demo what special token you will use in your input text, that you would like replaced with the spectrum of values you listed above."
+    )
+    gr.Markdown(
+        "And H) the degree of polynomial fit used for high-lighting potential spurious association."
+    )
+    with gr.Row():
+        to_normalize = gr.Dropdown(
+            ["False", "True"],
+            label="D) Normalize model's predictions?",
+            type="index",
+        )
+        place_holder = gr.Textbox(
+            label="E) Special token place-holder",
+        )
+        n_fit = gr.Dropdown(
+            list(range(1, 5)),
+            label="F) Degree of polynomial fit",
+            type="value",
+        )
+    gr.Markdown(
+        "I) Finally, add input text that includes at least one of the '`To-MASK`' tokens from (A) or (B) and one place-holder token from (G)."
+    )
+    with gr.Row():
+        input_text = gr.Textbox(
+            lines=2,
+            label="I) Input text with a '`To-MASK`' and place-holder token",
+        )
+    gr.Markdown("## Outputs!")
+    with gr.Row():
+        btn = gr.Button("Hit submit to generate predictions!")
+    with gr.Row():
+        sample_text = gr.Textbox(
+            type="text", label="Output text: Sample of text fed to model"
+        )
+    with gr.Row():
+        female_fig = gr.Plot(type="auto")
+        male_fig = gr.Plot(type="auto")
+    with gr.Row():
+        df = gr.Dataframe(
+            show_label=True,
+            overflow_row_behaviour="show_ends",
+            label="Table of softmax probability for grouped predictions",
+        )
+    with gr.Row():
+        truck_1_gen.click(truck_1_fn, inputs=[], outputs=[model_name, own_model_name, group_a_tokens, group_b_tokens,
+                       x_axis, place_holder, to_normalize,  n_fit, input_text])
+        truck_2_gen.click(truck_2_fn, inputs=[], outputs=[model_name, own_model_name, group_a_tokens, group_b_tokens,
+                       x_axis, place_holder, to_normalize,  n_fit, input_text])
+    btn.click(
+        predict_masked_tokens,
+        inputs=[
+            model_name,
+            own_model_name,
+            group_a_tokens,
+            group_b_tokens,
+            x_axis,
+            place_holder,
+            to_normalize,
+            n_fit,
+            input_text,
+        ],
+        outputs=[sample_text, female_fig, male_fig, df],
+    )
+demo.launch(debug=True, share=True)
+# %%

non_well_spec.png ADDED Viewed

well_spec.png ADDED Viewed