Spaces:

LennardZuendorf
/

thesis

Runtime error

App Files Files Community

LennardZuendorf committed on Dec 31, 2023

Commit

2230009

unverified ·

1 Parent(s): 43cce2a

feat: implementing fixes and updates for version 1.0.1

Browse files

Files changed (6) hide show

backend/controller.py +0 -1
explanation/interpret.py +19 -53
explanation/visualize.py +44 -23
main.py +7 -5
public/about.md +44 -0
public/credits_dataprotection_license.md +0 -1

backend/controller.py CHANGED Viewed

@@ -10,7 +10,6 @@ from explanation import interpret, visualize
 # main interference function that calls chat functions depending on selections
-# TODO: Limit maximum tokens/model input
 def interference(
     prompt: str,
     history: list,

 # main interference function that calls chat functions depending on selections
 def interference(
     prompt: str,
     history: list,

explanation/interpret.py CHANGED Viewed

@@ -3,16 +3,11 @@
 import seaborn as sns
 import matplotlib.pyplot as plt
 import numpy as np
-from shap import models, maskers, plots, PartitionExplainer
-import torch
 # internal imports
 from utils import formatting as fmt
-# global variables
-TEACHER_FORCING = None
-TEXT_MASKER = None
 # main explain function that returns a chat with explanations
 def chat_explained(model, prompt):
@@ -32,27 +27,6 @@ def chat_explained(model, prompt):
     return response_text, graphic, plot
-def wrap_shap(model):
-    global TEXT_MASKER, TEACHER_FORCING
-    # set the device to cuda if gpu is available
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # updating the model settings again
-    model.set_config()
-    # (re)initialize the shap models and masker
-    text_generation = models.TextGeneration(model.MODEL, model.TOKENIZER)
-    TEACHER_FORCING = models.TeacherForcing(
-        text_generation,
-        model.TOKENIZER,
-        device=str(device),
-        similarity_model=model.MODEL,
-        similarity_tokenizer=model.TOKENIZER,
-    )
-    TEXT_MASKER = maskers.Text(model.TOKENIZER, " ", collapse_mask_token=True)
 # graphic plotting function that creates a html graphic (as string) for the explanation
 def create_graphic(shap_values):
     # create the html graphic using shap text plot function
@@ -68,29 +42,21 @@ def create_plot(shap_values):
     output_names = shap_values.output_names
     input_names = shap_values.data[0]
-    # Transpose the values for horizontal input names
-    transposed_values = np.transpose(values)
     # Set seaborn style to dark
-    sns.set(style="dark")
     fig, ax = plt.subplots()
-    # Making background transparent
-    ax.set_alpha(0)
-    fig.patch.set_alpha(0)
     # Setting figure size
     fig.set_size_inches(
-        max(transposed_values.shape[1] * 2, 10),
-        max(transposed_values.shape[0] / 1.5, 5),
     )
     # Plotting the heatmap with Seaborn's color palette
     im = ax.imshow(
-        transposed_values,
-        vmax=transposed_values.max(),
-        vmin=-transposed_values.min(),
         cmap=sns.color_palette("vlag_r", as_cmap=True),
         aspect="auto",
     )
@@ -98,25 +64,25 @@ def create_plot(shap_values):
     # Creating colorbar
     cbar = ax.figure.colorbar(im, ax=ax)
     cbar.ax.set_ylabel("Token Attribution", rotation=-90, va="bottom")
-    cbar.ax.yaxis.set_tick_params(color="white")
-    plt.setp(plt.getp(cbar.ax.axes, "yticklabels"), color="white")
     # Setting ticks and labels with white color for visibility
-    ax.set_xticks(np.arange(len(input_names)), labels=input_names)
-    ax.set_yticks(np.arange(len(output_names)), labels=output_names)
-    plt.setp(ax.get_xticklabels(), color="white", rotation=45, ha="right")
-    plt.setp(ax.get_yticklabels(), color="white")
     # Adjusting tick labels
     ax.tick_params(
         top=True, bottom=False, labeltop=False, labelbottom=True, color="white"
     )
-    # Adding text annotations - not used for readability
-    # for i in range(transposed_values.shape[0]):
-    #    for j in range(transposed_values.shape[1]):
-    #        val = transposed_values[i, j]
-    #        color = "black" if 0.2 < im.norm(val) < 0.8 else "white"
-    #        ax.text(j, i, f"{val:.4f}", ha="center", va="center", color=color)
     return plt

 import seaborn as sns
 import matplotlib.pyplot as plt
 import numpy as np
+from shap import plots, PartitionExplainer
 # internal imports
 from utils import formatting as fmt
 # main explain function that returns a chat with explanations
 def chat_explained(model, prompt):
     return response_text, graphic, plot
 # graphic plotting function that creates a html graphic (as string) for the explanation
 def create_graphic(shap_values):
     # create the html graphic using shap text plot function
     output_names = shap_values.output_names
     input_names = shap_values.data[0]
     # Set seaborn style to white
+    sns.set(style="white")
     fig, ax = plt.subplots()
     # Setting figure size
     fig.set_size_inches(
+        max(values.shape[1] * 2, 10),
+        max(values.shape[0] * 1, 5),
     )
     # Plotting the heatmap with Seaborn's color palette
     im = ax.imshow(
+        values,
+        vmax=values.max(),
+        vmin=values.min(),
         cmap=sns.color_palette("vlag_r", as_cmap=True),
         aspect="auto",
     )
     # Creating colorbar
     cbar = ax.figure.colorbar(im, ax=ax)
     cbar.ax.set_ylabel("Token Attribution", rotation=-90, va="bottom")
+    cbar.ax.yaxis.set_tick_params(color="black")
+    plt.setp(plt.getp(cbar.ax.axes, "yticklabels"), color="black")
     # Setting ticks and labels with black color for visibility
+    ax.set_yticks(np.arange(len(input_names)), labels=input_names)
+    ax.set_xticks(np.arange(len(output_names)), labels=output_names)
+    plt.setp(ax.get_xticklabels(), color="black", rotation=45, ha="right")
+    plt.setp(ax.get_yticklabels(), color="black")
     # Adjusting tick labels
     ax.tick_params(
         top=True, bottom=False, labeltop=False, labelbottom=True, color="white"
     )
+    # Adding text annotations with appropriate contrast
+    for i in range(values.shape[0]):
+        for j in range(values.shape[1]):
+            val = values[i, j]
+            color = "white" if im.norm(values.max()) / 2 > im.norm(val) else "black"
+            ax.text(j, i, f"{val:.4f}", ha="center", va="center", color=color)
     return plt

explanation/visualize.py CHANGED Viewed

@@ -13,6 +13,7 @@ from utils import formatting as fmt
 # plotting function that plots the attention values in a heatmap
 def chat_explained(model, prompt):
     model.set_config()
     # get encoded input and output vectors
@@ -20,6 +21,8 @@ def chat_explained(model, prompt):
         prompt, return_tensors="pt", add_special_tokens=True
     ).input_ids
     decoder_input_ids = model.MODEL.generate(encoder_input_ids, output_attentions=True)
     encoder_text = fmt.format_tokens(
         model.TOKENIZER.convert_ids_to_tokens(encoder_input_ids[0])
     )
@@ -37,11 +40,20 @@ def chat_explained(model, prompt):
     # create the response text, graphic and plot
     response_text = fmt.format_output_text(decoder_text)
     graphic = create_graphic(attention_output, (encoder_text, decoder_text))
     plot = create_plot(attention_output, (encoder_text, decoder_text))
-    return response_text, graphic, plot
 # creating a html graphic using BERTViz
 def create_graphic(attention_output, enc_dec_texts: tuple):
     # calls the head_view function of BERTViz to return html graphic
@@ -58,27 +70,28 @@ def create_graphic(attention_output, enc_dec_texts: tuple):
 # creating an attention heatmap plot using seaborn
 def create_plot(attention_output, enc_dec_texts: tuple):
     # get the averaged attention weights
     attention = attention_output.cross_attentions[0][0].detach().numpy()
     averaged_attention_weights = np.mean(attention, axis=0)
-    # get the encoder and decoder tokens
     encoder_tokens = enc_dec_texts[0]
     decoder_tokens = enc_dec_texts[1]
     # set seaborn style to dark and initialize figure and axis
-    sns.set(style="dark")
     fig, ax = plt.subplots()
-    # Making background transparent
-    ax.set_alpha(0)
-    fig.patch.set_alpha(0)
     # Setting figure size
     fig.set_size_inches(
         max(averaged_attention_weights.shape[1] * 2, 10),
-        max(averaged_attention_weights.shape[0] / 1.5, 5),
     )
     # Plotting the heatmap with seaborn's color palette
@@ -92,19 +105,27 @@ def create_plot(attention_output, enc_dec_texts: tuple):
     # Creating colorbar
     cbar = ax.figure.colorbar(im, ax=ax)
-    cbar.ax.set_ylabel("Token Attribution", rotation=-90, va="bottom")
-    cbar.ax.yaxis.set_tick_params(color="white")
-    plt.setp(plt.getp(cbar.ax.axes, "yticklabels"), color="white")
-    # Setting ticks and labels with white color for visibility
-    ax.set_xticks(np.arange(len(encoder_tokens)), labels=encoder_tokens)
-    ax.set_yticks(np.arange(len(decoder_tokens)), labels=decoder_tokens)
-    plt.setp(ax.get_xticklabels(), color="white", rotation=45, ha="right")
-    plt.setp(ax.get_yticklabels(), color="white")
-    # Adjusting tick labels
-    ax.tick_params(
-        top=True, bottom=False, labeltop=False, labelbottom=True, color="white"
-    )
     return plt

 # plotting function that plots the attention values in a heatmap
 def chat_explained(model, prompt):
+    # reset the model config to default
     model.set_config()
     # get encoded input and output vectors
         prompt, return_tensors="pt", add_special_tokens=True
     ).input_ids
     decoder_input_ids = model.MODEL.generate(encoder_input_ids, output_attentions=True)
+    # get the text for the input and output vectors
     encoder_text = fmt.format_tokens(
         model.TOKENIZER.convert_ids_to_tokens(encoder_input_ids[0])
     )
     # create the response text, graphic and plot
     response_text = fmt.format_output_text(decoder_text)
     graphic = create_graphic(attention_output, (encoder_text, decoder_text))
+    graphic = (
+        '<div style="text-align: center"><h4>Interactive Graphic for Attention'
+        " currently WIP</h4></div>"
+    )
     plot = create_plot(attention_output, (encoder_text, decoder_text))
+    return (
+        response_text,
+        graphic,
+        plot,
+    )
 # creating a html graphic using BERTViz
+# TODO: FIX
 def create_graphic(attention_output, enc_dec_texts: tuple):
     # calls the head_view function of BERTViz to return html graphic
 # creating an attention heatmap plot using seaborn
+# CREDIT: adopted from official Matplotlib documentation
+## see https://matplotlib.org/stable/
 def create_plot(attention_output, enc_dec_texts: tuple):
     # get the averaged attention weights
     attention = attention_output.cross_attentions[0][0].detach().numpy()
     averaged_attention_weights = np.mean(attention, axis=0)
+    averaged_attention_weights = np.transpose(averaged_attention_weights)
+    # get the encoder and decoder tokens in text form
     encoder_tokens = enc_dec_texts[0]
     decoder_tokens = enc_dec_texts[1]
     # set seaborn style to white and initialize figure and axis
+    sns.set(style="white")
     fig, ax = plt.subplots()
     # Setting figure size
     fig.set_size_inches(
         max(averaged_attention_weights.shape[1] * 2, 10),
+        max(averaged_attention_weights.shape[0] * 1, 5),
     )
     # Plotting the heatmap with seaborn's color palette
     # Creating colorbar
     cbar = ax.figure.colorbar(im, ax=ax)
+    cbar.ax.set_ylabel("Attention Weight Scale", rotation=-90, va="bottom")
+    cbar.ax.yaxis.set_tick_params(color="black")
+    plt.setp(plt.getp(cbar.ax.axes, "yticklabels"), color="black")
+    # Setting ticks and labels with black color for visibility
+    ax.set_yticks(np.arange(len(encoder_tokens)), labels=encoder_tokens)
+    ax.set_xticks(np.arange(len(decoder_tokens)), labels=decoder_tokens)
+    ax.set_title("Attention Weights by Token")
+    plt.setp(ax.get_xticklabels(), color="black", rotation=45, ha="right")
+    plt.setp(ax.get_yticklabels(), color="black")
+    # Adding text annotations with appropriate contrast
+    for i in range(averaged_attention_weights.shape[0]):
+        for j in range(averaged_attention_weights.shape[1]):
+            val = averaged_attention_weights[i, j]
+            color = (
+                "white"
+                if im.norm(averaged_attention_weights.max()) / 2 > im.norm(val)
+                else "black"
+            )
+            ax.text(j, i, f"{val:.4f}", ha="center", va="center", color=color)
+    # return the plot
     return plt

main.py CHANGED Viewed

@@ -187,7 +187,7 @@ with gr.Blocks(
                 Values have been excluded for readability. See colorbar for value indication.
                 """)
                 # plot component that takes a matplotlib figure as input
-                xai_plot = gr.Plot(label="Token Level Explanation", scale=3)
     # functions to trigger the controller
     ## takes information for the chat and the xai selection
@@ -205,10 +205,12 @@ with gr.Blocks(
         [user_prompt, chatbot, xai_interactive, xai_plot],
     )
-    # final row to show legal information
-    ## - credits, data protection and link to the License
-    with gr.Tab(label="Credits, Data Protection and License"):
-        gr.Markdown(value=load_md("public/credits_dataprotection_license.md"))
 # mount function for fastAPI Application
 app = gr.mount_gradio_app(app, ui, path="/")

                 Values have been excluded for readability. See colorbar for value indication.
                 """)
                 # plot component that takes a matplotlib figure as input
+                xai_plot = gr.Plot(label="Token Level Explanation")
     # functions to trigger the controller
     ## takes information for the chat and the xai selection
         [user_prompt, chatbot, xai_interactive, xai_plot],
     )
+    # final tab to show about information
+    ## and credits, data protection and link to the License
+    with gr.Tab(label="About"):
+        gr.Markdown(value=load_md("public/about.md"))
+        with gr.Accordion(label="Credits, Data Protection and License", open=False):
+            gr.Markdown(value=load_md("public/credits_dataprotection_license.md"))
 # mount function for fastAPI Application
 app = gr.mount_gradio_app(app, ui, path="/")

public/about.md ADDED Viewed

	@@ -0,0 +1,44 @@

+# About
+This is a non-commercial research project as part of a Bachelor Thesis on the topic **"Building an Interpretable Natural Language AI Tool based on Transformer Models and Approaches of Explainable AI".**
+This research tackles the rise of LLM-based applications such as chatbots and explores the possibilities of model interpretation and explainability. The goal is to build a tool that can be used to explain the predictions of an LLM-based chatbot.
+## Implementation
+This project integrates PartitionSHAP and BERTViz into GODEL by Microsoft ([GODEL Model](https://huggingface.co/microsoft/GODEL-v1_1-large-seq2seq)), a generative seq2seq transformer fine-tuned for goal-directed dialog. It supports context and knowledge base inputs.
+The UI is built with Gradio.
+## Usage
+You can chat with the model by entering a message into the input field and pressing enter. The model will then generate a response. You can also enter a context and knowledge base by clicking on the respective buttons and entering the data into the input fields. The model will then generate a response based on the context and knowledge base.
+To explore explanations, choose one of the explanation methods (HINT: The runtime can increase significantly). Then keep on chatting and explore the explanations in the respective tab.
+### Self Hosted
+You can run this application locally by cloning this repository, setting up a python environment and installing the requirements. Then run the `app.py` file or use "uvicorn main:app --reload" in the *python terminal*.
+For self-hosting you can use the Dockerfile to build a docker image and run it locally or directly use the provided docker image on the [GitHub page](https://github.com/lennardzuendorf/thesis-webapp/).
+## Credit & License
+This Product is licensed under the MIT license. See [LICENSE](https://github.com/LennardZuendorf/thesis-webapp/blob/main/LICENSE.md) at GitHub for more information.
+Please credit the original author of this project (Lennard Zündorf) and the credits listed below if you use this project or parts of it in your own work.
+## Contact
+### Author
+- Lennard Zündorf
+- [email protected]
+- [GitHub](https://github.com/LennardZuendorf)
+- [LinkedIn](https://www.zuendorf.me/linkd)
+#### University
+Hochschule für Technik und Wirtschaft Berlin (HTW Berlin) - University of Applied Sciences for Engineering and Economics Berlin
+1. Supervisor: Prof. Dr. Katarina Simbeck
+2. Supervisor: Prof. Dr. Axel Hochstein

public/credits_dataprotection_license.md CHANGED Viewed

@@ -6,7 +6,6 @@
 # Credits
-For full credits, please refer to the [thesis print]()
 ### Models
 This implementation is built on GODEL by Microsoft, Inc.

 # Credits
 ### Models
 This implementation is built on GODEL by Microsoft, Inc.