File size: 7,238 Bytes
2d4811a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
"""
A group of neurons tend to fire in response to commas and other punctuation. Other groups of neurons tend to fire in response to pronouns. Use this visualization to factorize neuron activity in individual FFNN layers or in the entire model.
"""
import ecco
import streamlit as st
from streamlit.components.v1 import html
from src.subpages.page import Context, Page # type: ignore
# HTML/JS preamble that is prepended to the per-render script before being
# handed to the Streamlit `html` component.
#
# What the markup below does, in order:
#   * loads RequireJS from requirejs.org;
#   * configures module paths for d3, d3-array, jquery, the Ecco JS bundle
#     (served from `ecco_url`) and xregexp, with a timestamp cache-buster
#     appended via `urlArgs`;
#   * defines a `basic` AMD module whose `init(viz_id)` renames the `#basic`
#     div to `viz_id` (a random `viz_<n>` id when none is given), inserts a
#     stylesheet <link> for Ecco's CSS into the div's parent, and returns
#     the id;
#   * declares the `<div id="basic">` placeholder that `init()` picks up.
#
# The string is emitted verbatim at runtime, so any edit here changes what the
# browser executes.
_SETUP_HTML = """
<script src="https://requirejs.org/docs/release/2.3.6/minified/require.js"></script>
<script>
var ecco_url = 'https://storage.googleapis.com/ml-intro/ecco/'
//var ecco_url = 'http://localhost:8000/'
if (window.ecco === undefined) window.ecco = {}
// Setup the paths of the script we'll be using
requirejs.config({
urlArgs: "bust=" + (new Date()).getTime(),
nodeRequire: require,
paths: {
d3: "https://d3js.org/d3.v6.min", // This is only for use in setup.html and basic.html
"d3-array": "https://d3js.org/d3-array.v2.min",
jquery: "https://code.jquery.com/jquery-3.5.1.min",
ecco: ecco_url + 'js/0.0.6/ecco-bundle.min',
xregexp: 'https://cdnjs.cloudflare.com/ajax/libs/xregexp/3.2.0/xregexp-all.min'
}
});
// Add the css file
//requirejs(['d3'],
// function (d3) {
// d3.select('#css').attr('href', ecco_url + 'html/styles.css')
// })
console.log('Ecco initialize!!')
// returns a 'basic' object. basic.init() selects the html div we'll be
// rendering the html into, adds styles.css to the document.
define('basic', ['d3'],
function (d3) {
return {
init: function (viz_id = null) {
if (viz_id == null) {
viz_id = "viz_" + Math.round(Math.random() * 10000000)
}
// Select the div rendered below, change its id
const div = d3.select('#basic').attr('id', viz_id),
div_parent = d3.select('#' + viz_id).node().parentNode
// Link to CSS file
d3.select(div_parent).insert('link')
.attr('rel', 'stylesheet')
.attr('type', 'text/css')
.attr('href', ecco_url + 'html/0.0.2/styles.css')
return viz_id
}
}
}, function (err) {
console.log(err);
}
)
</script>
<head>
<link id='css' rel="stylesheet" type="text/css">
</head>
<div id="basic"></div>
"""
@st.cache(allow_output_mutation=True)
def _load_ecco_model():
    """Build the Ecco language-model wrapper used by the activations page.

    The result is memoized by ``st.cache``; ``allow_output_mutation=True``
    tells Streamlit not to verify that the cached object stays unchanged
    between reruns.

    Returns:
        Whatever ``ecco.from_pretrained`` returns for the checkpoint below,
        created with ``activations=True``.
    """
    checkpoint = "elastic/distilbert-base-uncased-finetuned-conll03-english"

    # Passed to Ecco verbatim as `model_config`. The two prefix entries match
    # the tokenization config hard-coded in the page's render script.
    ecco_config = dict(
        embedding="embeddings.word_embeddings",
        type="mlm",
        activations=[r"ffn\.lin1"],
        token_prefix="",
        partial_token_prefix="##",
    )

    return ecco.from_pretrained(
        checkpoint,
        model_config=ecco_config,
        activations=True,
    )
class AttentionPage(Page):
    """Page that factorizes FFN neuron activations and renders Ecco's explorer.

    Despite the class name, the page is titled "Activations": it runs NMF over
    the activations captured by the Ecco model for a user-supplied text and
    embeds the resulting interactive visualization.
    """

    name = "Activations"
    icon = "activity"

    def _get_widget_defaults(self):
        """Return the default value for each keyed widget on this page.

        The keys match the ``key=`` arguments used in :meth:`render`.
        Presumably the ``Page`` base class seeds ``st.session_state`` from
        this mapping -- confirm against the base class.
        """
        return {
            "act_n_components": 8,
            "act_default_text": """Now I ask you: what can be expected of man since he is a being endowed with strange qualities? Shower upon him every earthly blessing, drown him in a sea of happiness, so that nothing but bubbles of bliss can be seen on the surface; give him economic prosperity, such that he should have nothing else to do but sleep, eat cakes and busy himself with the continuation of his species, and even then out of sheer ingratitude, sheer spite, man would play you some nasty trick. He would even risk his cakes and would deliberately desire the most fatal rubbish, the most uneconomical absurdity, simply to introduce into all this positive good sense his fatal fantastic element. It is just his fantastic dreams, his vulgar folly that he will desire to retain, simply in order to prove to himself--as though that were so necessary-- that men still are men and not the keys of a piano, which the laws of nature threaten to control so completely that soon one will be able to desire nothing but by the calendar. And that is not all: even if man really were nothing but a piano-key, even if this were proved to him by natural science and mathematics, even then he would not become reasonable, but would purposely do something perverse out of simple ingratitude, simply to gain his point. And if he does not find means he will contrive destruction and chaos, will contrive sufferings of all sorts, only to gain his point! He will launch a curse upon the world, and as only man can curse (it is his privilege, the primary distinction between him and other animals), may be by his curse alone he will attain his object--that is, convince himself that he is a man and not a piano-key!""",
            "act_from_layer": 0,
            "act_to_layer": 5,
        }

    def render(self, context: Context):
        """Draw the settings, run NMF on the entered text and embed the result.

        Args:
            context: Page context supplied by the caller; not read by this
                method.

        Stops the Streamlit script run (``st.stop()``) after showing an error
        when the selected "to layer" lies below the selected "from layer".
        """
        st.title(self.name)
        with st.expander("ℹ️", expanded=True):
            st.write(
                "A group of neurons tend to fire in response to commas and other punctuation. Other groups of neurons tend to fire in response to pronouns. Use this visualization to factorize neuron activity in individual FFNN layers or in the entire model."
            )

        lm = _load_ecco_model()
        # Highest selectable layer index; shared by both layer sliders.
        last_layer = len(lm.model.transformer.layer) - 1

        col1, _, col2 = st.columns([1.5, 0.5, 4])
        with col1:
            st.subheader("Settings")
            n_components = st.slider(
                "#components",
                key="act_n_components",
                min_value=2,
                max_value=10,
                step=1,
            )
            # No explicit `value=` on the keyed sliders: their defaults are
            # declared in `_get_widget_defaults`, and passing a value as well
            # makes Streamlit warn about a widget that has both a default value
            # and a Session State value (and `value=0` contradicted the
            # declared default of 5 for "act_to_layer"). Without Session State
            # a slider starts at `min_value`, i.e. 0, exactly as before.
            from_layer = st.slider(
                "from layer",
                key="act_from_layer",
                min_value=0,
                max_value=last_layer,
                step=1,
            )
            # The slider is inclusive; the value handed to `run_nmf` is shifted
            # by one (presumably `to_layer` is an exclusive bound there --
            # confirm against Ecco).
            to_layer = (
                st.slider(
                    "to layer",
                    key="act_to_layer",
                    min_value=0,
                    max_value=last_layer,
                    step=1,
                )
                + 1
            )

        # `to_layer` already carries the +1, so this fires only when the
        # "to layer" slider is strictly below the "from layer" slider.
        if to_layer <= from_layer:
            st.error("to_layer must be >= from_layer")
            st.stop()

        with col2:
            st.subheader("–")
            text = st.text_area("Text", key="act_default_text", height=240)

        inputs = lm.tokenizer([text], return_tensors="pt")
        output = lm(inputs)
        nmf = output.run_nmf(n_components=n_components, from_layer=from_layer, to_layer=to_layer)
        data = nmf.explore(returnData=True)

        # `data` is interpolated into the script with Python's default string
        # formatting, so it must render as a valid JavaScript literal.
        render_script = f"""<script>requirejs(['basic', 'ecco'], function(basic, ecco){{
const viz_id = basic.init()
ecco.interactiveTokensAndFactorSparklines(viz_id, {data}, {{ 'hltrCFG': {{'tokenization_config': {{'token_prefix': '', 'partial_token_prefix': '##'}} }} }})
}}, function (err) {{
console.log(err);
}})</script>"""
        html(_SETUP_HTML + render_script, height=800, scrolling=True)
|