hanchier committed on
Commit
7c66433
·
1 Parent(s): 3e7b074

visualization

Browse files
Files changed (1) hide show
  1. app.py +40 -12
app.py CHANGED
@@ -48,7 +48,7 @@ def word_embedding_space_analysis(
48
  for t in side_tokens:
49
  word = tokenizer.decode([t])
50
  if (
51
- len(word) > 2 and not word[0].isalpha() and
52
  word[1:].isalpha() and word[1:].lower().islower()
53
  ):
54
  word = word[1:]
@@ -65,7 +65,7 @@ def word_embedding_space_analysis(
65
  data,
66
  columns=["Words Contributing to the Style"],
67
  index=[f"Dim#{_i}" for _i in range(n_dim)],
68
- )
69
 
70
 
71
  # rgb tuple to hex color
@@ -77,6 +77,8 @@ def main():
77
  # set up the page
78
  random.seed(0)
79
  nltk.download('words')
 
 
80
  title = "LM-Steer: Word Embeddings Are Steers for Language Models"
81
  st.set_page_config(
82
  layout="wide",
@@ -92,8 +94,12 @@ def main():
92
  https://github.com/Glaciohound/LM-Steer.
93
  '''
94
  st.subheader("Overview")
95
- st.image('https://raw.githubusercontent.com/Glaciohound/LM-Steer'
96
- '/refs/heads/main/assets/overview_fig.jpg')
 
 
 
 
97
  '''
98
  Language models (LMs) automatically learn word embeddings during
99
  pre-training on language corpora. Although word embeddings are usually
@@ -168,7 +174,7 @@ def main():
168
  "Detoxification Strength (Toxic ↔︎ Clean)",
169
  -steer_range, steer_range, 0.0,
170
  steer_interval)
171
- max_length = col3.number_input("Max length", 20, 200, 20, 20)
172
  col1, col2, col3, _ = st.columns(4)
173
  randomness = col2.checkbox("Random sampling", value=False)
174
 
@@ -191,8 +197,9 @@ def main():
191
  do_sample=True,
192
  top_p=0.9,
193
  )
194
- st.session_state.analyzed_text = \
195
- st.text_area("Generated text:", st.session_state.output, height=200)
 
196
 
197
  # Analysing the sentence
198
  st.divider()
@@ -202,17 +209,19 @@ def main():
202
  LM-Steer also serves as a probe for analyzing the text. It can be used to
203
  analyze the sentiment and detoxification of the text. Now, we proceed and
204
  use LM-Steer to analyze the text in the box above. You can also modify the
205
- text or use your own. Please note that these two dimensions can be
206
  entangled, as a negative sentiment may also detoxify the text.
207
  '''
 
 
208
  if st.session_state.get("analyzed_text", "") != "" and \
209
  st.button("Analyze the text above", type="primary"):
210
  col1, col2 = st.columns(2)
211
  for name, col, dim, color, axis_annotation in zip(
212
- ["Sentiment", "Detoxification"],
213
  [col1, col2],
214
  [2, 0],
215
- ["#ff7f0e", "#1f77b4"],
216
  ["Negative ↔︎ Positive", "Toxic ↔︎ Clean"]
217
  ):
218
  with st.spinner(f"Analyzing {name}..."):
@@ -269,10 +278,10 @@ def main():
269
  style. This analysis can be used to understand the word embedding space
270
  and how it steers the model's generation.
271
  '''
272
- for dimension in ["Detoxification", "Sentiment"]:
273
  f'##### {dimension} Word Dimensions'
274
  dim = 2 if dimension == "Sentiment" else 0
275
- analysis_result = word_embedding_space_analysis(
276
  model_name, dim)
277
  with st.expander("Show the analysis results"):
278
  color_scale = 7
@@ -291,6 +300,25 @@ def main():
291
  for i in range(len(x))
292
  ]
293
  ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
295
 
296
  if __name__ == "__main__":
 
48
  for t in side_tokens:
49
  word = tokenizer.decode([t])
50
  if (
51
+ len(word) > 2 and word[0] == " " and
52
  word[1:].isalpha() and word[1:].lower().islower()
53
  ):
54
  word = word[1:]
 
65
  data,
66
  columns=["Words Contributing to the Style"],
67
  index=[f"Dim#{_i}" for _i in range(n_dim)],
68
+ ), D
69
 
70
 
71
  # rgb tuple to hex color
 
77
  # set up the page
78
  random.seed(0)
79
  nltk.download('words')
80
+ dimension_names = ["Detoxification", "Sentiment"]
81
+ dimension_colors = ["#1f77b4", "#ff7f0e"]
82
  title = "LM-Steer: Word Embeddings Are Steers for Language Models"
83
  st.set_page_config(
84
  layout="wide",
 
94
  https://github.com/Glaciohound/LM-Steer.
95
  '''
96
  st.subheader("Overview")
97
+ col1, col2, col3 = st.columns([1, 5, 1])
98
+ col2.image(
99
+ 'https://raw.githubusercontent.com/Glaciohound/LM-Steer'
100
+ '/refs/heads/main/assets/overview_fig.jpg',
101
+ caption="LM-Steer Method Overview"
102
+ )
103
  '''
104
  Language models (LMs) automatically learn word embeddings during
105
  pre-training on language corpora. Although word embeddings are usually
 
174
  "Detoxification Strength (Toxic ↔︎ Clean)",
175
  -steer_range, steer_range, 0.0,
176
  steer_interval)
177
+ max_length = col3.number_input("Max length", 20, 300, 20, 40)
178
  col1, col2, col3, _ = st.columns(4)
179
  randomness = col2.checkbox("Random sampling", value=False)
180
 
 
197
  do_sample=True,
198
  top_p=0.9,
199
  )
200
+
201
+ with st.chat_message("human"):
202
+ st.write(st.session_state.output)
203
 
204
  # Analysing the sentence
205
  st.divider()
 
209
  LM-Steer also serves as a probe for analyzing the text. It can be used to
210
  analyze the sentiment and detoxification of the text. Now, we proceed and
211
  use LM-Steer to analyze the text in the box above. You can also modify the
212
+ text or use your own. You may observe that these two dimensions can be
213
  entangled, as a negative sentiment may also detoxify the text.
214
  '''
215
+ st.session_state.analyzed_text = \
216
+ st.text_area("Text to analyze:", st.session_state.output, height=200)
217
  if st.session_state.get("analyzed_text", "") != "" and \
218
  st.button("Analyze the text above", type="primary"):
219
  col1, col2 = st.columns(2)
220
  for name, col, dim, color, axis_annotation in zip(
221
+ dimension_names,
222
  [col1, col2],
223
  [2, 0],
224
+ dimension_colors,
225
  ["Negative ↔︎ Positive", "Toxic ↔︎ Clean"]
226
  ):
227
  with st.spinner(f"Analyzing {name}..."):
 
278
  style. This analysis can be used to understand the word embedding space
279
  and how it steers the model's generation.
280
  '''
281
+ for dimension, color in zip(dimension_names, dimension_colors):
282
  f'##### {dimension} Word Dimensions'
283
  dim = 2 if dimension == "Sentiment" else 0
284
+ analysis_result, D = word_embedding_space_analysis(
285
  model_name, dim)
286
  with st.expander("Show the analysis results"):
287
  color_scale = 7
 
300
  for i in range(len(x))
301
  ]
302
  ))
303
+ embeddings = model.steer.lm_head.weight
304
+ dim1 = embeddings.matmul(D[0]).tolist()
305
+ dim2 = embeddings.matmul(D[1]).tolist()
306
+ words = [tokenizer.decode([i]) for i in range(len(embeddings))]
307
+ scatter_chart = [
308
+ (_d1, _d2, _word)
309
+ for _d1, _d2, _word in zip(dim1, dim2, words)
310
+ if len(_word) > 2 and _word[0] == " " and
311
+ _word[1:].isalpha() and _word[1:].lower().islower()
312
+ ]
313
+ scatter_chart = pd.DataFrame(
314
+ scatter_chart,
315
+ columns=["Dim1", "Dim2", "Word"]
316
+ )
317
+ st.scatter_chart(
318
+ scatter_chart, x="Dim1", y="Dim2",
319
+ color="Word",
320
+ # color=color,
321
+ height=1000, size=50,)
322
 
323
 
324
  if __name__ == "__main__":