Spaces:
Running
Running
molokhovdmitry
committed on
Commit
·
5000d19
1
Parent(s):
44c255c
Update t-SNE plots to use `main_topic` column
Browse files: src/app.py (+40 -10)
src/app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import os
|
|
|
2 |
from dotenv import load_dotenv
|
3 |
from transformers import pipeline
|
4 |
from sentence_transformers import SentenceTransformer
|
@@ -134,6 +135,9 @@ def nmf_plots(df,
|
|
134 |
for i, col in enumerate(topic_cols):
|
135 |
df[col] = nmf_embeddings[i]
|
136 |
|
|
|
|
|
|
|
137 |
# Get word values for every topic
|
138 |
word_df = pd.DataFrame(
|
139 |
nmf.components_.T,
|
@@ -171,7 +175,7 @@ def nmf_plots(df,
|
|
171 |
return df, [topic_words_fig, contributions_fig]
|
172 |
|
173 |
|
174 |
-
def tsne_plots(df, encoder, emotion_cols,
|
175 |
"""
|
176 |
Encodes all `text_original` values of `df` DataFrame with `encoder`,
|
177 |
uses t-SNE algorithm for visualization on these embeddings and on
|
@@ -193,12 +197,21 @@ def tsne_plots(df, encoder, emotion_cols, color_emotion, tsne_perplexity):
|
|
193 |
# Also use predicted emotions
|
194 |
if emotion_cols:
|
195 |
tsne_cols = embedding_cols + emotion_cols
|
196 |
-
color =
|
197 |
hover_data = ['first_emotion', 'second_emotion', 'text_original']
|
198 |
else:
|
199 |
tsne_cols = embedding_cols
|
200 |
color = None
|
201 |
-
hover_data = 'text_original'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
|
203 |
tsne_results = tsne.fit_transform(df[tsne_cols])
|
204 |
tsne_results = pd.DataFrame(
|
@@ -230,7 +243,8 @@ def tsne_plots(df, encoder, emotion_cols, color_emotion, tsne_perplexity):
|
|
230 |
hover_data=hover_data
|
231 |
)
|
232 |
fig3d.update_layout(
|
233 |
-
title_text="t-SNE Visualization Over Time"
|
|
|
234 |
)
|
235 |
|
236 |
return df, [fig2d, fig3d]
|
@@ -285,7 +299,15 @@ yt_api = YouTubeAPI(
|
|
285 |
|
286 |
# Input form
|
287 |
with st.form(key='input'):
|
288 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
289 |
|
290 |
# Emotions
|
291 |
emotions_checkbox = st.checkbox(
|
@@ -302,7 +324,7 @@ with st.form(key='input'):
|
|
302 |
nmf_components = st.slider(
|
303 |
"Topics (NMF Components)",
|
304 |
min_value=2,
|
305 |
-
max_value=
|
306 |
value=8,
|
307 |
step=1,
|
308 |
)
|
@@ -335,9 +357,9 @@ with st.form(key='input'):
|
|
335 |
step=1,
|
336 |
)
|
337 |
|
338 |
-
|
339 |
-
"
|
340 |
-
options=['first_emotion', 'second_emotion']
|
341 |
)
|
342 |
|
343 |
# Language Map
|
@@ -356,6 +378,9 @@ if submit:
|
|
356 |
comments = yt_api.get_comments(video_id)
|
357 |
except KeyError:
|
358 |
st.write("Video not found.")
|
|
|
|
|
|
|
359 |
bad_id = True
|
360 |
|
361 |
if not bad_id:
|
@@ -387,10 +412,15 @@ if submit:
|
|
387 |
|
388 |
if tsne_checkbox:
|
389 |
# t-SNE visualization
|
|
|
|
|
|
|
|
|
|
|
390 |
df, tsne_figs = tsne_plots(df,
|
391 |
sentence_encoder,
|
392 |
emotion_cols,
|
393 |
-
|
394 |
tsne_perplexity)
|
395 |
plots.extend(tsne_figs)
|
396 |
|
|
|
1 |
import os
|
2 |
+
import urllib.parse as urlparse
|
3 |
from dotenv import load_dotenv
|
4 |
from transformers import pipeline
|
5 |
from sentence_transformers import SentenceTransformer
|
|
|
135 |
for i, col in enumerate(topic_cols):
|
136 |
df[col] = nmf_embeddings[i]
|
137 |
|
138 |
+
# Create `main_topic` column with the highest value topic name
|
139 |
+
df['main_topic'] = df[topic_cols].apply(lambda row: row.idxmax(), axis=1)
|
140 |
+
|
141 |
# Get word values for every topic
|
142 |
word_df = pd.DataFrame(
|
143 |
nmf.components_.T,
|
|
|
175 |
return df, [topic_words_fig, contributions_fig]
|
176 |
|
177 |
|
178 |
+
def tsne_plots(df, encoder, emotion_cols, tsne_color, tsne_perplexity):
|
179 |
"""
|
180 |
Encodes all `text_original` values of `df` DataFrame with `encoder`,
|
181 |
uses t-SNE algorithm for visualization on these embeddings and on
|
|
|
197 |
# Also use predicted emotions
|
198 |
if emotion_cols:
|
199 |
tsne_cols = embedding_cols + emotion_cols
|
200 |
+
color = tsne_color
|
201 |
hover_data = ['first_emotion', 'second_emotion', 'text_original']
|
202 |
else:
|
203 |
tsne_cols = embedding_cols
|
204 |
color = None
|
205 |
+
hover_data = ['text_original']
|
206 |
+
|
207 |
+
if 'main_topic' in df.columns:
|
208 |
+
hover_data.append('main_topic')
|
209 |
+
|
210 |
+
# Color column
|
211 |
+
if 'main_topic' in df.columns or emotion_cols:
|
212 |
+
color = tsne_color
|
213 |
+
else:
|
214 |
+
color = None
|
215 |
|
216 |
tsne_results = tsne.fit_transform(df[tsne_cols])
|
217 |
tsne_results = pd.DataFrame(
|
|
|
243 |
hover_data=hover_data
|
244 |
)
|
245 |
fig3d.update_layout(
|
246 |
+
title_text="t-SNE Visualization Over Time",
|
247 |
+
height=800
|
248 |
)
|
249 |
|
250 |
return df, [fig2d, fig3d]
|
|
|
299 |
|
300 |
# Input form
|
301 |
with st.form(key='input'):
|
302 |
+
# Input
|
303 |
+
url_input = st.text_input("URL or ID")
|
304 |
+
# Get ID from URL
|
305 |
+
url_data = urlparse.urlparse(url_input)
|
306 |
+
query = urlparse.parse_qs(url_data.query)
|
307 |
+
if 'v' in query:
|
308 |
+
video_id = query['v'][0]
|
309 |
+
else:
|
310 |
+
video_id = url_input
|
311 |
|
312 |
# Emotions
|
313 |
emotions_checkbox = st.checkbox(
|
|
|
324 |
nmf_components = st.slider(
|
325 |
"Topics (NMF Components)",
|
326 |
min_value=2,
|
327 |
+
max_value=12,
|
328 |
value=8,
|
329 |
step=1,
|
330 |
)
|
|
|
357 |
step=1,
|
358 |
)
|
359 |
|
360 |
+
tsne_color = st.selectbox(
|
361 |
+
"Plot Color",
|
362 |
+
options=['main_topic', 'first_emotion', 'second_emotion']
|
363 |
)
|
364 |
|
365 |
# Language Map
|
|
|
378 |
comments = yt_api.get_comments(video_id)
|
379 |
except KeyError:
|
380 |
st.write("Video not found.")
|
381 |
+
st.write(query)
|
382 |
+
st.write('v' in query)
|
383 |
+
st.write(video_id)
|
384 |
bad_id = True
|
385 |
|
386 |
if not bad_id:
|
|
|
412 |
|
413 |
if tsne_checkbox:
|
414 |
# t-SNE visualization
|
415 |
+
if not nmf_checkbox:
|
416 |
+
tsne_color = 'first_emotion'
|
417 |
+
if not emotions_checkbox:
|
418 |
+
tsne_color = 'main_topic'
|
419 |
+
|
420 |
df, tsne_figs = tsne_plots(df,
|
421 |
sentence_encoder,
|
422 |
emotion_cols,
|
423 |
+
tsne_color,
|
424 |
tsne_perplexity)
|
425 |
plots.extend(tsne_figs)
|
426 |
|