nickmuchi committed on
Commit
b628185
1 Parent(s): 978b2e2

Update variables.py

Browse files
Files changed (1) hide show
  1. variables.py +35 -1
variables.py CHANGED
@@ -24,6 +24,40 @@ from langchain.schema import (
24
  SystemMessage
25
  )
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  @st.experimental_singleton(suppress_st_warning=True)
28
  def get_latest_file():
29
  '''Get the latest file from output folder'''
@@ -44,7 +78,7 @@ def get_latest_file():
44
  return file_contents
45
 
46
  @st.experimental_singleton(suppress_st_warning=True)
47
- def process_tweets(file,model,query):
48
  '''Process file with latest tweets'''
49
 
50
  # Split tweets into chunks
 
24
  SystemMessage
25
  )
26
 
27
@st.experimental_singleton(suppress_st_warning=True)
def load_models():
    '''Load the sentiment and topic classification models.

    Calls ``pipeline`` once per model, using the same identifier for both
    the model and its tokenizer. ``task``, ``sent_model_id`` and
    ``topic_model_id`` are names defined elsewhere in this file.

    Returns:
        A ``(sentiment_pipeline, topic_pipeline)`` tuple, in that order.
    '''
    # Both pipelines share the same task and differ only in the model id.
    sentiment_pipeline, topic_pipeline = (
        pipeline(task, model=model_id, tokenizer=model_id)
        for model_id in (sent_model_id, topic_model_id)
    )

    return sentiment_pipeline, topic_pipeline
34
+
35
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def process_tweets(df, df_users):
    '''Process tweets into a dataframe with sentiment and topic labels.

    Joins ``df`` with ``df_users`` on ``author``, runs
    ``sentiment_classifier`` and ``topic_classifier`` (names defined
    elsewhere in this file) over the ``tweet`` column, and attaches the
    resulting labels and scores to the merged rows.

    Args:
        df: dataframe with at least ``author`` and ``tweet`` columns;
            ``author`` must be convertible to ``np.int64``. It is not
            modified by this function.
        df_users: dataframe with an ``author`` column to join on.
            Between them, ``df`` and ``df_users`` must supply the
            ``creation_time`` and ``username`` columns selected below.

    Returns:
        A dataframe with the columns ``creation_time``, ``username``,
        ``tweet``, ``sentiment``, ``topic``, ``sentiment_confidence`` and
        ``topic_confidence``, sorted by ``creation_time`` in descending
        order. The two confidence columns are whole-number percentages.
    '''
    # Cast the join key on a copy so the caller's dataframe is not mutated.
    tweets = df.assign(author=df['author'].astype(np.int64))

    df_merged = tweets.merge(df_users, on='author')

    tweet_list = df_merged['tweet'].tolist()

    # Each classifier result is expected to expose 'label' and 'score'
    # fields; they are renamed so the two result sets do not collide.
    sentiment = pd.DataFrame(sentiment_classifier(tweet_list)).rename(
        columns={'score': 'sentiment_confidence', 'label': 'sentiment'}
    )
    topic = pd.DataFrame(topic_classifier(tweet_list)).rename(
        columns={'score': 'topic_confidence', 'label': 'topic'}
    )

    # Column-wise concat aligns on the index: the merge result and the
    # freshly built classifier frames all carry a default 0..n-1 index.
    df_group = pd.concat([df_merged, sentiment, topic], axis=1)

    # Scale to percent first and round afterwards; rounding before the
    # multiplication reintroduces float noise (0.57 * 100 -> 56.999...).
    confidence_cols = ['sentiment_confidence', 'topic_confidence']
    df_group[confidence_cols] = df_group[confidence_cols].mul(100).round()

    df_tweets = df_group[['creation_time', 'username', 'tweet', 'sentiment',
                          'topic', 'sentiment_confidence', 'topic_confidence']]

    df_tweets = df_tweets.sort_values(by=['creation_time'], ascending=False)

    return df_tweets
60
+
61
  @st.experimental_singleton(suppress_st_warning=True)
62
  def get_latest_file():
63
  '''Get the latest file from output folder'''
 
78
  return file_contents
79
 
80
  @st.experimental_singleton(suppress_st_warning=True)
81
+ def embed_tweets(file,model,query):
82
  '''Process file with latest tweets'''
83
 
84
  # Split tweets into chunks