robertou2 committed on
Commit
7c18751
1 Parent(s): d9f763d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -48
app.py CHANGED
@@ -27,61 +27,71 @@ auth = tw.OAuthHandler(consumer_key, consumer_secret)
27
  auth.set_access_token(access_token, access_token_secret)
28
  api = tw.API(auth, wait_on_rate_limit=True)
29
 
30
-
31
-
32
-
33
  st.title('Analisis de comentarios sexistas en Twitter con Tweepy and HuggingFace Transformers')
34
  st.markdown('Esta app utiliza tweepy para descargar tweets de twitter en base a la información de entrada y procesa los tweets usando transformers de HuggingFace para detectar comentarios sexistas. El resultado y los tweets correspondientes se almacenan en un dataframe para mostrarlo que es lo que se ve como resultado')
35
 
36
def run():
    """Render the search form and, on submit, classify the matching tweets.

    Reads a search term and a tweet count from a Streamlit form, downloads
    that many tweets through ``tw.Cursor(api.search_tweets, ...)``, runs them
    through the module-level ``tokenizer`` / ``model`` and shows a two-column
    table: the tweet text and the label ``'No Sexista'`` (predicted class 0)
    or ``'Sexista'`` (any other class).

    Nothing is returned; all output goes to the Streamlit page.
    """
    with st.form(key='Introduzca nombre'):
        search_words = st.text_input('Introduzca el termino para analizar')
        number_of_tweets = st.number_input('Introduzca número de twweets a analizar. Máximo 50', 0, 50, 10)
        submit_button = st.form_submit_button(label='Submit')

    if not submit_button:
        return

    # The form allows an empty term and a count of 0; neither can produce a
    # meaningful query, so stop before calling the API.
    if not search_words.strip() or number_of_tweets < 1:
        st.warning('Introduzca un término y un número de tweets mayor que cero')
        return

    tweets = tw.Cursor(api.search_tweets, q=search_words).items(number_of_tweets)
    tweet_list = [tweet.text for tweet in tweets]

    # An empty result used to crash on ``text[0]`` (no column 0 in an empty
    # DataFrame); report it instead.
    if not tweet_list:
        st.warning('No se encontraron tweets para analizar')
        return

    # ``padding='max_length'`` replaces the deprecated ``pad_to_max_length``.
    encoded = tokenizer.batch_encode_plus(tweet_list,
                                          max_length=128,
                                          add_special_tokens=True,
                                          return_attention_mask=True,
                                          padding='max_length',
                                          truncation=True)
    prediction_inputs = torch.tensor(encoded["input_ids"])
    prediction_masks = torch.tensor(encoded["attention_mask"])

    # Feed the model in order, 25 tweets at a time.
    batch_size = 25
    prediction_data = TensorDataset(prediction_inputs, prediction_masks)
    prediction_sampler = SequentialSampler(prediction_data)
    prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)

    print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs)))

    # Evaluation mode plus no_grad: inference only, no gradients stored.
    model.eval()
    predictions = []
    for batch in prediction_dataloader:
        b_input_ids, b_input_mask = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
        # The first element of the model output is used as the logits.
        predictions.append(outputs[0].detach().cpu().numpy())

    # One row of logits per tweet; the arg-max along axis 1 is the class id.
    flat_predictions = np.argmax(np.concatenate(predictions, axis=0), axis=1).flatten()

    df = pd.DataFrame(list(zip(tweet_list, flat_predictions)),
                      columns=['Latest' + str(number_of_tweets) + 'Tweets' + ' on ' + search_words, 'Sexista'])
    df['Sexista'] = np.where(df['Sexista'] == 0, 'No Sexista', 'Sexista')
    st.table(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  #st.write(df)
87
  run()
 
27
  auth.set_access_token(access_token, access_token_secret)
28
  api = tw.API(auth, wait_on_rate_limit=True)
29
 
 
 
 
30
  st.title('Analisis de comentarios sexistas en Twitter con Tweepy and HuggingFace Transformers')
31
  st.markdown('Esta app utiliza tweepy para descargar tweets de twitter en base a la información de entrada y procesa los tweets usando transformers de HuggingFace para detectar comentarios sexistas. El resultado y los tweets correspondientes se almacenan en un dataframe para mostrarlo que es lo que se ve como resultado')
32
 
33
def principal(tweets, search_words='', number_of_tweets=None):
    """Classify tweets as sexist / non-sexist and show the result as a table.

    Args:
        tweets: Iterable of tweet objects; only their ``.text`` attribute is
            read.
        search_words: Term or user name the tweets were fetched for. Used
            only in the header of the first table column.
        number_of_tweets: Requested tweet count, used only in the column
            header. Defaults to the number of tweets actually received.

    The module-level ``tokenizer``, ``model`` and ``device`` do the
    inference. Predicted class 0 is shown as ``'No Sexista'`` and any other
    class as ``'Sexista'``. Nothing is returned; output goes to the
    Streamlit page.
    """
    tweet_list = [tweet.text for tweet in tweets]

    # An empty result used to crash on ``text[0]`` (no column 0 in an empty
    # DataFrame); report it instead.
    if not tweet_list:
        st.warning('No se encontraron tweets para analizar')
        return
    if number_of_tweets is None:
        number_of_tweets = len(tweet_list)

    # ``padding='max_length'`` replaces the deprecated ``pad_to_max_length``.
    encoded = tokenizer.batch_encode_plus(tweet_list,
                                          max_length=128,
                                          add_special_tokens=True,
                                          return_attention_mask=True,
                                          padding='max_length',
                                          truncation=True)
    prediction_inputs = torch.tensor(encoded["input_ids"])
    prediction_masks = torch.tensor(encoded["attention_mask"])

    # Feed the model in order, 25 tweets at a time.
    batch_size = 25
    prediction_data = TensorDataset(prediction_inputs, prediction_masks)
    prediction_sampler = SequentialSampler(prediction_data)
    prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)

    print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs)))

    # Evaluation mode plus no_grad: inference only, no gradients stored.
    model.eval()
    predictions = []
    for batch in prediction_dataloader:
        b_input_ids, b_input_mask = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
        # The first element of the model output is used as the logits.
        predictions.append(outputs[0].detach().cpu().numpy())

    # One row of logits per tweet; the arg-max along axis 1 is the class id.
    flat_predictions = np.argmax(np.concatenate(predictions, axis=0), axis=1).flatten()

    # Header values now arrive as arguments; they were previously read from
    # names that only exist inside ``run`` and raised NameError.
    df = pd.DataFrame(list(zip(tweet_list, flat_predictions)),
                      columns=['Latest' + str(number_of_tweets) + 'Tweets' + ' on ' + search_words, 'Sexista'])
    df['Sexista'] = np.where(df['Sexista'] == 0, 'No Sexista', 'Sexista')
    st.table(df)


def run():
    """Render the input form and analyse tweets by search term or by user.

    The form has one text field, one tweet-count field and two submit
    buttons: 'Término' searches recent Spanish tweets containing the text
    (retweets excluded), 'Usuario' reads the timeline of the account with
    that screen name. The fetched tweets are handed to ``principal``.
    """
    with st.form(key='Introduzca nombre'):
        search_words = st.text_input('Introduzca el termino para analizar o Usuario a analizar')
        number_of_tweets = st.number_input('Introduzca número de twweets a analizar. Máximo 50', 0, 50, 10)
        submit_button = st.form_submit_button(label='Término')
        submit_button1 = st.form_submit_button(label='Usuario')

    if not (submit_button or submit_button1):
        return

    # The form allows an empty text and a count of 0; neither can produce a
    # meaningful request, so stop before calling the API.
    if not search_words.strip() or number_of_tweets < 1:
        st.warning('Introduzca un término o usuario y un número de tweets mayor que cero')
        return

    if submit_button:
        date_since = "2020-09-14"
        # The start date is expressed as a ``since:`` query operator rather
        # than as a keyword argument of the search call.
        new_search = search_words + " -filter:retweets since:" + date_since
        # Same module alias (``tw``) and endpoint (``search_tweets``) that the
        # rest of the file uses.
        tweets = tw.Cursor(api.search_tweets, q=new_search, lang="es").items(number_of_tweets)
        principal(tweets, search_words, number_of_tweets)

    if submit_button1:
        # The text field doubles as the screen name; fetch only as many
        # tweets as the user asked for.
        tweets = api.user_timeline(screen_name=search_words, count=number_of_tweets)
        principal(tweets, search_words, number_of_tweets)
94
+
95
+
96
  #st.write(df)
97
  run()