zArabi committed on
Commit
5d1cadc
1 Parent(s): 2c051ce

add the latest model

Browse files
Files changed (1) hide show
  1. app.py +5 -92
app.py CHANGED
@@ -5,96 +5,7 @@ import torch.nn as nn
5
  import torch.nn.functional as F
6
  import huggingface_hub
7
  from huggingface_hub import hf_hub_download
8
- import hazm
9
- from cleantext import clean
10
- import regex as re
11
-
12
- huggingface_hub.Repository = 'zArabi/Persian-Sentiment-Analysis'
13
-
14
- def cleanhtml(raw_html):
15
- cleanr = re.compile('<.*?>')
16
- cleantext = re.sub(cleanr, '', raw_html)
17
- return cleantext
18
-
19
- def cleaning(text):
20
- text = text.strip()
21
-
22
- # regular cleaning
23
- # https://pypi.org/project/clean-text/ >> works well for eng and de languages
24
- text = clean(text,
25
- fix_unicode=True,
26
- to_ascii=False,
27
- lower=True,
28
- no_line_breaks=True,
29
- no_urls=True,
30
- no_emails=True,
31
- no_phone_numbers=True,
32
- no_numbers=False,
33
- no_digits=False,
34
- no_currency_symbols=True,
35
- no_punct=False, #Keep the punc
36
- replace_with_url="",
37
- replace_with_email="",
38
- replace_with_phone_number="",
39
- replace_with_number="",
40
- replace_with_digit="0",
41
- replace_with_currency_symbol="",
42
- )
43
-
44
- # cleaning htmls
45
- text = cleanhtml(text)
46
-
47
- # normalizing > https://github.com/sobhe/hazm
48
- normalizer = hazm.Normalizer()
49
- text = normalizer.normalize(text)
50
-
51
- # removing wierd patterns
52
- wierd_pattern = re.compile("["
53
- u"\U0001F600-\U0001F64F" # emoticons
54
- u"\U0001F300-\U0001F5FF" # symbols & pictographs
55
- u"\U0001F680-\U0001F6FF" # transport & map symbols
56
- u"\U0001F1E0-\U0001F1FF" # flags (iOS)
57
- u"\U00002702-\U000027B0"
58
- u"\U000024C2-\U0001F251"
59
- u"\U0001f926-\U0001f937"
60
- u'\U00010000-\U0010ffff'
61
- u"\u200d"
62
- u"\u2640-\u2642"
63
- u"\u2600-\u2B55"
64
- u"\u23cf"
65
- u"\u23e9"
66
- u"\u231a"
67
- u"\u3030"
68
- u"\ufe0f"
69
- u"\u2069"
70
- u"\u2066"
71
- # u"\u200c"
72
- u"\u2068"
73
- u"\u2067"
74
- "]+", flags=re.UNICODE)
75
-
76
- text = wierd_pattern.sub(r'', text)
77
-
78
- # removing extra spaces, hashtags
79
- text = re.sub("#", "", text)
80
- text = re.sub("\s+", " ", text)
81
-
82
- return text
83
-
84
- class SentimentModel(nn.Module):
85
- def __init__(self, config):
86
- super(SentimentModel, self).__init__()
87
- self.bert = BertModel.from_pretrained(modelName, return_dict=False)
88
- self.dropout = nn.Dropout(0.3)
89
- self.classifier = nn.Linear(config.hidden_size, config.num_labels)
90
-
91
- def forward(self, input_ids, attention_mask):
92
- _, pooled_output = self.bert(
93
- input_ids=input_ids,
94
- attention_mask=attention_mask)
95
- pooled_output = self.dropout(pooled_output)
96
- logits = self.classifier(pooled_output)
97
- return logits
98
 
99
  modelName = 'HooshvareLab/bert-fa-base-uncased'
100
  class_names = ['negative', 'neutral', 'positive']
@@ -107,8 +18,10 @@ config = BertConfig.from_pretrained(
107
  id2label=id2label,
108
  label2id=label2id)
109
 
110
- downloadedModelFile = hf_hub_download(repo_id="zArabi/Persian-Sentiment-Analysis", filename="persianModel")
 
111
  loaded_model = torch.load(downloadedModelFile,map_location="cpu")
 
112
 
113
 
114
  tokenizer = BertTokenizer.from_pretrained(modelName)
@@ -129,7 +42,7 @@ def predict(text):
129
  )
130
  input_ids = encoding["input_ids"].to(device)
131
  attention_mask = encoding["attention_mask"].to(device)
132
- outputs = loaded_model (input_ids, attention_mask)
133
  probs = F.softmax(outputs,dim=1)
134
  values, indices = torch.max(probs, dim=1)
135
  data = {
 
5
  import torch.nn.functional as F
6
  import huggingface_hub
7
  from huggingface_hub import hf_hub_download
8
+ from preprocessing import *
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  modelName = 'HooshvareLab/bert-fa-base-uncased'
11
  class_names = ['negative', 'neutral', 'positive']
 
18
  id2label=id2label,
19
  label2id=label2id)
20
 
21
+ path="HooshvareLab-bert-fa-base-uncased-3class-best-epoch-weight-decay=.001.bin"
22
+ downloadedModelFile = hf_hub_download(repo_id="zArabi/Persian-Sentiment-Analysis", filename=path)
23
  loaded_model = torch.load(downloadedModelFile,map_location="cpu")
24
+ loaded_model.eval()
25
 
26
 
27
  tokenizer = BertTokenizer.from_pretrained(modelName)
 
42
  )
43
  input_ids = encoding["input_ids"].to(device)
44
  attention_mask = encoding["attention_mask"].to(device)
45
+ outputs = loaded_model(input_ids, attention_mask)
46
  probs = F.softmax(outputs,dim=1)
47
  values, indices = torch.max(probs, dim=1)
48
  data = {