Spaces:

zArabi
/

Persian-Sentiment-Analysis

Runtime error

App Files Community

Persian-Sentiment-Analysis / app.py

zArabi

Update app.py

1635166 about 2 years ago

raw

history blame

4.54 kB

	import gradio as gr
	from transformers import BertModel, BertConfig, BertTokenizer
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	import huggingface_hub
	from huggingface_hub import hf_hub_download
	import hazm
	from cleantext import clean
	import regex as re

	# NOTE(review): this rebinds the `Repository` attribute of the imported
	# huggingface_hub module to a plain string. Nothing else in this file reads
	# that attribute (the download further down passes its repo_id explicitly),
	# so it looks like a leftover -- confirm before removing.
	huggingface_hub.Repository = 'zArabi/Persian-Sentiment-Analysis'

	# Compiled once at import time instead of on every call.
	# Non-greedy: matches the shortest "<...>" span, so each tag is removed on
	# its own rather than everything between the first "<" and the last ">".
	_HTML_TAG_RE = re.compile(r'<.*?>')


	def cleanhtml(raw_html):
	    """Return ``raw_html`` with every ``<...>`` span removed.

	    ``.`` does not match a newline here, so a tag that is split across two
	    lines is left untouched. A lone "<" without a closing ">" is kept as-is.

	    Args:
	        raw_html: the text to strip tags from.

	    Returns:
	        The input text with all matched tag spans replaced by nothing.
	    """
	    return _HTML_TAG_RE.sub('', raw_html)

	# Built once at import time: constructing the hazm normalizer and compiling
	# the patterns below on every request is wasted work.
	_NORMALIZER = hazm.Normalizer()

	# Emoji, pictographs, dingbats and directional-isolate control characters
	# that are stripped from the text.
	# NOTE(review): U+200C stays commented out exactly as in the original, so it
	# is NOT removed -- presumably because Persian text relies on it; confirm.
	_WEIRD_PATTERN = re.compile("["
	    u"\U0001F600-\U0001F64F"  # emoticons
	    u"\U0001F300-\U0001F5FF"  # symbols & pictographs
	    u"\U0001F680-\U0001F6FF"  # transport & map symbols
	    u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
	    u"\U00002702-\U000027B0"
	    u"\U000024C2-\U0001F251"
	    u"\U0001f926-\U0001f937"
	    u'\U00010000-\U0010ffff'
	    u"\u200d"
	    u"\u2640-\u2642"
	    u"\u2600-\u2B55"
	    u"\u23cf"
	    u"\u23e9"
	    u"\u231a"
	    u"\u3030"
	    u"\ufe0f"
	    u"\u2069"
	    u"\u2066"
	    # u"\u200c"
	    u"\u2068"
	    u"\u2067"
	    "]+", flags=re.UNICODE)

	# Raw string: the original "\s+" was a non-raw literal with an invalid
	# escape sequence, which current Python versions warn about.
	_WHITESPACE_RUN = re.compile(r"\s+")


	def cleaning(text):
	    """Normalize a raw comment before it is tokenized.

	    Steps, in order: strip surrounding whitespace; run ``clean`` (lowercase,
	    drop line breaks, blank out URLs, e-mails, phone numbers and currency
	    symbols, keep punctuation and numbers); remove ``<...>`` tags; apply the
	    hazm normalizer; strip emoji-like characters; drop ``#``; collapse every
	    whitespace run to a single space.

	    Args:
	        text: the raw input string.

	    Returns:
	        The cleaned string. It is not stripped again at the end, so it can
	        still start or end with a single space.
	    """
	    text = text.strip()

	    # regular cleaning
	    # https://pypi.org/project/clean-text/ >> works well for eng and de languages
	    text = clean(
	        text,
	        fix_unicode=True,
	        to_ascii=False,
	        lower=True,
	        no_line_breaks=True,
	        no_urls=True,
	        no_emails=True,
	        no_phone_numbers=True,
	        no_numbers=False,
	        no_digits=False,
	        no_currency_symbols=True,
	        no_punct=False,  # keep the punctuation
	        replace_with_url="",
	        replace_with_email="",
	        replace_with_phone_number="",
	        replace_with_number="",
	        replace_with_digit="0",
	        replace_with_currency_symbol="",
	    )

	    # cleaning htmls
	    text = cleanhtml(text)

	    # normalizing > https://github.com/sobhe/hazm
	    text = _NORMALIZER.normalize(text)

	    # removing weird patterns
	    text = _WEIRD_PATTERN.sub(r'', text)

	    # removing hashtags (a literal character, so no regex is needed)
	    text = text.replace("#", "")
	    # removing extra spaces
	    text = _WHITESPACE_RUN.sub(" ", text)

	    return text

	class SentimentModel(nn.Module):
	    """BERT encoder followed by dropout and a linear classification head.

	    NOTE(review): this script loads an already-serialized model object with
	    ``torch.load``; presumably that object is an instance of this class, so
	    the class name and the attribute names ``bert``, ``dropout`` and
	    ``classifier`` are kept unchanged -- confirm before renaming anything.
	    """

	    def __init__(self, config, pretrained_name=None):
	        """Build the encoder and the classification head.

	        Args:
	            config: object providing ``hidden_size`` and ``num_labels``,
	                which size the linear classifier.
	            pretrained_name: identifier handed to
	                ``BertModel.from_pretrained``. When omitted, the module-level
	                ``modelName`` is used, which is what the original
	                hard-coded.
	        """
	        super().__init__()
	        if pretrained_name is None:
	            # Resolved at call time, so ``modelName`` only has to exist by
	            # the time the class is instantiated, not when it is defined.
	            pretrained_name = modelName
	        # return_dict=False makes the encoder return a plain tuple, which
	        # forward() unpacks positionally.
	        self.bert = BertModel.from_pretrained(pretrained_name, return_dict=False)
	        self.dropout = nn.Dropout(0.3)
	        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

	    def forward(self, input_ids, attention_mask):
	        """Return the classifier logits for a batch.

	        Args:
	            input_ids: token ids passed to the encoder as ``input_ids``.
	            attention_mask: mask passed to the encoder as ``attention_mask``.

	        Returns:
	            The output of the linear head applied to the (dropped-out)
	            second element of the encoder's output tuple.
	        """
	        # The encoder yields a 2-tuple; only its second element is used.
	        _, pooled_output = self.bert(
	            input_ids=input_ids,
	            attention_mask=attention_mask)
	        return self.classifier(self.dropout(pooled_output))

	# Base checkpoint: its config and tokenizer are loaded below.
	modelName = 'HooshvareLab/bert-fa-base-uncased'
	class_names = ['negative', 'neutral', 'positive']
	label2id = {label: i for i, label in enumerate(class_names)}
	id2label = {i: label for label, i in label2id.items()}

	config = BertConfig.from_pretrained(
	    modelName,
	    num_labels=len(class_names),
	    id2label=id2label,
	    label2id=label2id)

	# Defined before the model is loaded so the model can be moved onto it.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	downloadedModelFile = hf_hub_download(repo_id="zArabi/Persian-Sentiment-Analysis", filename="persianModel")
	# SECURITY NOTE: torch.load unpickles the downloaded file, which can execute
	# arbitrary code; only load files from a repository you trust.
	# NOTE(review): PyTorch 2.6 changed the torch.load default to
	# weights_only=True, which refuses fully pickled model objects. If the
	# runtime is upgraded, this call will need weights_only=False (or the
	# checkpoint should be re-exported as a state_dict).
	loaded_model = torch.load(downloadedModelFile, map_location="cpu")
	# predict() moves its inputs to `device`; the model was mapped to the CPU
	# above, so without this move a CUDA host hits a device-mismatch error.
	# Assumes the loaded object is an nn.Module (it is called like one later).
	loaded_model.to(device)
	# Inference only: switch dropout off so predictions are deterministic.
	loaded_model.eval()

	tokenizer = BertTokenizer.from_pretrained(modelName)
	max_len = 512

	def predict(text):
	    """Classify ``text`` and return one probability per sentiment class.

	    The text is cleaned with ``cleaning``, tokenized (truncated and padded
	    to ``max_len``), run through ``loaded_model``, and the logits are turned
	    into probabilities with a softmax over the class dimension.

	    Args:
	        text: the raw sentence typed by the user.

	    Returns:
	        A dict mapping each name in ``class_names`` to its probability,
	        rounded to three decimals.
	    """
	    text = cleaning(text)
	    encoding = tokenizer.encode_plus(
	        text,
	        max_length=max_len,
	        truncation=True,
	        padding="max_length",
	        add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
	        return_token_type_ids=True,
	        return_attention_mask=True,
	        return_tensors='pt',  # Return PyTorch tensors
	    )

	    # Send the inputs to wherever the model's weights actually live. The
	    # original used the global `device`, which can be CUDA while the model
	    # was loaded onto the CPU. Assumes the model is an nn.Module with at
	    # least one parameter.
	    model_device = next(loaded_model.parameters()).device
	    input_ids = encoding["input_ids"].to(model_device)
	    attention_mask = encoding["attention_mask"].to(model_device)

	    # No gradients are needed for inference; skipping them saves memory.
	    with torch.no_grad():
	        logits = loaded_model(input_ids, attention_mask)

	    # Single input sentence, so only the first row of the batch is read.
	    probabilities = F.softmax(logits, dim=1)[0].tolist()
	    return {
	        name: round(probability, 3)
	        for name, probability in zip(class_names, probabilities)
	    }

	# Build the web UI around predict() and start serving it.
	# The output uses the top-level gr.Label component, matching the top-level
	# gr.Textbox already used for the input; the legacy `gr.outputs` namespace
	# is not available in current Gradio releases.
	gr.Interface(
	    predict,
	    inputs=gr.Textbox(label="Explore your sentence!", lines=2, placeholder="Type Here..."),
	    outputs=gr.Label(num_top_classes=3),
	    title="How are feeling?!",
	).launch()