Spaces:

srini047
/

text-based-sentiment-analyzer

Runtime error

App Files Files Community

text-based-sentiment-analyzer / sentiment.py

srini047

added function file

df00128 over 2 years ago

raw

history blame

3.9 kB

	#!/usr/bin/env python
	# coding: utf-8

	# # Text Based Sentiment Analysis

	# # IMPORTING NECESSARY MODULES

	# In[1]:


	import numpy as np # For linear algebra
	import pandas as pd # Data processing, CSV file I/O (e.g. pd.read_csv)
	import matplotlib.pyplot as plt # For Visualisation
	# get_ipython().run_line_magic('matplotlib', 'inline')
	import seaborn as sns # For Visualisation
	from bs4 import BeautifulSoup # For Text Parsing


	# # IMPORTING DATASET

	# In[2]:


	# Load the review table (path is relative to the working directory) and
	# discard every row that contains a missing value, in a single expression.
	data = pd.read_csv('Reviews.csv').dropna()


	# # DATA PREPROCESSING & VISUALISATION
	#data.isnull().sum()


	# In[5]:


	#data.shape


	# In[6]:


	# Distinct values found in the 'Score' column, kept for inspection only
	# (the only visible use is the commented-out print below).
	score_unique = pd.unique(data['Score'])
	#print(score_unique)


	# In[7]:


	# 0-> NEGATIVE REVIEW
	# 1-> NEUTRAL REVIEW
	# 2-> POSITIVE REVIEW
	# Translate each score into a class label: below 3 -> 0, exactly 3 -> 1,
	# above 3 -> 2. The branches are mutually exclusive, so every score
	# contributes exactly one label and `a` lines up row-for-row with `data`.
	a = []
	for i in data['Score']:
	    if i < 3:
	        a.append(0)
	    elif i == 3:
	        a.append(1)
	    elif i > 3:
	        a.append(2)


	# In[8]:


	# Tally how many reviews fall into each class. r_2 is computed as the
	# remainder so that, as in the original loop's `else` branch, anything
	# that is not 0 or 1 is counted as class 2.
	r_0 = a.count(0)
	r_1 = a.count(1)
	r_2 = len(a) - r_0 - r_1

	# print('Negative Reviews:',r_0)
	# print('Neutral Reviews:',r_1)
	# print('Positive Reviews:',r_2)


	# In[9]:


	# sns.countplot(a)
	# plt.xlabel('Reviews', color = 'red')
	# plt.ylabel('Count', color = 'red')
	# plt.xticks([0,1,2],['Negative','Neutral','Positive'])
	# plt.title('COUNT PLOT', color = 'r')
	# plt.show()


	# In[10]:


	# Attach the class labels as a new column, then keep just the review text
	# and its label for the modelling steps.
	data['sentiment'] = a
	final_dataset = data.loc[:, ['Text', 'sentiment']]
	#final_dataset


	# In[11]:


	# Separate the rows labelled 2 (positive) from those labelled 0 (negative);
	# rows labelled 1 (neutral) end up in neither frame.
	data_p = final_dataset.loc[data['sentiment'].eq(2)]
	data_n = final_dataset.loc[data['sentiment'].eq(0)]
	#len(data_p), len(data_n)


	# In[12]:


	# Draw 5000 row positions per class (with replacement, so repeats are
	# possible) to get equally sized positive and negative samples.
	# The upper bound is taken from the actual frame length instead of a
	# hard-coded row count, so the indices stay valid if the CSV changes, and
	# the lower bound of 0 allows the first row of each frame to be selected.
	datap = data_p.iloc[np.random.randint(0, len(data_p), 5000), :]
	datan = data_n.iloc[np.random.randint(0, len(data_n), 5000), :]
	#len(datan), len(datap)


	# In[13]:


	# Stack the positive and negative samples into one training frame.
	data = pd.concat([datap, datan])
	# Notebook size probe; as a bare expression it did nothing in a script,
	# so it is commented out like the other inspection lines in this file.
	#len(data)


	# In[14]:


	# Collapse the labels to binary: negative stays 0, positive (2) becomes 1.
	c = []
	for i in data['sentiment']:
	    if i == 0:
	        c.append(0)
	    elif i == 2:
	        c.append(1)
	# Overwrite the column once, after a label has been produced for every row.
	data['sentiment'] = c


	# In[15]:


	# sns.countplot(data['sentiment'])
	# plt.show()


	# In[16]:


	def strip_html(text):
	    """Return the plain text of *text* with HTML markup removed.

	    The input is parsed with BeautifulSoup's built-in ``html.parser`` and
	    the text content of the resulting document is returned.
	    """
	    soup = BeautifulSoup(text, "html.parser")
	    return soup.get_text()
	# Store the markup-free text under 'review', then retire the raw 'Text'
	# column so only the cleaned text and the label remain.
	data['review'] = data['Text'].apply(strip_html)
	data = data.drop(columns='Text')

	#data.head()


	# # MODEL BUILDING

	# In[17]:


	import nltk #Natural Language Processing Toolkit
	def punc_clean(text):
	    """Return *text* with every ASCII punctuation character removed.

	    Characters listed in ``string.punctuation`` are deleted; all other
	    characters, including whitespace, are kept in their original order.
	    """
	    import string

	    # A deletion table lets str.translate drop all punctuation in one pass
	    # instead of testing each character in a Python-level loop.
	    return text.translate(str.maketrans('', '', string.punctuation))
	# Strip punctuation from every review, element by element.
	data['review'] = data['review'].map(punc_clean)
	#data.head(2)


	# In[18]:


	def remove_stopword(text):
	    """Return *text* with English stopwords removed, keeping the word 'not'.

	    The text is split with ``nltk.word_tokenize``; tokens that are not in
	    the stopword collection are re-joined with single spaces.
	    """
	    # A set gives constant-time membership tests for each token.
	    stopword = set(nltk.corpus.stopwords.words('english'))
	    # 'not' is deliberately kept in the text (presumably because negation
	    # matters for sentiment).
	    stopword.discard('not')
	    return ' '.join(w for w in nltk.word_tokenize(text) if w not in stopword)
	#data['review'] = data['review'].apply(remove_stopword)


	# In[19]:


	from sklearn.feature_extraction.text import TfidfVectorizer

	# Build TF-IDF features over unigrams and bigrams of the cleaned reviews.
	# fit_transform learns the vocabulary and produces the document-term
	# matrix in one pass, rather than processing the whole corpus twice with
	# separate fit() and transform() calls. `vectr` is left fitted for later
	# transform() calls on new text.
	vectr = TfidfVectorizer(ngram_range=(1, 2), min_df=1)
	vect_X = vectr.fit_transform(data['review'])


	# In[20]:


	from sklearn.linear_model import LogisticRegression

	# Train a default-configured logistic regression on the TF-IDF features,
	# using the binary sentiment column as the target. The fitted estimator
	# returned by fit() is kept under `clf` for the prediction calls.
	model = LogisticRegression()
	clf = model.fit(vect_X, y=data['sentiment'])
	#clf.score(vect_X,data['sentiment'])*100


	# # PREDICTION

	# In[21]:


	# Spot-check the trained classifier on three hand-written phone reviews.
	# Each review is vectorised with the already-fitted `vectr` and passed to
	# `clf.predict`; with the binary relabelling above, 0 means negative and
	# 1 means positive.
	# NOTE: the return values are neither stored nor printed, so outside a
	# notebook these calls produce no visible output.

	# Review 1: mostly favourable, with a complaint about the camera.
	clf.predict(vectr.transform(['''Nice look and build quality with moderately fast everything such as refresh rate, display quality, sound, processing, gaming experience and many more ..
	I didn't find any lagging or heating issue..And battery health I won't say great but I'll take that

	Only cons I can say about it is camera.. sharpening picture a little much at day light and low light photo you have to compromise.''']))


	# In[22]:


	# Review 2: unfavourable (bugs, poor screen quality).
	clf.predict(vectr.transform(['''Phone has bugs , and screen quality is poor , Avoid realme. Gaming was just over hyped''']))


	# In[23]:


	# Review 3: favourable (no lag, good performance for the price).
	clf.predict(vectr.transform(['''No lags found super speed and very good performance nice phone in this budget''']))


	# In[ ]: