sahiba12 committed on
Commit
8444252
1 Parent(s): 240593d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -128
app.py CHANGED
@@ -1,133 +1,23 @@
1
- # -*- coding: utf-8 -*-
2
- """First_Text_Classification.ipynb
3
-
4
- Automatically generated by Colaboratory.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1sdLss09e3OxYVoeK3oBA6qrUSj_iOxp-
8
-
9
- <h3 align = "center">Importing Libraries</h3>
10
- """
11
-
12
- import numpy as np
13
- import pandas as pd
14
-
15
- """<h3 align = "center">Importing Dataset</h3>"""
16
-
17
- data = pd.read_csv("spam.csv", encoding = "ISO-8859-1")
18
-
19
- """<h3 align = "center">Preliminary Data Checks</h3>"""
20
-
21
- data.head()
22
-
23
- data.isnull().sum()
24
-
25
- data.shape
26
-
27
- data['v1'].value_counts()
28
-
29
- data.info()
30
-
31
- """<h3 align = "center">Putting the Length of Characters of each row in a column.</h3>"""
32
-
33
- data["Unnamed: 2"] = data["v2"].str.len()
34
-
35
- """<h3 align = "center">Visualising Length of Characters for each category!</h3>"""
36
-
37
-
38
- """<h5>It is evident from the above plot that spam texts are usually longer in length!</h5>
39
-
40
- <h3 align = "center">Defining Variables</h3>
41
- """
42
-
43
- X = data["v2"]
44
- y = data["v1"]
45
-
46
- """<h3 align = "center">Train Test Split</h3>"""
47
-
48
- from sklearn.model_selection import train_test_split
49
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
50
-
51
- """<h3 align = "center">Vectorizing Words into Matrix</h3>"""
52
-
53
- from sklearn.feature_extraction.text import CountVectorizer
54
- count_vect = CountVectorizer()
55
-
56
- X_train_counts = count_vect.fit_transform(X_train)
57
-
58
- X_train_counts
59
-
60
- X_train.shape
61
-
62
- X_train_counts.shape
63
-
64
- from sklearn.feature_extraction.text import TfidfTransformer
65
- tfidf_transformer = TfidfTransformer()
66
-
67
- X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
68
-
69
- X_train_tfidf.shape
70
-
71
- """<h3 align = "center">Using TF-IDF Vectorizer for optimum vectorization!</h3>"""
72
-
73
- from sklearn.feature_extraction.text import TfidfVectorizer
74
- vectorizer = TfidfVectorizer()
75
-
76
- X_train_tfidf = vectorizer.fit_transform(X_train)
77
-
78
- X_train_tfidf.shape
79
-
80
- """<h3 align = "center">Creating Model</h3>"""
81
-
82
- from sklearn.svm import LinearSVC
83
- clf = LinearSVC()
84
-
85
- clf.fit(X_train_tfidf,y_train)
86
-
87
- """<h3 align = "center">Creating Pipeline</h3>"""
88
-
89
- from sklearn.pipeline import Pipeline
90
-
91
- text_clf = Pipeline([("tfidf",TfidfVectorizer()),("clf",LinearSVC())])
92
-
93
- text_clf.fit(X_train,y_train)
94
-
95
- predictions = text_clf.predict(X_test)
96
-
97
- X_test
98
-
99
- from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
100
-
101
- print(confusion_matrix(y_test,predictions))
102
-
103
- print(classification_report(y_test,predictions))
104
-
105
- """<h3 align = "center">Accuracy Score</h3>"""
106
-
107
- print(accuracy_score(y_test,predictions))
108
-
109
- """<h3 align = "center">Predictions </h3>"""
110
-
111
- text_clf.predict(["Hi how are you doing today?"])
112
-
113
- text_clf.predict(["Congratulations! You are selected for a free vouchar worth $500"])
114
-
115
- """<h3 align = "center">Creating User Interface!</h3>"""
116
-
117
  import gradio as gr
 
 
 
118
 
119
- def first_nlp_spam_detector(text):
120
- list = []
121
- list.append(text)
122
- arr = text_clf.predict(list)
123
- if arr[0] == 'ham':
124
- return "Your Text is a Legitimate One!"
125
- else:
126
- return "Beware of such text messages, It\'s a Spam! "
127
 
128
- interface = gr.Interface(first_nlp_spam_detector,inputs = gr.Textbox(lines=2, placeholder="Enter your Text Here.....!", show_label = False),
129
- outputs = gr.Label(value = "Predicting the Text Classification..!"),description = "Predicting Text Legitimacy!")
130
 
131
- first_nlp_spam_detector("Congratulations! You are selected for a free vouchar worth $500")
 
 
 
132
 
133
- interface.launch()
 
 
 
 
 
 
1
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
3
+ from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
4
+
5
+ HF_TOKEN = os.environ.get('HF_TOKEN')
6
 
7
+ model_checkpoint = "besijar/dspa_review_classification"
8
+ tokeniser = AutoTokenizer.from_pretrained(model_checkpoint, use_auth_token=HF_TOKEN)
9
+ model = TFAutoModelForSequenceClassification.from_pretrained(model_checkpoint, use_auth_token=HF_TOKEN)
 
 
 
 
 
10
 
11
+ example_review = "Tully's House Blend is the perfect K-Cup for me. Sure, I occasionally enjoy the special flavors.....Mocha, Italian roast, French vanilla, but my favorite 'go-to'coffee is House Blend. Wakes me up in the morning with it's coffee house full hearty taste."
 
12
 
13
+ def review_classify(review):
14
+ review = tokeniser.encode(review)
15
+ review = model.predict([review])
16
+ return int(review.logits.argmax())
17
 
18
+ iface = gr.Interface(review_classify,
19
+ title="Review Classification using DistilRoBERTa",
20
+ inputs=[gr.Text(label="Review")],
21
+ outputs=[gr.Number(label="Rating", precision=0)],
22
+ examples=[example_review])
23
+ iface.launch()