srini047 committed on
Commit
de1732e
·
1 Parent(s): eee4614

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +14 -0
  2. profanity.py +39 -0
  3. salient.py +67 -0
  4. test.py +8 -0
  5. train-cleaned.csv +0 -0
app.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from salient import vectr, clf
3
+
4
def predict(text):
    """Classify a single piece of text as salient or not.

    The text is vectorised with the fitted ``vectr`` and passed to the
    fitted ``clf`` (both imported from ``salient``).

    Args:
        text: The raw input string from the Gradio textbox.

    Returns:
        ``"Salient"`` when the predicted label converts to a non-zero
        integer, otherwise ``"Not salient"``.
    """
    prediction = clf.predict(vectr.transform([text]))

    # ``clf.predict`` is given exactly one row, so read that one label
    # explicitly. Calling int() on the whole array relies on implicit
    # array-to-scalar conversion, which NumPy has deprecated for arrays
    # with ndim > 0.
    if int(prediction[0]):
        return "Salient"
    return "Not salient"
12
+
13
# Keep ``demo`` at module level so anything that references ``app.demo``
# continues to work.
demo = gr.Interface(fn=predict, inputs="text", outputs="text")

# Only start the web server when this file is executed as a script, so that
# importing ``predict`` from another module does not launch a server as a
# side effect.
if __name__ == "__main__":
    demo.launch(share=True)
profanity.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import time
3
+ import json
4
+
5
+ from profanityfilter import ProfanityFilter
6
input_text = "like"

model_name = "Dabid/abusive-tagalog-profanity-detection"
# Build the URL from ``model_name`` so the two can never drift apart.
endpoint = f"https://api-inference.huggingface.co/models/{model_name}"

payload = {
    "inputs": input_text,
    "options": {
        "max_length": 50,
        "temperature": 1,
    },
}


def build_headers():
    """Return the HTTP headers for the inference request.

    The API token is read from the ``HF_API_TOKEN`` environment variable
    instead of being hard-coded. A token that was ever committed to source
    control must be treated as leaked and revoked.

    Returns:
        A dict with an ``Authorization`` header when the variable is set,
        otherwise an empty dict.
    """
    import os

    token = os.environ.get("HF_API_TOKEN")
    return {"Authorization": f"Bearer {token}"} if token else {}


def interpret_result(result):
    """Map a decoded API response to ``"Non Profane"`` or ``"Profane"``.

    Args:
        result: The decoded JSON body. The first entry of its first element
            is read as a dict with ``"label"`` and ``"score"`` keys.

    Returns:
        ``"Non Profane"`` only when that entry is labelled ``"Non-Abusive"``
        with a score strictly above 0.75; ``"Profane"`` otherwise.
    """
    top = result[0][0]
    if top["label"] == "Non-Abusive" and top["score"] > 0.75:
        return "Non Profane"
    return "Profane"


def query_model(max_retries=3, retry_delay=20, timeout=30):
    """Call the inference endpoint and print the verdict.

    While the response body reports that the model is still loading, the
    request is retried up to ``max_retries`` times, sleeping ``retry_delay``
    seconds between attempts. Any other non-200 response is printed and
    ends the call.

    Args:
        max_retries: Number of additional attempts after the first one.
        retry_delay: Seconds to wait before retrying a loading model.
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the script forever.
    """
    headers = build_headers()
    for attempt in range(max_retries + 1):
        response = requests.post(
            endpoint, json=payload, headers=headers, timeout=timeout
        )
        if response.status_code == 200:
            print(interpret_result(json.loads(response.text)))
            return
        still_loading = "Model is currently loading" in response.text
        if still_loading and attempt < max_retries:
            print("Model is still loading. Retrying in a few seconds...")
            time.sleep(retry_delay)
            continue
        print("API call failed with status code:", response.status_code)
        print(response.text)
        return


def main():
    """Run the local profanity filter, then the hosted model, on ``input_text``."""
    pf = ProfanityFilter()
    print(pf.is_profane(input_text))
    query_model()


if __name__ == "__main__":
    main()
salient.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """# MODEL BUILDING"""
2
+ # Commented out IPython magic to ensure Python compatibility.
3
+ import numpy as np # For linear algebra
4
+ import pandas as pd # Data processing, CSV file I/O (e.g. pd.read_csv)
5
+ # import matplotlib.pyplot as plt # For Visualisation
6
+ # %matplotlib inline
7
+ # import seaborn as sns # For Visualisation
8
+ # from bs4 import BeautifulSoup # For Text Parsing
9
+ # from ydata_profiling import ProfileReport # For generating data report
10
+
11
+ import nltk
12
+ from nltk.corpus import stopwords
13
+ nltk.download('stopwords')
14
+ nltk.download('punkt')
15
+
16
_STOPWORDS = None  # lazily built cache shared by every remove_stopword() call


def remove_stopword(text):
    """Drop English stop words from ``text``, keeping the word "not".

    The text is tokenised with ``nltk.word_tokenize`` and every token found
    in NLTK's English stop-word list is removed. "not" is deliberately kept.
    Matching is case-sensitive, exactly as the tokens come out of the
    tokenizer.

    Args:
        text: The string to filter.

    Returns:
        The remaining tokens joined by single spaces.
    """
    global _STOPWORDS
    if _STOPWORDS is None:
        # Build the lookup once: a frozenset gives O(1) membership tests and
        # avoids re-fetching the word list for every row this is applied to.
        words = set(nltk.corpus.stopwords.words('english'))
        words.discard('not')
        _STOPWORDS = frozenset(words)
    return ' '.join(w for w in nltk.word_tokenize(text) if w not in _STOPWORDS)
21
+ #data['Extracted text'] = data['Extracted text'].apply(remove_stopword)
22
+
23
# Load the training set; the path is resolved relative to the current
# working directory. (The bare ``data`` expression that followed was a
# notebook leftover and has no effect in a script.)
data = pd.read_csv('train-cleaned.csv')
25
+
26
+ import nltk #Natural Language Processing Toolkit
27
def punc_clean(text):
    """Return ``text`` with every ASCII punctuation character removed.

    "Punctuation" means the characters in ``string.punctuation``. All other
    characters, including whitespace, are left untouched.

    Args:
        text: The string to clean.

    Returns:
        A new string without punctuation characters.
    """
    import string as st

    # One C-level pass instead of a Python-level loop over every character.
    return text.translate(str.maketrans('', '', st.punctuation))
31
# Store the punctuation-free text under a real column name; the original
# wrote it to a column whose name was the empty string.
# NOTE(review): the TF-IDF features in this script are built from
# 'Extracted text', not from this cleaned column. Confirm which one is
# intended; training on the cleaned text would also require applying
# punc_clean to inputs at prediction time.
data['Cleaned text'] = data['Extracted text'].apply(punc_clean)
32
+ #data.head(2)
33
+
34
+ from sklearn.feature_extraction.text import TfidfVectorizer
35
+
36
# Unigram + bigram TF-IDF features over the 'Extracted text' column.
vectr = TfidfVectorizer(ngram_range=(1, 2), min_df=1)

# fit_transform learns the vocabulary and produces the document-term matrix
# in a single pass, instead of fitting and then transforming the same
# corpus a second time.
vect_X = vectr.fit_transform(data['Extracted text'])
40
+
41
+ #from sklearn.linear_model import LogisticRegression
42
+
43
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier

# Hard voting only uses each estimator's predicted label, so the SVC does
# not need probability estimates; enabling them only makes fitting slower.
svm_classifier = SVC(kernel='linear')
logistic_classifier = LogisticRegression()

# NOTE(review): with exactly two estimators a hard vote can tie. Per the
# scikit-learn VotingClassifier documentation, ties resolve to the class
# that sorts first. Confirm that this bias is acceptable.
model = VotingClassifier(
    estimators=[
        ('svm', svm_classifier),
        ('logistic', logistic_classifier),
    ],
    voting='hard',
)

# ``fit`` returns the fitted ensemble itself, so ``clf`` and ``model`` refer
# to the same object.
clf = model.fit(vect_X, data['saliency'])
58
+ # clf.score(vect_X, data['saliency'])*100
59
+
60
+ # """# PREDICTION"""
61
+
62
+ # clf.predict(vectr.transform(['''thank you ''']))
63
+
64
+ # clf.predict(vectr.transform(['''Theres no trailers or nothing on the other side of me and its been facing away from my trailer straight''']))
65
+
66
+ # clf.predict(vectr.transform([''' I dont think that should really matter Um''']))
67
+
test.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from gradio_client import Client
2
+
3
import os

# The share URL of a Gradio app is temporary, so allow it to be overridden
# through the GRADIO_URL environment variable. The original hard-coded
# address remains the default.
client = Client(
    os.environ.get("GRADIO_URL", "https://1712bd40513e8f2c8a.gradio.live/")
)
result = client.predict(
    "Howdy!",  # str in 'text' Textbox component
    api_name="/predict",
)
print(result)
train-cleaned.csv ADDED
The diff for this file is too large to render. See raw diff