Spaces:
Runtime error
Runtime error
Upload 5 files
Browse files- app.py +14 -0
- profanity.py +39 -0
- salient.py +67 -0
- test.py +8 -0
- train-cleaned.csv +0 -0
app.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from salient import vectr, clf
|
3 |
+
|
4 |
+
def predict(text):
    """Classify *text* with the model imported from ``salient``.

    Args:
        text: Input string supplied by the Gradio text box.

    Returns:
        ``"Salient"`` when the predicted label converts to a non-zero
        integer, otherwise ``"Not salient"``.
    """
    senti = clf.predict(vectr.transform([text]))
    # Exactly one sample is passed in, so read its label by index. Casting
    # the whole array with int() relies on implicit array-to-scalar
    # conversion, which recent NumPy releases deprecate.
    if int(senti[0]):
        return "Salient"
    return "Not salient"
|
12 |
+
|
13 |
+
# Single text box in, single text box out, backed by predict().
demo = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
)

# share=True asks Gradio to also create a public share link.
demo.launch(share=True)
|
profanity.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Check a sample string for profanity.

First runs the local ``profanityfilter`` check, then asks the Hugging Face
Inference API model named in ``model_name`` to label the same text and
prints "Non Profane" or "Profane" based on the top label and its score.
"""
import json
import os
import time

import requests
from profanityfilter import ProfanityFilter

pf = ProfanityFilter()
input_text = "like"
print(pf.is_profane(input_text))

model_name = "Dabid/abusive-tagalog-profanity-detection"
# Derive the URL from model_name so the two can never drift apart.
endpoint = f"https://api-inference.huggingface.co/models/{model_name}"

payload = {
    "inputs": input_text,
    # NOTE(review): these look like text-generation settings; confirm the
    # classification endpoint actually honours them.
    "options": {
        "max_length": 50,
        "temperature": 1,
    },
}

# Secrets must not live in source control: read the token from the
# environment. The token that was previously committed here should be
# revoked. Without a token the request is sent unauthenticated.
api_token = os.environ.get("HF_TOKEN")
headers = {"Authorization": f"Bearer {api_token}"} if api_token else {}

max_attempts = 5
retry_delay_seconds = 20
request_timeout_seconds = 60

for attempt in range(1, max_attempts + 1):
    # A timeout keeps the script from hanging forever on a stalled socket.
    response = requests.post(
        endpoint, json=payload, headers=headers, timeout=request_timeout_seconds
    )

    if response.status_code == 200:
        result = json.loads(response.text)
        filtered_text = result[0]
        # The first entry of the first result is treated as the top label.
        p = filtered_text[0]["label"]
        val = filtered_text[0]["score"]
        if p == "Non-Abusive" and val > 0.75:
            print("Non Profane")
        else:
            print("Profane")
        break

    # Matched loosely because the API's "loading" message may embed the
    # model id between "Model" and "is currently loading".
    if "currently loading" in response.text and attempt < max_attempts:
        print("Model is still loading. Retrying in a few seconds...")
        time.sleep(retry_delay_seconds)
        continue  # actually retry instead of falling off the end

    print("API call failed with status code:", response.status_code)
    print(response.text)
    break
|
salient.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""# MODEL BUILDING"""
|
2 |
+
# Commented out IPython magic to ensure Python compatibility.
|
3 |
+
import numpy as np # For linear algebra
|
4 |
+
import pandas as pd # Data processing, CSV file I/O (e.g. pd.read_csv)
|
5 |
+
# import matplotlib.pyplot as plt # For Visualisation
|
6 |
+
# %matplotlib inline
|
7 |
+
# import seaborn as sns # For Visualisation
|
8 |
+
# from bs4 import BeautifulSoup # For Text Parsing
|
9 |
+
# from ydata_profiling import ProfileReport # For generating data report
|
10 |
+
|
11 |
+
import nltk
|
12 |
+
from nltk.corpus import stopwords
|
13 |
+
# Fetch the NLTK data packages this module asks for: the stop-word lists
# and the 'punkt' tokenizer data.
for resource in ('stopwords', 'punkt'):
    nltk.download(resource)
|
15 |
+
|
16 |
+
def remove_stopword(text):
    """Return *text* without English stop words, keeping the word "not".

    The text is split with ``nltk.word_tokenize``; tokens that are not in
    the stop-word collection are re-joined with single spaces.
    """
    # A set gives constant-time membership checks per token instead of a
    # linear scan over the stop-word list.
    stopword = set(nltk.corpus.stopwords.words('english'))
    # "not" is taken out of the stop words so it survives filtering
    # (presumably to preserve negation; confirm intent).
    stopword.discard('not')
    return ' '.join(w for w in nltk.word_tokenize(text) if w not in stopword)
|
21 |
+
#data['Extracted text'] = data['Extracted text'].apply(remove_stopword)
|
22 |
+
|
23 |
+
# Load the training table. The path is relative, so it is resolved against
# the current working directory of the process.
# (The bare `data` expression that followed only displayed the frame in a
# notebook; it does nothing in a script and has been dropped.)
data = pd.read_csv('train-cleaned.csv')
|
25 |
+
|
26 |
+
import nltk #Natural Language Processing Toolkit
|
27 |
+
def punc_clean(text):
    """Return *text* with every ``string.punctuation`` character removed."""
    from string import punctuation

    kept = []
    for ch in text:
        if ch in punctuation:
            continue  # drop punctuation characters
        kept.append(ch)
    return ''.join(kept)
|
31 |
+
# NOTE(review): the punctuation-stripped text is stored under an empty-string
# column name, while the vectorizer further down is fitted on the original
# 'Extracted text' column, so this cleaned column appears to be unused.
# Confirm whether it was meant to overwrite 'Extracted text'.
data[''] = data['Extracted text'].apply(punc_clean)
|
32 |
+
#data.head(2)
|
33 |
+
|
34 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
35 |
+
|
36 |
+
# TF-IDF features over unigrams and bigrams; min_df=1 keeps every term that
# occurs in at least one document.
vectr = TfidfVectorizer(ngram_range=(1, 2), min_df=1)

# fit_transform learns the vocabulary and builds the document-term matrix in
# a single pass, instead of a separate fit() and transform() over the same
# column. `vectr` is left fitted for callers that import it.
vect_X = vectr.fit_transform(data['Extracted text'])
|
40 |
+
|
41 |
+
#from sklearn.linear_model import LogisticRegression
|
42 |
+
|
43 |
+
from sklearn.svm import SVC
|
44 |
+
from sklearn.linear_model import LogisticRegression
|
45 |
+
from sklearn.ensemble import VotingClassifier
|
46 |
+
|
47 |
+
# Hard voting only uses each estimator's predicted class label, so the SVC's
# probability estimates are never read. Leaving probability at its default
# avoids the extra internal cross-validation that probability=True triggers
# during fit.
svm_classifier = SVC(kernel='linear')
logistic_classifier = LogisticRegression()

# Majority vote over the two estimators.
# NOTE(review): with only two voters a disagreement is a tie; confirm the
# library's tie-breaking rule is acceptable here.
model = VotingClassifier(
    estimators=[
        ('svm', svm_classifier),
        ('logistic', logistic_classifier),
    ],
    voting='hard',
)

# Fit on the TF-IDF matrix against the 'saliency' column; `clf` is the
# fitted ensemble that other modules import.
clf = model.fit(vect_X, data['saliency'])
|
58 |
+
# clf.score(vect_X, data['saliency'])*100
|
59 |
+
|
60 |
+
# """# PREDICTION"""
|
61 |
+
|
62 |
+
# clf.predict(vectr.transform(['''thank you ''']))
|
63 |
+
|
64 |
+
# clf.predict(vectr.transform(['''Theres no trailers or nothing on the other side of me and its been facing away from my trailer straight''']))
|
65 |
+
|
66 |
+
# clf.predict(vectr.transform([''' I dont think that should really matter Um''']))
|
67 |
+
|
test.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Smoke test: send one string to the running Gradio app and print the reply."""
import os

from gradio_client import Client

# *.gradio.live share links are temporary, so let the target be overridden
# through the environment; the previously hard-coded URL stays the default.
app_url = os.environ.get(
    "GRADIO_APP_URL", "https://1712bd40513e8f2c8a.gradio.live/"
)

client = Client(app_url)
result = client.predict(
    "Howdy!",  # str in 'text' Textbox component
    api_name="/predict",
)
print(result)
|
train-cleaned.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|