Spaces:

srini047
/

saliency-profanity-detection

Runtime error

App Files Files Community

srini047 committed on Sep 21, 2023

Commit

de1732e

1 Parent(s): eee4614

Upload 5 files

Browse files

Files changed (5) hide show

app.py +14 -0
profanity.py +39 -0
salient.py +67 -0
test.py +8 -0
train-cleaned.csv +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import gradio as gr
+from salient import vectr, clf
+def predict(text):
+    senti = clf.predict(vectr.transform([text]))
+    if (int(senti)):
+        text_sent = "Salient"
+    else:
+        text_sent = "Not salient"
+    return text_sent
+demo = gr.Interface(fn=predict, inputs="text", outputs="text")
+demo.launch(share=True)

profanity.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import json
+import os
+import time
+import requests
+from profanityfilter import ProfanityFilter
+pf = ProfanityFilter()
+input_text = "like"
+print(pf.is_profane(input_text))
+model_name = "Dabid/abusive-tagalog-profanity-detection"
+endpoint = f"https://api-inference.huggingface.co/models/Dabid/abusive-tagalog-profanity-detection"
+payload = {
+    "inputs": input_text,
+    "options": {
+        "max_length": 50,
+        "temperature": 1,
+    },
+}
+headers = {"Authorization": "Bearer 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"}
+response = requests.post(endpoint, json=payload, headers=headers)
+if response.status_code == 200:
+    result = json.loads(response.text)
+    filtered_text = result[0]
+    # print(filtered_text)
+    # print("Filtered Text:")
+    p = filtered_text[0]["label"]
+    val = filtered_text[0]["score"]
+    if p == "Non-Abusive" and val > 0.75:
+        print("Non Profane")
+    else:
+        print("Profane")
+elif "Model is currently loading" in response.text:
+    print("Model is still loading. Retrying in a few seconds...")
+    time.sleep(20)
+else:
+    print("API call failed with status code:", response.status_code)
+    print(response.text)

salient.py ADDED Viewed

	@@ -0,0 +1,67 @@

+"""# MODEL BUILDING"""
+# Commented out IPython magic to ensure Python compatibility.
+import numpy as np # For linear algebra
+import pandas as pd # Data processing, CSV file I/O (e.g. pd.read_csv)
+# import matplotlib.pyplot as plt  # For Visualisation
+# %matplotlib inline
+# import seaborn as sns  # For Visualisation
+# from bs4 import BeautifulSoup  # For Text Parsing
+# from ydata_profiling import ProfileReport  # For generating data report
+import nltk
+from nltk.corpus import stopwords
+nltk.download('stopwords')
+nltk.download('punkt')
+def remove_stopword(text):
+    stopword=nltk.corpus.stopwords.words('english')
+    stopword.remove('not')
+    a=[w for w in nltk.word_tokenize(text) if w not in stopword]
+    return ' '.join(a)
+#data['Extracted text'] = data['Extracted text'].apply(remove_stopword)
+data = pd.read_csv('train-cleaned.csv')
+data
+import nltk  #Natural Language Processing Toolkit
+def punc_clean(text):
+    import string as st
+    a=[w for w in text if w not in st.punctuation]
+    return ''.join(a)
+data[''] = data['Extracted text'].apply(punc_clean)
+#data.head(2)
+from sklearn.feature_extraction.text import TfidfVectorizer
+vectr = TfidfVectorizer(ngram_range=(1,2),min_df=1)
+vectr.fit(data['Extracted text'])
+vect_X = vectr.transform(data['Extracted text'])
+#from sklearn.linear_model import LogisticRegression
+from sklearn.svm import SVC
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import VotingClassifier
+svm_classifier = SVC(kernel='linear', probability=True)
+logistic_classifier = LogisticRegression()
+model = VotingClassifier(estimators=[
+    ('svm', svm_classifier),
+    ('logistic', logistic_classifier)
+], voting='hard')
+clf=model.fit(vect_X,data['saliency'])
+# clf.score(vect_X, data['saliency'])*100
+# """# PREDICTION"""
+# clf.predict(vectr.transform(['''thank you ''']))
+# clf.predict(vectr.transform(['''Theres no trailers or nothing on the other side of me and its been facing away from my trailer straight''']))
+# clf.predict(vectr.transform([''' I dont think that should really matter Um''']))

test.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from gradio_client import Client
+client = Client("https://1712bd40513e8f2c8a.gradio.live/")
+result = client.predict(
+				"Howdy!",	# str in 'text' Textbox component
+				api_name="/predict"
+)
+print(result)

train-cleaned.csv ADDED Viewed

The diff for this file is too large to render. See raw diff