"""KiddosSpace app.py: Gradio chat demo with content filtering (model: google-t5/t5-small)."""
import re

import gradio as gr
import torch
from datasets import load_dataset
from huggingface_hub import InferenceClient
from transformers import pipeline
class ContentFilter:
    def __init__(self):
        # Initialize the toxic content detection model; run on GPU when
        # available (this is what the torch import is used for).
        self.toxicity_classifier = pipeline(
            'text-classification',
            model='unitary/toxic-bert',
            return_all_scores=True,
            device=0 if torch.cuda.is_available() else -1
        )

        # Keyword blacklist (matched on word boundaries in filter_keywords)
        self.blacklist = [
            'hate', 'discriminate', 'violent',
            'offensive', 'inappropriate', 'racist',
            'sexist', 'homophobic', 'transphobic'
        ]
    def filter_toxicity(self, text, toxicity_threshold=0.5):
        """
        Detect toxic content using the pre-trained model.

        Args:
            text (str): Input text to check
            toxicity_threshold (float): Threshold for filtering

        Returns:
            dict: Filtering results
        """
        # return_all_scores=True yields one list of label dicts per input;
        # take the first (and only) entry for a single string.
        results = self.toxicity_classifier(text)[0]

        # Convert results to a {label: score} dictionary
        toxicity_scores = {
            result['label']: result['score']
            for result in results
        }

        # Check if any toxic category exceeds the threshold
        is_toxic = any(
            score > toxicity_threshold
            for score in toxicity_scores.values()
        )

        return {
            'is_toxic': is_toxic,
            'toxicity_scores': toxicity_scores
        }
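
    # Illustrative result shape (scores invented for the example; the label
    # set comes from unitary/toxic-bert):
    #   {'is_toxic': False,
    #    'toxicity_scores': {'toxic': 0.02, 'insult': 0.01, ...}}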
    def filter_keywords(self, text):
        """
        Check text against the keyword blacklist.

        Args:
            text (str): Input text to check

        Returns:
            list: Matched blacklisted keywords
        """
        # Match on word boundaries so that e.g. 'hate' does not flag
        # an innocent word like 'whatever'.
        lowered = text.lower()
        matched_keywords = [
            keyword for keyword in self.blacklist
            if re.search(rf'\b{re.escape(keyword)}\b', lowered)
        ]
        return matched_keywords
    def comprehensive_filter(self, text):
        """
        Perform comprehensive content filtering.

        Args:
            text (str): Input text to filter

        Returns:
            dict: Comprehensive filtering results
        """
        # Toxicity model filtering
        toxicity_results = self.filter_toxicity(text)

        # Keyword blacklist filtering
        blacklisted_keywords = self.filter_keywords(text)

        # Combine results: safe only if neither check fires
        return {
            'toxicity': toxicity_results,
            'blacklisted_keywords': blacklisted_keywords,
            'is_safe': not toxicity_results['is_toxic'] and len(blacklisted_keywords) == 0
        }
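
# Example usage (illustrative; actual scores depend on the model):
#   f = ContentFilter()
#   f.comprehensive_filter("hello there")
#   -> {'toxicity': {'is_toxic': False, 'toxicity_scores': {...}},
#       'blacklisted_keywords': [], 'is_safe': True}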
# Initialize content filter
content_filter = ContentFilter()

# Initialize Hugging Face Inference client.
# Note: respond() uses chat_completion, which requires a chat-capable
# backend; google-t5/t5-small is a text-to-text model and may reject chat
# requests, in which case a conversational model such as the commented-out
# zephyr-7b-beta is needed.
#client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
client = InferenceClient("google-t5/t5-small")

# Load dataset (optional; loaded for reference, not used in respond())
dataset = load_dataset("JustKiddo/KiddosVault")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p
):
    # First, filter the incoming user message
    message_filter_result = content_filter.comprehensive_filter(message)

    # If the message is not safe, yield a warning instead of a reply.
    # (This function is a generator, so the warning must be yielded;
    # a bare `return value` would never reach the UI.)
    if not message_filter_result['is_safe']:
        toxicity_details = message_filter_result['toxicity']['toxicity_scores']
        blacklisted_keywords = message_filter_result['blacklisted_keywords']

        issues = [
            f"{category} (Score: {score:.2f})"
            for category, score in toxicity_details.items()
            if score > 0.5
        ]
        if blacklisted_keywords:
            issues.append(f"Blacklisted keywords: {', '.join(blacklisted_keywords)}")

        yield (
            "Message flagged for inappropriate content. "
            "Detected issues: " + ", ".join(issues)
        )
        return

    # Prepare messages for chat completion
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Stream the response chunk by chunk
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p
    ):
        # Streamed deltas can be empty (e.g. the final chunk), so guard None
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response
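
# Sketch of a non-streaming fallback (an assumption, not the original flow):
# if the deployed model lacks chat support, the chat_completion loop above
# could be replaced with something like:
#   prompt = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
#   yield client.text_generation(prompt, max_new_tokens=max_tokens)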
# Create Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a professional and friendly assistant.",
            label="System message"
        ),
        gr.Slider(
            minimum=1,
            maximum=6144,
            value=6144,
            step=1,
            label="Max new tokens"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=1.0,
            step=0.1,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        ),
    ]
)
if __name__ == "__main__":
    demo.launch(debug=True)