import gradio as gr
from huggingface_hub import InferenceClient
from datasets import load_dataset
import torch
from transformers import pipeline


class ContentFilter:
    def __init__(self):
        # Initialize toxic content detection model
        self.toxicity_classifier = pipeline(
            'text-classification',
            model='unitary/toxic-bert',
            return_all_scores=True
        )

        # Keyword blacklist
        self.blacklist = [
            'hate', 'discriminate', 'violent', 'offensive',
            'inappropriate', 'racist', 'sexist', 'homophobic',
            'transphobic'
        ]

    def filter_toxicity(self, text, toxicity_threshold=0.5):
        """
        Detect toxic content using a pre-trained model.

        Args:
            text (str): Input text to check
            toxicity_threshold (float): Threshold for filtering

        Returns:
            dict: Filtering results
        """
        results = self.toxicity_classifier(text)[0]

        # Convert results to a dictionary of label -> score
        toxicity_scores = {
            result['label']: result['score']
            for result in results
        }

        # Check whether any toxic category exceeds the threshold
        is_toxic = any(
            score > toxicity_threshold
            for score in toxicity_scores.values()
        )

        return {
            'is_toxic': is_toxic,
            'toxicity_scores': toxicity_scores
        }

    def filter_keywords(self, text):
        """
        Check text against the keyword blacklist.

        Args:
            text (str): Input text to check

        Returns:
            list: Matched blacklisted keywords
        """
        matched_keywords = [
            keyword for keyword in self.blacklist
            if keyword.lower() in text.lower()
        ]
        return matched_keywords

    def comprehensive_filter(self, text):
        """
        Perform comprehensive content filtering.

        Args:
            text (str): Input text to filter

        Returns:
            dict: Comprehensive filtering results
        """
        # Toxicity model filtering
        toxicity_results = self.filter_toxicity(text)

        # Keyword blacklist filtering
        blacklisted_keywords = self.filter_keywords(text)

        # Combine results
        return {
            'toxicity': toxicity_results,
            'blacklisted_keywords': blacklisted_keywords,
            'is_safe': not toxicity_results['is_toxic'] and len(blacklisted_keywords) == 0
        }


# Initialize content filter
content_filter = ContentFilter()

# Initialize Hugging Face client
#client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
# Note: chat_completion below requires a chat-capable model; t5-small is a
# seq2seq model, so the commented-out Zephyr model may be the safer choice.
client = InferenceClient("google-t5/t5-small")

# Load dataset (optional)
dataset = load_dataset("JustKiddo/KiddosVault")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p
):
    # First, filter the incoming user message
    message_filter_result = content_filter.comprehensive_filter(message)

    # If the message is not safe, yield a warning instead of a completion.
    # (This function is a generator because of the `yield` below, so the
    # warning must be yielded, not returned, or Gradio would never show it.)
    if not message_filter_result['is_safe']:
        toxicity_details = message_filter_result['toxicity']['toxicity_scores']
        blacklisted_keywords = message_filter_result['blacklisted_keywords']

        warning_message = "Message flagged for inappropriate content.\n"
        warning_message += "Detected issues: "

        # Add toxicity details
        for category, score in toxicity_details.items():
            if score > 0.5:
                warning_message += f"{category} (Score: {score:.2f}), "

        # Add blacklisted keywords
        if blacklisted_keywords:
            warning_message += f"Blacklisted keywords: {', '.join(blacklisted_keywords)}"

        yield warning_message
        return

    # Prepare messages for chat completion
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    # Generate the response, streaming tokens as they arrive.
    # Use a distinct loop variable so the `message` argument is not shadowed.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p
    ):
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response


# Create Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a professional and friendly assistant.",
            label="System message"
        ),
        gr.Slider(
            minimum=1,
            maximum=6144,
            value=6144,
            step=1,
            label="Max new tokens"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=1.0,
            step=0.1,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        ),
    ]
)

if __name__ == "__main__":
    demo.launch(debug=True)