import logging
import os
import sys
from pathlib import Path
import json
import io
import uuid
import traceback
from typing import Dict, List, Any, Tuple, Optional
from dataclasses import dataclass

# Set UTF-8 encoding for Windows
if sys.platform == 'win32':
    os.environ["PYTHONIOENCODING"] = "utf-8"

import gradio as gr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from sklearn.datasets import load_iris
import cv2
from PIL import Image

# Additional libraries for web research & scraping
import wikipedia
import requests
from bs4 import BeautifulSoup

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# ---------------------------
# Agent Context & Memory System
# ---------------------------
@dataclass
class AgentMemory:
    short_term: List[Dict[str, Any]] = None
    long_term: Dict[str, Any] = None

    def __post_init__(self):
        if self.short_term is None:
            self.short_term = []
        if self.long_term is None:
            self.long_term = {}

    def add_short_term(self, data: Dict[str, Any]) -> None:
        self.short_term.append(data)
        if len(self.short_term) > 10:
            self.short_term.pop(0)

    def add_long_term(self, key: str, value: Any) -> None:
        self.long_term[key] = value

    def get_recent_context(self, n: int = 3) -> List[Dict[str, Any]]:
        return self.short_term[-n:] if len(self.short_term) >= n else self.short_term

    def search_long_term(self, query: str) -> List[Tuple[str, Any]]:
        results = []
        for key, value in self.long_term.items():
            if query.lower() in key.lower():
                results.append((key, value))
        return results

# ---------------------------
# Agent Hub
# ---------------------------
class AgentHub:
    def __init__(self):
        self.agents = {}
        self.global_memory = AgentMemory()
        self.session_id = str(uuid.uuid4())
        try:
            self.tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
            self.model = AutoModelForCausalLM.from_pretrained("distilgpt2")
            self.generator = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer)
            logger.info("Initialized text generation pipeline with distilgpt2")
        except Exception as e:
            logger.error(f"Failed to initialize text generation: {e}")
            self.generator = None
        try:
            self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
            logger.info("Initialized summarization pipeline")
        except Exception as e:
            logger.error(f"Failed to initialize summarizer: {e}")
            self.summarizer = None

    def register_agent(self, agent_id: str, agent_instance) -> None:
        self.agents[agent_id] = agent_instance
        logger.info(f"Registered agent: {agent_id}")

    def get_agent(self, agent_id: str):
        return self.agents.get(agent_id)

    def broadcast(self, message: Dict[str, Any], exclude: Optional[List[str]] = None) -> Dict[str, List[Dict]]:
        exclude = exclude or []
        responses = {}
        for agent_id, agent in self.agents.items():
            if agent_id not in exclude:
                try:
                    response = agent.process_message(message)
                    responses[agent_id] = response
                except Exception as e:
                    logger.error(f"Error in agent {agent_id}: {e}")
                    responses[agent_id] = {"error": str(e)}
        return responses

    def chain_of_thought(self, initial_task: str, agent_sequence: List[str]) -> Dict[str, Any]:
        results = {"final_output": None, "chain_outputs": [], "errors": []}
        current_input = initial_task
        for agent_id in agent_sequence:
            agent = self.get_agent(agent_id)
            if not agent:
                error = f"Agent {agent_id} not found"
                results["errors"].append(error)
                logger.error(error)
                continue
            try:
                output = agent.process_task(current_input)
                step_result = {"agent": agent_id, "input": current_input, "output": output}
                results["chain_outputs"].append(step_result)
                if isinstance(output, dict) and "text" in output:
                    current_input = output["text"]
                elif isinstance(output, str):
                    current_input = output
                else:
                    current_input = f"Result from {agent_id}: {type(output).__name__} object"
            except Exception as e:
                error = f"Error in agent {agent_id}: {str(e)}\n{traceback.format_exc()}"
                results["errors"].append(error)
                logger.error(error)
        if results["chain_outputs"]:
            last_output = results["chain_outputs"][-1]["output"]
            results["final_output"] = last_output if isinstance(last_output, dict) else {"text": str(last_output)}
        return results

# ---------------------------
# Intelligent Agent Base Class
# ---------------------------
class IntelligentAgent:
    def __init__(self, agent_id: str, hub: AgentHub):
        self.agent_id = agent_id
        self.hub = hub
        self.memory = AgentMemory()
        logger.info(f"Initialized agent: {agent_id}")

    def process_task(self, task: Any) -> Any:
        raise NotImplementedError("Subclasses must implement process_task")

    def process_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
        logger.info(f"Agent {self.agent_id} received message: {message}")
        self.memory.add_short_term({"timestamp": pd.Timestamp.now(), "message": message})
        return {"sender": self.agent_id, "received": True, "action": "acknowledge"}

    def request_assistance(self, target_agent_id: str, data: Dict[str, Any]) -> Dict[str, Any]:
        target_agent = self.hub.get_agent(target_agent_id)
        if not target_agent:
            logger.error(f"Agent {self.agent_id} requested unknown agent: {target_agent_id}")
            return {"error": f"Agent {target_agent_id} not found"}
        request = {"sender": self.agent_id, "type": "assistance_request", "data": data}
        return target_agent.process_message(request)

    def evaluate_result(self, result: Any) -> Dict[str, Any]:
        success = result is not None
        confidence = 0.8 if success else 0.2
        return {"success": success, "confidence": confidence, "timestamp": pd.Timestamp.now().isoformat()}
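
# Illustrative sketch (hypothetical, not registered by create_agent_hub below): a
# minimal subclass showing the contract the hub relies on -- process_task() returns
# a dict with a "text" key so that AgentHub.chain_of_thought can forward the output
# to the next agent in a sequence.
class EchoAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("echo", hub)

    def process_task(self, task: Any) -> Dict[str, Any]:
        # Record the request in short-term memory, then hand the text straight back.
        self.memory.add_short_term({"task": task, "timestamp": pd.Timestamp.now().isoformat()})
        return {"text": f"Echo from {self.agent_id}: {task}"}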

# ---------------------------
# Specialized Agent Implementations
# ---------------------------
class WebResearchAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("web_research", hub)

    def process_task(self, task: str) -> Dict[str, Any]:
        logger.info(f"WebResearchAgent processing: {task}")
        search_term = task
        if self.hub.summarizer:
            try:
                keywords = task.split()
                if len(keywords) > 5:
                    summary = self.hub.summarizer(task, max_length=20, min_length=5, do_sample=False)
                    search_term = summary[0]['summary_text']
                else:
                    search_term = task
            except Exception as e:
                logger.error(f"Summarization error in WebResearchAgent: {e}")
                search_term = task
        try:
            search_results = wikipedia.search(search_term)
            if not search_results:
                result = {"text": f"No Wikipedia pages found for '{task}'."}
                self.memory.add_short_term({"task": task, "result": result, "success": False})
                return result
            page_title = None
            summary_text = None
            error_details = []
            for candidate in search_results[:3]:
                try:
                    summary_text = wikipedia.summary(candidate, sentences=5)
                    page_title = candidate
                    break
                except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError) as e:
                    error_details.append(f"{candidate}: {str(e)}")
                    continue
            if not summary_text:
                result = {"text": f"Failed to get Wikipedia summary for '{task}'. Errors: {'; '.join(error_details)}",
                          "search_results": search_results}
                self.memory.add_short_term({"task": task, "result": result, "success": False})
                return result
            self.memory.add_long_term(f"research:{search_term}",
                                      {"page_title": page_title, "summary": summary_text,
                                       "timestamp": pd.Timestamp.now().isoformat()})
            result = {"text": f"Research on '{page_title}':\n{summary_text}",
                      "page_title": page_title,
                      "related_topics": search_results[:5],
                      "source": "Wikipedia"}
            self.memory.add_short_term({"task": task, "result": result, "success": True})
            return result
        except Exception as e:
            error_msg = f"Error in web research: {str(e)}"
            logger.error(error_msg)
            result = {"text": error_msg, "error": str(e)}
            self.memory.add_short_term({"task": task, "result": result, "success": False})
            return result

class WebScraperAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("web_scraper", hub)

    def process_task(self, task: str) -> Dict[str, Any]:
        logger.info(f"WebScraperAgent processing URL: {task}")
        if not task.startswith(('http://', 'https://')):
            return {"text": "Invalid URL format. Please provide a URL starting with http:// or https://"}
        try:
            headers = {'User-Agent': 'Mozilla/5.0'}
            response = requests.get(task, headers=headers, timeout=10)
            if response.status_code != 200:
                result = {"text": f"Error: received status code {response.status_code} from {task}"}
                self.memory.add_short_term({"url": task, "result": result, "success": False})
                return result
            soup = BeautifulSoup(response.text, 'html.parser')
            title = soup.title.string.strip() if soup.title and soup.title.string else "No title found"
            main_content = soup.find('main') or soup.find(id='content') or soup.find(class_='content')
            paras = main_content.find_all('p') if main_content else soup.find_all('p')
            content = "\n".join([p.get_text().strip() for p in paras if len(p.get_text().strip()) > 50])
            if len(content) > 2000 and self.hub.summarizer:
                chunks = [content[i:i+1000] for i in range(0, len(content), 1000)]
                summarized_chunks = []
                for chunk in chunks:
                    summary = self.hub.summarizer(chunk, max_length=100, min_length=30, do_sample=False)
                    summarized_chunks.append(summary[0]['summary_text'])
                content = "\n".join(summarized_chunks)
            elif len(content) > 2000:
                content = content[:2000] + "... (content truncated)"
            links = []
            for a in soup.find_all('a', href=True):
                href = a['href']
                if href.startswith('http') and len(links) < 5:
                    links.append({"url": href, "text": a.get_text().strip() or href})
            result = {"text": f"Content from {task}:\n\nTitle: {title}\n\n{content}",
                      "title": title,
                      "raw_content": content,
                      "links": links,
                      "source_url": task}
            self.memory.add_short_term({"url": task, "result": result, "success": True})
            self.memory.add_long_term(f"scraped:{task}",
                                      {"title": title, "content_preview": content[:200],
                                       "timestamp": pd.Timestamp.now().isoformat()})
            return result
        except requests.RequestException as e:
            error_msg = f"Request error for {task}: {str(e)}"
            logger.error(error_msg)
            return {"text": error_msg, "error": str(e)}
        except Exception as e:
            error_msg = f"Error scraping {task}: {str(e)}"
            logger.error(error_msg)
            return {"text": error_msg, "error": str(e)}

class TextProcessingAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("text_processing", hub)

    def process_task(self, task: str) -> Dict[str, Any]:
        logger.info(f"TextProcessingAgent processing text ({len(task)} chars)")
        if not task or len(task) < 10:
            return {"text": "Text too short to process meaningfully."}
        results = {}
        words = task.split()
        sentences = task.split('. ')
        results["statistics"] = {
            "character_count": len(task),
            "word_count": len(words),
            "estimated_sentences": len(sentences),
            "average_word_length": sum(len(word) for word in words) / len(words) if words else 0
        }
        if len(task) > 5000:
            chunk_size = 500
            chunking_strategy = "character_blocks"
        elif len(words) > 200:
            chunk_size = 50
            chunking_strategy = "word_blocks"
        else:
            chunk_size = 5
            chunking_strategy = "sentence_blocks"
        if chunking_strategy == "character_blocks":
            chunks = [task[i:i+chunk_size] for i in range(0, len(task), chunk_size)]
        elif chunking_strategy == "word_blocks":
            chunks = [' '.join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
        else:
            chunks = ['. '.join(sentences[i:i+chunk_size]) + '.' for i in range(0, len(sentences), chunk_size)]
        results["chunks"] = chunks
        results["chunking_strategy"] = chunking_strategy
        if self.hub.summarizer and len(task) > 200:
            try:
                task_for_summary = task[:1000] if len(task) > 1000 else task
                summary = self.hub.summarizer(task_for_summary, max_length=100, min_length=30, do_sample=False)
                results["summary"] = summary[0]['summary_text']
            except Exception as e:
                logger.error(f"Summarization error: {e}")
                results["summary_error"] = str(e)
        stop_words = set(['the', 'a', 'an', 'and', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
        word_freq = {}
        for word in words:
            w = word.lower().strip('.,!?:;()-"\'')
            if w and w not in stop_words and len(w) > 1:
                word_freq[w] = word_freq.get(w, 0) + 1
        results["frequent_words"] = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:10]
        positive_words = set(['good', 'great', 'excellent', 'positive', 'happy', 'best', 'better', 'success'])
        negative_words = set(['bad', 'worst', 'terrible', 'negative', 'sad', 'problem', 'fail', 'issue'])
        pos_count = sum(1 for word in words if word.lower().strip('.,!?:;()-"\'') in positive_words)
        neg_count = sum(1 for word in words if word.lower().strip('.,!?:;()-"\'') in negative_words)
        sentiment = "possibly positive" if pos_count > neg_count and pos_count > 2 else ("possibly negative" if neg_count > pos_count and neg_count > 2 else "neutral or mixed")
        results["basic_sentiment"] = {"assessment": sentiment, "positive_word_count": pos_count, "negative_word_count": neg_count}
        self.memory.add_short_term({"task_preview": task[:100] + "..." if len(task) > 100 else task,
                                    "word_count": results["statistics"]["word_count"],
                                    "result": results})
        text_response = (
            f"Text Analysis Results:\n- {results['statistics']['word_count']} words, {results['statistics']['character_count']} characters\n"
            f"- Split into {len(chunks)} chunks using {chunking_strategy}\n"
        )
        if "summary" in results:
            text_response += f"\nSummary:\n{results['summary']}\n"
        if results["frequent_words"]:
            text_response += "\nMost frequent words:\n"
            for word, count in results["frequent_words"][:5]:
                text_response += f"- {word}: {count} occurrences\n"
        text_response += f"\nOverall tone appears {results['basic_sentiment']['assessment']}"
        results["text"] = text_response
        return results

class DataAnalysisAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("data_analysis", hub)

    def process_task(self, task: str) -> Dict[str, Any]:
        logger.info(f"DataAnalysisAgent processing: {task}")
        file_path = None
        if "analyze" in task.lower() and ".csv" in task.lower():
            for word in task.split():
                if word.endswith('.csv'):
                    file_path = word
                    break
        if not file_path or not Path(file_path).exists():
            logger.info("No specific CSV file mentioned or file not found, creating sample data")
            if "time series" in task.lower():
                dates = pd.date_range(start='2023-01-01', periods=30, freq='D')
                df = pd.DataFrame({'date': dates,
                                   'value': np.random.normal(100, 15, 30),
                                   'trend': np.linspace(0, 20, 30) + np.random.normal(0, 2, 30)})
                file_path = "sample_timeseries.csv"
            elif "sales" in task.lower():
                products = ['ProductA', 'ProductB', 'ProductC', 'ProductD']
                regions = ['North', 'South', 'East', 'West']
                dates = pd.date_range(start='2023-01-01', periods=50, freq='D')
                data = []
                for _ in range(200):
                    data.append({'date': np.random.choice(dates),
                                 'product': np.random.choice(products),
                                 'region': np.random.choice(regions),
                                 'units_sold': np.random.randint(10, 100),
                                 'revenue': np.random.uniform(100, 1000)})
                df = pd.DataFrame(data)
                file_path = "sample_sales.csv"
            else:
                df = pd.DataFrame({
                    'A': np.random.normal(0, 1, 100),
                    'B': np.random.normal(5, 2, 100),
                    'C': np.random.uniform(-10, 10, 100),
                    'D': np.random.randint(0, 5, 100),
                    'label': np.random.choice(['X', 'Y', 'Z'], 100)
                })
                file_path = "sample_data.csv"
            df.to_csv(file_path, index=False)
            logger.info(f"Created sample data file: {file_path}")
        else:
            try:
                df = pd.read_csv(file_path)
                logger.info(f"Loaded existing file: {file_path}")
            except Exception as e:
                error_msg = f"Error loading CSV file {file_path}: {str(e)}"
                logger.error(error_msg)
                return {"text": error_msg, "error": str(e)}
        analysis_results = {}
        try:
            numeric_cols = df.select_dtypes(include=[np.number]).columns
            analysis_results["summary_stats"] = df[numeric_cols].describe().to_dict()
            categorical_cols = df.select_dtypes(exclude=[np.number]).columns
            for col in categorical_cols:
                if df[col].nunique() < 10:
                    analysis_results[f"{col}_distribution"] = df[col].value_counts().to_dict()
        except Exception as e:
            logger.error(f"Error in basic statistics: {e}")
            analysis_results["stats_error"] = str(e)
        try:
            missing_values = df.isnull().sum().to_dict()
            analysis_results["missing_values"] = {k: v for k, v in missing_values.items() if v > 0}
        except Exception as e:
            logger.error(f"Error in missing values analysis: {e}")
            analysis_results["missing_values_error"] = str(e)
        try:
            if len(numeric_cols) > 1:
                analysis_results["correlations"] = df[numeric_cols].corr().to_dict()
        except Exception as e:
            logger.error(f"Error in correlation analysis: {e}")
            analysis_results["correlation_error"] = str(e)
        try:
            plt.figure(figsize=(10, 8))
            categorical_cols = df.select_dtypes(exclude=[np.number]).columns
            if len(numeric_cols) >= 2:
                plt.subplot(2, 1, 1)
                x_col, y_col = numeric_cols[0], numeric_cols[1]
                sample_df = df.sample(1000) if len(df) > 1000 else df
                if len(categorical_cols) > 0 and df[categorical_cols[0]].nunique() < 10:
                    cat_col = categorical_cols[0]
                    for category, group in sample_df.groupby(cat_col):
                        plt.scatter(group[x_col], group[y_col], label=category, alpha=0.6)
                    plt.legend()
                else:
                    plt.scatter(sample_df[x_col], sample_df[y_col], alpha=0.6)
                plt.xlabel(x_col)
                plt.ylabel(y_col)
                plt.title(f"Scatter Plot: {x_col} vs {y_col}")
                plt.subplot(2, 1, 2)
                if 'date' in df.columns or any('time' in col.lower() for col in df.columns):
                    date_col = [col for col in df.columns if 'date' in col.lower() or 'time' in col.lower()][0]
                    value_col = numeric_cols[0] if numeric_cols[0] != date_col else numeric_cols[1]
                    if not pd.api.types.is_datetime64_dtype(df[date_col]):
                        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
                    temp_df = df.dropna(subset=[date_col, value_col]).sort_values(date_col)
                    plt.plot(temp_df[date_col], temp_df[value_col])
                    plt.xlabel(date_col)
                    plt.ylabel(value_col)
                    plt.title(f"Time Series: {value_col} over {date_col}")
                    plt.xticks(rotation=45)
                else:
                    plt.hist(df[numeric_cols[0]].dropna(), bins=20, alpha=0.7)
                    plt.xlabel(numeric_cols[0])
                    plt.ylabel('Frequency')
                    plt.title(f"Distribution of {numeric_cols[0]}")
            else:
                if len(categorical_cols) > 0:
                    cat_col = categorical_cols[0]
                    df[cat_col].value_counts().plot(kind='bar')
                    plt.xlabel(cat_col)
                    plt.ylabel('Count')
                    plt.title(f"Counts by {cat_col}")
                    plt.xticks(rotation=45)
                else:
                    plt.hist(df[numeric_cols[0]].dropna(), bins=20)
                    plt.xlabel(numeric_cols[0])
                    plt.ylabel('Frequency')
                    plt.title(f"Distribution of {numeric_cols[0]}")
            plt.tight_layout()
            viz_path = f"{Path(file_path).stem}_viz.png"
            plt.savefig(viz_path)
            plt.close()
            analysis_results["visualization_path"] = viz_path
            analysis_results["visualization_created"] = True
            logger.info(f"Created visualization: {viz_path}")
        except Exception as e:
            logger.error(f"Error creating visualization: {e}")
            analysis_results["visualization_error"] = str(e)
            analysis_results["visualization_created"] = False
        insights = []
        try:
            for col in numeric_cols:
                q1 = df[col].quantile(0.25)
                q3 = df[col].quantile(0.75)
                iqr = q3 - q1
                outlier_count = ((df[col] < (q1 - 1.5 * iqr)) | (df[col] > (q3 + 1.5 * iqr))).sum()
                if outlier_count > 0:
                    insights.append(f"Found {outlier_count} potential outliers in '{col}'")
            if "correlations" in analysis_results:
                for col1, corr_dict in analysis_results["correlations"].items():
                    for col2, corr_val in corr_dict.items():
                        if col1 != col2 and abs(corr_val) > 0.7:
                            insights.append(f"Strong correlation ({corr_val:.2f}) between '{col1}' and '{col2}'")
            for col in categorical_cols:
                if df[col].nunique() < 10:
                    value_counts = df[col].value_counts()
                    most_common = value_counts.idxmax()
                    most_common_pct = value_counts.max() / value_counts.sum() * 100
                    if most_common_pct > 80:
                        insights.append(f"Imbalanced category in '{col}': '{most_common}' accounts for {most_common_pct:.1f}% of data")
            analysis_results["insights"] = insights
        except Exception as e:
            logger.error(f"Error extracting insights: {e}")
            analysis_results["insights_error"] = str(e)
        self.memory.add_short_term({"file": file_path, "columns": list(df.columns), "row_count": len(df), "analysis": analysis_results})
        if "sample" in file_path:
            self.memory.add_long_term(f"analysis:{file_path}",
                                      {"file": file_path, "type": "generated",
                                       "columns": list(df.columns), "row_count": len(df),
                                       "timestamp": pd.Timestamp.now().isoformat()})
        column_list = ", ".join(df.columns[:5]) + (", ..." if len(df.columns) > 5 else "")
        text_response = (
            f"Data Analysis Results for {file_path}\n- Dataset: {len(df)} rows x {len(df.columns)} columns ({column_list})\n"
        )
        if "missing_values" in analysis_results and analysis_results["missing_values"]:
            text_response += f"- Missing values found in {len(analysis_results['missing_values'])} columns\n"
        if insights:
            text_response += "\nKey Insights:\n"
            for i, insight in enumerate(insights[:5], 1):
                text_response += f"{i}. {insight}\n"
            if len(insights) > 5:
                text_response += f"... and {len(insights) - 5} more insights\n"
        text_response += f"\nVisualization saved to {viz_path}" if analysis_results.get("visualization_created") else "\nNo visualization created"
        analysis_results["text"] = text_response
        analysis_results["dataframe_shape"] = df.shape
        analysis_results["data_preview"] = df.head(5).to_dict()
        return analysis_results

class CodingAssistantAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("coding_assistant", hub)
        self.code_snippets = {
            "file_operations": {
                "read_file": '''
def read_file(file_path):
    """Read a file and return its contents"""
    with open(file_path, 'r') as file:
        return file.read()
''',
                "write_file": '''
def write_file(file_path, content):
    """Write content to a file"""
    with open(file_path, 'w') as file:
        file.write(content)
    return True
'''
            },
            "data_processing": {
                "pandas_read_csv": '''
import pandas as pd

def load_csv(file_path):
    """Load a CSV file into a Pandas DataFrame"""
    return pd.read_csv(file_path)
''',
                "pandas_basic_stats": '''
def get_basic_stats(df):
    """Get basic statistics for a DataFrame"""
    numeric_stats = df.describe()
    categorical_columns = df.select_dtypes(include=['object']).columns
    categorical_stats = {col: df[col].value_counts().to_dict() for col in categorical_columns}
    return {
        'numeric': numeric_stats.to_dict(),
        'categorical': categorical_stats
    }
'''
            },
            "visualization": {
                "matplotlib_basic_plot": '''
import matplotlib.pyplot as plt

def create_basic_plot(data, x_col, y_col, title="Plot", kind="line"):
    """Create a basic plot using matplotlib"""
    plt.figure(figsize=(10, 6))
    if kind == "line":
        plt.plot(data[x_col], data[y_col])
    elif kind == "scatter":
        plt.scatter(data[x_col], data[y_col])
    elif kind == "bar":
        plt.bar(data[x_col], data[y_col])
    plt.title(title)
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    plt.tight_layout()
    plt.savefig(f"{title.lower().replace(' ', '_')}.png")
    plt.close()
    return f"{title.lower().replace(' ', '_')}.png"
'''
            },
            "web_scraping": {
                "requests_beautifulsoup": '''
import requests
from bs4 import BeautifulSoup

def scrape_webpage(url):
    """Scrape a webpage and extract text from paragraphs"""
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        paragraphs = soup.find_all('p')
        text = [p.get_text() for p in paragraphs]
        return {
            'title': soup.title.string if soup.title else "No title",
            'text': text,
            'url': url
        }
    except Exception as e:
        return {'error': str(e), 'url': url}
'''
            },
            "nlp": {
                "basic_text_analysis": '''
from collections import Counter
import re

def analyze_text(text):
    """Perform basic text analysis"""
    text = text.lower()
    words = re.findall(r'\w+', text)
    word_count = len(words)
    unique_words = len(set(words))
    stop_words = {'the', 'a', 'an', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'and', 'or'}
    word_freq = Counter([w for w in words if w not in stop_words and len(w) > 1])
    return {
        'word_count': word_count,
        'unique_words': unique_words,
        'avg_word_length': sum(len(w) for w in words) / word_count if word_count else 0,
        'most_common': word_freq.most_common(10)
    }
'''
            },
            "machine_learning": {
                "basic_classifier": '''
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

def train_basic_classifier(X, y, test_size=0.2, random_state=42):
    """Train a basic RandomForest classifier"""
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    model = RandomForestClassifier(n_estimators=100, random_state=random_state)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    return {
        'model': model,
        'accuracy': report['accuracy'],
        'classification_report': report,
        'feature_importance': dict(zip(range(X.shape[1]), model.feature_importances_))
    }
'''
            }
        }

    def process_task(self, task: str) -> Dict[str, Any]:
        logger.info(f"CodingAssistantAgent processing: {task}")
        task_lower = task.lower()
        keyword_mapping = {
            "file": "file_operations", "read file": "file_operations", "write file": "file_operations",
            "csv": "data_processing", "data": "data_processing", "pandas": "data_processing", "dataframe": "data_processing",
            "plot": "visualization", "chart": "visualization", "graph": "visualization", "visualize": "visualization", "matplotlib": "visualization",
            "scrape": "web_scraping", "web": "web_scraping", "html": "web_scraping", "beautifulsoup": "web_scraping",
            "text analysis": "nlp", "nlp": "nlp", "natural language": "nlp", "word count": "nlp", "text processing": "nlp",
            "machine learning": "machine_learning", "ml": "machine_learning", "model": "machine_learning", "predict": "machine_learning", "classifier": "machine_learning"
        }
        code_category = None
        function_name = None
        for keyword, category in keyword_mapping.items():
            if keyword in task_lower:
                code_category = category
                for func_name in self.code_snippets.get(category, {}):
                    natural_func = func_name.replace('_', ' ')
                    if natural_func in task_lower:
                        function_name = func_name
                        break
                break
        if not code_category:
            if any(word in task_lower for word in ["add", "sum", "calculate", "compute"]):
                code_category = "data_processing"
            elif any(word in task_lower for word in ["show", "display", "generate"]):
                code_category = "visualization"
        if code_category and not function_name and self.code_snippets.get(code_category):
            function_name = next(iter(self.code_snippets[code_category]))
        if not code_category:
            function_parts = [word for word in task_lower.split() if word not in ["a", "the", "an", "to", "for", "function", "code", "create", "make"]]
            func_name = "_".join(function_parts[:2]) if len(function_parts) >= 2 else "custom_function"
            custom_code = f"""
def {func_name}(input_data):
    # Custom function based on your request: '{task}'
    result = None
    # TODO: Implement specific logic based on requirements
    if isinstance(input_data, list):
        result = len(input_data)
    elif isinstance(input_data, str):
        result = input_data.upper()
    elif isinstance(input_data, (int, float)):
        result = input_data * 2
    return {{
        'input': input_data,
        'result': result,
        'status': 'processed'
    }}
"""
            result = {
                "text": f"I've created a custom function template based on your request:\n\n```python\n{custom_code}\n```\n\nThis is a starting point you can customize further.",
                "code": custom_code,
                "language": "python",
                "type": "custom"
            }
        else:
            code_snippet = self.code_snippets[code_category][function_name]
            result = {
                "text": f"Here's a {code_category.replace('_', ' ')} function for {function_name.replace('_', ' ')}:\n\n```python\n{code_snippet}\n```\n\nYou can customize this code.",
                "code": code_snippet,
                "language": "python",
                "category": code_category,
                "function": function_name
            }
        self.memory.add_short_term({"task": task, "code_category": code_category, "function_provided": function_name, "timestamp": pd.Timestamp.now().isoformat()})
        return result

class ImageProcessingAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("image_processing", hub)

    def process_task(self, task: Any) -> Dict[str, Any]:
        logger.info("ImageProcessingAgent processing task")
        image = None
        task_type = None
        if isinstance(task, Image.Image):
            image = task
            task_type = "direct_image"
        elif isinstance(task, str):
            if Path(task).exists() and Path(task).suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
                try:
                    image = Image.open(task)
                    task_type = "image_path"
                except Exception as e:
                    return {"text": f"Error loading image from {task}: {str(e)}", "error": str(e)}
            else:
                task_type = "text_instruction"
        elif isinstance(task, dict) and 'image' in task:
            if isinstance(task['image'], Image.Image):
                image = task['image']
            elif isinstance(task['image'], str) and Path(task['image']).exists():
                try:
                    image = Image.open(task['image'])
                except Exception as e:
                    return {"text": f"Error loading image from {task['image']}: {str(e)}", "error": str(e)}
            task_type = "dict_with_image"
        if task_type == "text_instruction" and not image:
            return {"text": "Please provide an image to process along with instructions."}
        if not image:
            return {"text": "No valid image provided for processing."}
        processing_type = "edge_detection"
        if task_type in ["text_instruction", "dict_with_image"] and isinstance(task, dict):
            instruction = task.get('instruction', '').lower()
            if 'blur' in instruction or 'smooth' in instruction:
                processing_type = "blur"
            elif 'edge' in instruction or 'contour' in instruction:
                processing_type = "edge_detection"
            elif 'gray' in instruction or 'greyscale' in instruction or 'black and white' in instruction:
                processing_type = "grayscale"
            elif 'bright' in instruction or 'contrast' in instruction:
                processing_type = "enhance"
            elif 'resize' in instruction or 'scale' in instruction:
                processing_type = "resize"
        try:
            img_array = np.array(image)
            if img_array.ndim == 3 and img_array.shape[-1] == 4:
                img_cv = cv2.cvtColor(img_array, cv2.COLOR_RGBA2BGR)
            else:
                img_cv = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
            processed_img = None
            processing_details = {"original_size": image.size}
            if processing_type == "edge_detection":
                gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
                edges = cv2.Canny(gray, 100, 200)
                processed_img = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
                processing_details["processing"] = "Edge detection using Canny"
            elif processing_type == "blur":
                processed_img = cv2.GaussianBlur(img_cv, (7, 7), 0)
                processing_details["processing"] = "Gaussian Blur"
            elif processing_type == "grayscale":
                processed_img = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
                processed_img = cv2.cvtColor(processed_img, cv2.COLOR_GRAY2BGR)
                processing_details["processing"] = "Grayscale conversion"
            elif processing_type == "enhance":
                lab = cv2.cvtColor(img_cv, cv2.COLOR_BGR2LAB)
                l, a, b = cv2.split(lab)
                clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
                cl = clahe.apply(l)
                limg = cv2.merge((cl, a, b))
                processed_img = cv2.cvtColor(limg, cv2.COLOR_LAB2BGR)
                processing_details["processing"] = "Contrast enhancement"
            elif processing_type == "resize":
                processed_img = cv2.resize(img_cv, (image.size[0]//2, image.size[1]//2))
                processing_details["processing"] = "Resized to half"
            else:
                processed_img = img_cv
                processing_details["processing"] = "No processing applied"
            processed_pil = Image.fromarray(cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB))
            return {"text": f"Image processing completed with {processing_details['processing']}.",
                    "image": processed_pil,
                    "details": processing_details}
        except Exception as e:
            error_msg = f"Error processing image: {str(e)}\n{traceback.format_exc()}"
            logger.error(error_msg)
            return {"text": f"Error processing image: {str(e)}", "error": str(e)}

class FileManagementAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("file_management", hub)

    def process_task(self, task: str) -> Dict[str, Any]:
        logger.info(f"FileManagementAgent processing: {task}")
        task_lower = task.lower()
        if any(word in task_lower for word in ["create", "make", "generate", "write"]):
            operation = "create"
        elif any(word in task_lower for word in ["read", "open", "show", "display", "content"]):
            operation = "read"
        elif any(word in task_lower for word in ["list", "find", "directory", "folder", "files in"]):
            operation = "list"
        elif any(word in task_lower for word in ["delete", "remove"]):
            operation = "delete"
        else:
            operation = "unknown"
        filename = None
        file_extensions = ['.txt', '.json', '.csv', '.md', '.py', '.html', '.js', '.css']
        words = task.split()
        for word in words:
            for ext in file_extensions:
                if ext in word.lower():
                    filename = word.strip(':"\'.,;')
                    break
            if filename:
                break
        if not filename:
            file_keywords = ["file", "named", "called", "filename"]
            for i, word in enumerate(words):
                if word.lower() in file_keywords and i < len(words) - 1:
                    potential_name = words[i+1].strip(':"\'.,;')
                    if '.' not in potential_name:
                        if "json" in task_lower:
                            potential_name += ".json"
                        elif "csv" in task_lower:
                            potential_name += ".csv"
                        elif "python" in task_lower or "py" in task_lower:
                            potential_name += ".py"
                        else:
                            potential_name += ".txt"
                    filename = potential_name
                    break
        if not filename:
            if "json" in task_lower:
                filename = f"data_{uuid.uuid4().hex[:6]}.json"
            elif "csv" in task_lower:
                filename = f"data_{uuid.uuid4().hex[:6]}.csv"
            elif "python" in task_lower or "py" in task_lower:
                filename = f"script_{uuid.uuid4().hex[:6]}.py"
            elif "log" in task_lower:
                filename = f"log_{uuid.uuid4().hex[:6]}.txt"
            else:
                filename = f"file_{uuid.uuid4().hex[:6]}.txt"
        result = {}
        if operation == "create":
            if filename.endswith('.json'):
                content = json.dumps({
                    "name": "Sample Data",
                    "description": task,
                    "created": pd.Timestamp.now().isoformat(),
                    "values": [1, 2, 3, 4, 5],
                    "metadata": {"source": "FileManagementAgent", "version": "1.0"}
                }, indent=2)
            elif filename.endswith('.csv'):
                content = "id,name,value,timestamp\n"
                for i in range(5):
                    content += f"{i+1},Item{i+1},{np.random.randint(1, 100)},{pd.Timestamp.now().isoformat()}\n"
            elif filename.endswith('.py'):
                content = f"""# Generated Python Script: {filename}
# Created: {pd.Timestamp.now().isoformat()}
# Description: {task}

def main():
    print("Hello from the FileManagementAgent!")
    data = [1, 2, 3, 4, 5]
    result = sum(data)
    print(f"Sample calculation: sum(data) = {{result}}")
    return result

if __name__ == "__main__":
    main()
"""
            else:
                content = f"File created by FileManagementAgent\nCreated: {pd.Timestamp.now().isoformat()}\nBased on request: {task}\n\nThis is sample content."
            try:
                with open(filename, 'w', encoding='utf-8') as f:
                    f.write(content)
                result = {"text": f"Successfully created file: {filename}",
                          "operation": "create",
                          "filename": filename,
                          "size": len(content),
                          "preview": content[:200] + "..." if len(content) > 200 else content}
                self.memory.add_short_term({"operation": "create", "filename": filename, "timestamp": pd.Timestamp.now().isoformat()})
                self.memory.add_long_term(f"file:{filename}", {"operation": "create", "type": Path(filename).suffix, "timestamp": pd.Timestamp.now().isoformat()})
            except Exception as e:
                error_msg = f"Error creating file {filename}: {str(e)}"
                logger.error(error_msg)
                result = {"text": error_msg, "error": str(e)}
        elif operation == "read":
            if not filename:
                result = {"text": "Please specify a filename to read."}
            elif not Path(filename).exists():
                result = {"text": f"File '{filename}' not found."}
            else:
                try:
                    with open(filename, 'r', encoding='utf-8') as f:
                        content = f.read()
                    result = {"text": f"Content of {filename}:\n\n{content}",
                              "operation": "read",
                              "filename": filename,
                              "content": content,
                              "size": len(content)}
                    self.memory.add_short_term({"operation": "read", "filename": filename, "timestamp": pd.Timestamp.now().isoformat()})
                except Exception as e:
                    error_msg = f"Error reading file {filename}: {str(e)}"
                    logger.error(error_msg)
                    result = {"text": error_msg, "error": str(e)}
        elif operation == "list":
            try:
                directory = "."
                for term in ["directory", "folder", "in"]:
                    if term in task_lower:
                        parts = task_lower.split(term)
                        if len(parts) > 1:
                            potential_dir = parts[1].strip().split()[0].strip(':"\'.,;')
                            if Path(potential_dir).exists() and Path(potential_dir).is_dir():
                                directory = potential_dir
                extension_filter = None
                for ext in file_extensions:
                    if ext in task_lower:
                        extension_filter = ext
                        break
                files = list(Path(directory).glob('*' + (extension_filter or '')))
                file_groups = {}
                for file in files:
                    file_groups.setdefault(file.suffix, []).append({
                        "name": file.name,
                        "size": file.stat().st_size,
                        "modified": pd.Timestamp(file.stat().st_mtime, unit='s').isoformat()
                    })
                response_text = f"Found {len(files)} files" + (f" with extension {extension_filter}" if extension_filter else "") + f" in {directory}:\n\n"
                for ext, group in file_groups.items():
                    response_text += f"{ext} files ({len(group)}):\n"
                    for file_info in sorted(group, key=lambda x: x["name"]):
                        size_kb = file_info["size"] / 1024
                        response_text += f"- {file_info['name']} ({size_kb:.1f} KB, modified: {file_info['modified']})\n"
                    response_text += "\n"
                result = {"text": response_text,
                          "operation": "list",
                          "directory": directory,
                          "file_count": len(files),
                          "files": file_groups}
                self.memory.add_short_term({"operation": "list", "directory": directory, "file_count": len(files), "timestamp": pd.Timestamp.now().isoformat()})
            except Exception as e:
                error_msg = f"Error listing files: {str(e)}"
                logger.error(error_msg)
                result = {"text": error_msg, "error": str(e)}
        elif operation == "delete":
            if not filename:
                result = {"text": "Please specify a filename to delete."}
            elif not Path(filename).exists():
                result = {"text": f"File '{filename}' not found."}
            else:
                try:
                    os.remove(filename)
                    result = {"text": f"Successfully deleted file: {filename}", "operation": "delete", "filename": filename}
                    self.memory.add_short_term({"operation": "delete", "filename": filename, "timestamp": pd.Timestamp.now().isoformat()})
                    self.memory.add_long_term(f"file:{filename}", {"operation": "delete", "timestamp": pd.Timestamp.now().isoformat()})
                except Exception as e:
                    error_msg = f"Error deleting file {filename}: {str(e)}"
                    logger.error(error_msg)
                    result = {"text": error_msg, "error": str(e)}
        else:
            result = {"text": f"Unknown operation requested in task: {task}"}
        return result

# ---------------------------
# Gradio Interface Setup
# ---------------------------
def create_agent_hub():
    hub = AgentHub()
    hub.register_agent("web_research", WebResearchAgent(hub))
    hub.register_agent("web_scraper", WebScraperAgent(hub))
    hub.register_agent("text_processing", TextProcessingAgent(hub))
    hub.register_agent("data_analysis", DataAnalysisAgent(hub))
    hub.register_agent("coding_assistant", CodingAssistantAgent(hub))
    hub.register_agent("image_processing", ImageProcessingAgent(hub))
    hub.register_agent("file_management", FileManagementAgent(hub))
    return hub

def create_gradio_interface():
    hub = create_agent_hub()

    def process_request(request_type, input_data, extra_data=""):
        try:
            if request_type == "chain":
                agent_sequence = [agent.strip() for agent in extra_data.split(",") if agent.strip()]
                return hub.chain_of_thought(input_data, agent_sequence)
            else:
                agent = hub.get_agent(request_type)
                if not agent:
                    return {"error": f"Unknown agent type: {request_type}"}
                return agent.process_task(input_data)
        except Exception as e:
            logger.error(f"Error processing request: {e}")
            return {"error": str(e)}

    with gr.Blocks(title="SmolAgents Toolbelt") as interface:
        gr.Markdown("# SmolAgents Toolbelt")
        gr.Markdown("A collection of specialized agents for various tasks with evolved logic.")
        with gr.Tabs():
            with gr.Tab("Single Agent"):
                agent_type = gr.Dropdown(
                    choices=["web_research", "web_scraper", "text_processing", "data_analysis",
                             "coding_assistant", "image_processing", "file_management"],
                    label="Select Agent",
                    value="web_research"
                )
                with gr.Row():
                    input_text = gr.Textbox(label="Input", placeholder="Enter your request...")
                    extra_input = gr.Textbox(label="Extra (e.g., image path or additional info)", placeholder="Optional extra input...")
                output_text = gr.JSON(label="Output")
                process_btn = gr.Button("Process")
                process_btn.click(fn=process_request, inputs=[agent_type, input_text, extra_input], outputs=output_text)
            with gr.Tab("Chain of Thought"):
                chain_input = gr.Textbox(label="Input", placeholder="Enter your request for the chain...")
                chain_sequence = gr.Textbox(label="Agent Sequence", placeholder="Comma-separated agent names (e.g., text_processing,data_analysis)")
                chain_output = gr.JSON(label="Chain Output")
                chain_type = gr.State("chain")
                chain_btn = gr.Button("Process Chain")
                chain_btn.click(fn=process_request, inputs=[chain_type, chain_input, chain_sequence], outputs=chain_output)
            with gr.Tab("Help"):
                gr.Markdown("""
                ## Available Agents
                - **Web Research Agent**: Searches Wikipedia for information.
                - **Web Scraper Agent**: Scrapes content from provided URLs.
                - **Text Processing Agent**: Analyzes and processes text.
                - **Data Analysis Agent**: Performs data analysis and visualization.
                - **Coding Assistant Agent**: Generates code snippets.
                - **Image Processing Agent**: Processes images based on instructions.
                - **File Management Agent**: Handles file creation, reading, listing, and deletion.

                ### Usage
                1. Select an agent (or choose 'Chain of Thought' for a sequence).
                2. Enter your request.
                3. For chains, provide a comma-separated list of agent IDs.
                """)
    return interface

if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
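
# ---------------------------
# Programmatic usage (illustrative)
# ---------------------------
# A minimal sketch of driving the hub without the Gradio UI, assuming the
# transformers models download successfully when AgentHub starts. The task
# strings below are made-up examples, not fixtures shipped with this file:
#
#     hub = create_agent_hub()
#
#     research = hub.get_agent("web_research").process_task("History of the Python programming language")
#     print(research["text"])
#
#     chain = hub.chain_of_thought(
#         "Summarize recent trends in renewable energy",
#         ["web_research", "text_processing"],
#     )
#     print(chain["final_output"]["text"])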