Spaces: Runtime error
Update app.py

app.py CHANGED
@@ -1 +1,1048 @@
import logging
import os
import sys
from pathlib import Path
import json
import io
import uuid
import traceback
from typing import Dict, List, Any, Tuple, Optional
from dataclasses import dataclass

# Set UTF-8 encoding for Windows
if sys.platform == 'win32':
    os.environ["PYTHONIOENCODING"] = "utf-8"

import gradio as gr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from sklearn.datasets import load_iris
import cv2
from PIL import Image

# Additional libraries for web research & scraping
import wikipedia
import requests
from bs4 import BeautifulSoup

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

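# Assumed runtime dependencies for the imports above (not pinned in this
# commit): gradio, numpy, pandas, matplotlib, transformers (plus torch for
# the pipelines), scikit-learn, opencv-python-headless, pillow, wikipedia,
# requests, beautifulsoup4. A missing entry in requirements.txt is a common
# cause of a Spaces "Runtime error" at startup.
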
# ---------------------------
# Agent Context & Memory System
# ---------------------------
@dataclass
class AgentMemory:
    # None defaults avoid mutable-default pitfalls; real containers are
    # created in __post_init__.
    short_term: Optional[List[Dict[str, Any]]] = None
    long_term: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        if self.short_term is None:
            self.short_term = []
        if self.long_term is None:
            self.long_term = {}

    def add_short_term(self, data: Dict[str, Any]) -> None:
        self.short_term.append(data)
        if len(self.short_term) > 10:
            self.short_term.pop(0)

    def add_long_term(self, key: str, value: Any) -> None:
        self.long_term[key] = value

    def get_recent_context(self, n: int = 3) -> List[Dict[str, Any]]:
        return self.short_term[-n:] if len(self.short_term) >= n else self.short_term

    def search_long_term(self, query: str) -> List[Tuple[str, Any]]:
        results = []
        for key, value in self.long_term.items():
            if query.lower() in key.lower():
                results.append((key, value))
        return results

# ---------------------------
# Agent Hub
# ---------------------------
class AgentHub:
    def __init__(self):
        self.agents = {}
        self.global_memory = AgentMemory()
        self.session_id = str(uuid.uuid4())

        try:
            self.tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
            self.model = AutoModelForCausalLM.from_pretrained("distilgpt2")
            self.generator = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer)
            logger.info("Initialized text generation pipeline with distilgpt2")
        except Exception as e:
            logger.error(f"Failed to initialize text generation: {e}")
            self.generator = None

        try:
            self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
            logger.info("Initialized summarization pipeline")
        except Exception as e:
            logger.error(f"Failed to initialize summarizer: {e}")
            self.summarizer = None

    def register_agent(self, agent_id: str, agent_instance) -> None:
        self.agents[agent_id] = agent_instance
        logger.info(f"Registered agent: {agent_id}")

    def get_agent(self, agent_id: str):
        return self.agents.get(agent_id)

    def broadcast(self, message: Dict[str, Any], exclude: Optional[List[str]] = None) -> Dict[str, List[Dict]]:
        exclude = exclude or []
        responses = {}
        for agent_id, agent in self.agents.items():
            if agent_id not in exclude:
                try:
                    response = agent.process_message(message)
                    responses[agent_id] = response
                except Exception as e:
                    logger.error(f"Error in agent {agent_id}: {e}")
                    responses[agent_id] = {"error": str(e)}
        return responses

    def chain_of_thought(self, initial_task: str, agent_sequence: List[str]) -> Dict[str, Any]:
        results = {"final_output": None, "chain_outputs": [], "errors": []}
        current_input = initial_task
        for agent_id in agent_sequence:
            agent = self.get_agent(agent_id)
            if not agent:
                error = f"Agent {agent_id} not found"
                results["errors"].append(error)
                logger.error(error)
                continue
            try:
                output = agent.process_task(current_input)
                step_result = {"agent": agent_id, "input": current_input, "output": output}
                results["chain_outputs"].append(step_result)
                if isinstance(output, dict) and "text" in output:
                    current_input = output["text"]
                elif isinstance(output, str):
                    current_input = output
                else:
                    current_input = f"Result from {agent_id}: {type(output).__name__} object"
            except Exception as e:
                error = f"Error in agent {agent_id}: {str(e)}\n{traceback.format_exc()}"
                results["errors"].append(error)
                logger.error(error)
        if results["chain_outputs"]:
            last_output = results["chain_outputs"][-1]["output"]
            results["final_output"] = last_output if isinstance(last_output, dict) else {"text": str(last_output)}
        return results

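# Chain usage sketch (hypothetical call, not executed in this module):
# each step feeds the previous agent's "text" output into the next agent,
# so text-producing agents compose naturally.
#   hub = create_agent_hub()
#   out = hub.chain_of_thought("Research quantum computing",
#                              ["web_research", "text_processing"])
#   print(out["final_output"]["text"])
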
# ---------------------------
# Intelligent Agent Base Class
# ---------------------------
class IntelligentAgent:
    def __init__(self, agent_id: str, hub: AgentHub):
        self.agent_id = agent_id
        self.hub = hub
        self.memory = AgentMemory()
        logger.info(f"Initialized agent: {agent_id}")

    def process_task(self, task: Any) -> Any:
        raise NotImplementedError("Subclasses must implement process_task")

    def process_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
        logger.info(f"Agent {self.agent_id} received message: {message}")
        self.memory.add_short_term({"timestamp": pd.Timestamp.now(), "message": message})
        return {"sender": self.agent_id, "received": True, "action": "acknowledge"}

    def request_assistance(self, target_agent_id: str, data: Dict[str, Any]) -> Dict[str, Any]:
        target_agent = self.hub.get_agent(target_agent_id)
        if not target_agent:
            logger.error(f"Agent {self.agent_id} requested unknown agent: {target_agent_id}")
            return {"error": f"Agent {target_agent_id} not found"}
        request = {"sender": self.agent_id, "type": "assistance_request", "data": data}
        return target_agent.process_message(request)

    def evaluate_result(self, result: Any) -> Dict[str, Any]:
        success = result is not None
        confidence = 0.8 if success else 0.2
        return {"success": success, "confidence": confidence, "timestamp": pd.Timestamp.now().isoformat()}

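# Minimal subclass sketch (illustrative only; not registered below):
# a new agent needs only an id and a process_task implementation, and
# inherits messaging and memory from the base class.
#   class EchoAgent(IntelligentAgent):
#       def __init__(self, hub: AgentHub):
#           super().__init__("echo", hub)
#       def process_task(self, task: str) -> Dict[str, Any]:
#           return {"text": task}
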
# ---------------------------
# Specialized Agent Implementations
# ---------------------------
class WebResearchAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("web_research", hub)

    def process_task(self, task: str) -> Dict[str, Any]:
        logger.info(f"WebResearchAgent processing: {task}")
        search_term = task
        if self.hub.summarizer:
            try:
                keywords = task.split()
                if len(keywords) > 5:
                    summary = self.hub.summarizer(task, max_length=20, min_length=5, do_sample=False)
                    search_term = summary[0]['summary_text']
                else:
                    search_term = task
            except Exception as e:
                logger.error(f"Summarization error in WebResearchAgent: {e}")
                search_term = task
        try:
            search_results = wikipedia.search(search_term)
            if not search_results:
                result = {"text": f"No Wikipedia pages found for '{task}'."}
                self.memory.add_short_term({"task": task, "result": result, "success": False})
                return result
            page_title = None
            summary_text = None
            error_details = []
            for candidate in search_results[:3]:
                try:
                    summary_text = wikipedia.summary(candidate, sentences=5)
                    page_title = candidate
                    break
                except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError) as e:
                    error_details.append(f"{candidate}: {str(e)}")
                    continue
            if not summary_text:
                result = {"text": f"Failed to get Wikipedia summary for '{task}'. Errors: {'; '.join(error_details)}", "search_results": search_results}
                self.memory.add_short_term({"task": task, "result": result, "success": False})
                return result
            self.memory.add_long_term(f"research:{search_term}", {"page_title": page_title, "summary": summary_text, "timestamp": pd.Timestamp.now().isoformat()})
            result = {"text": f"Research on '{page_title}':\n{summary_text}", "page_title": page_title, "related_topics": search_results[:5], "source": "Wikipedia"}
            self.memory.add_short_term({"task": task, "result": result, "success": True})
            return result
        except Exception as e:
            error_msg = f"Error in web research: {str(e)}"
            logger.error(error_msg)
            result = {"text": error_msg, "error": str(e)}
            self.memory.add_short_term({"task": task, "result": result, "success": False})
            return result

class WebScraperAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("web_scraper", hub)

    def process_task(self, task: str) -> Dict[str, Any]:
        logger.info(f"WebScraperAgent processing URL: {task}")
        if not task.startswith(('http://', 'https://')):
            return {"text": "Invalid URL format. Please provide a URL starting with http:// or https://"}
        try:
            headers = {'User-Agent': 'Mozilla/5.0'}
            response = requests.get(task, headers=headers, timeout=10)
            if response.status_code != 200:
                result = {"text": f"Error: received status code {response.status_code} from {task}"}
                self.memory.add_short_term({"url": task, "result": result, "success": False})
                return result
            soup = BeautifulSoup(response.text, 'html.parser')
            title = soup.title.string.strip() if soup.title and soup.title.string else "No title found"
            main_content = soup.find('main') or soup.find(id='content') or soup.find(class_='content')
            paras = main_content.find_all('p') if main_content else soup.find_all('p')
            content = "\n".join([p.get_text().strip() for p in paras if len(p.get_text().strip()) > 50])
            if len(content) > 2000 and self.hub.summarizer:
                chunks = [content[i:i+1000] for i in range(0, len(content), 1000)]
                summarized_chunks = []
                for chunk in chunks:
                    summary = self.hub.summarizer(chunk, max_length=100, min_length=30, do_sample=False)
                    summarized_chunks.append(summary[0]['summary_text'])
                content = "\n".join(summarized_chunks)
            elif len(content) > 2000:
                content = content[:2000] + "... (content truncated)"
            links = []
            for a in soup.find_all('a', href=True):
                href = a['href']
                if href.startswith('http') and len(links) < 5:
                    links.append({"url": href, "text": a.get_text().strip() or href})
            result = {"text": f"Content from {task}:\n\nTitle: {title}\n\n{content}", "title": title, "raw_content": content, "links": links, "source_url": task}
            self.memory.add_short_term({"url": task, "result": result, "success": True})
            self.memory.add_long_term(f"scraped:{task}", {"title": title, "content_preview": content[:200], "timestamp": pd.Timestamp.now().isoformat()})
            return result
        except requests.RequestException as e:
            error_msg = f"Request error for {task}: {str(e)}"
            logger.error(error_msg)
            return {"text": error_msg, "error": str(e)}
        except Exception as e:
            error_msg = f"Error scraping {task}: {str(e)}"
            logger.error(error_msg)
            return {"text": error_msg, "error": str(e)}

class TextProcessingAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("text_processing", hub)

    def process_task(self, task: str) -> Dict[str, Any]:
        logger.info(f"TextProcessingAgent processing text ({len(task)} chars)")
        if not task or len(task) < 10:
            return {"text": "Text too short to process meaningfully."}
        results = {}
        words = task.split()
        sentences = task.split('. ')
        results["statistics"] = {
            "character_count": len(task),
            "word_count": len(words),
            "estimated_sentences": len(sentences),
            "average_word_length": sum(len(word) for word in words) / len(words) if words else 0
        }
        if len(task) > 5000:
            chunk_size = 500
            chunking_strategy = "character_blocks"
        elif len(words) > 200:
            chunk_size = 50
            chunking_strategy = "word_blocks"
        else:
            chunk_size = 5
            chunking_strategy = "sentence_blocks"
        if chunking_strategy == "character_blocks":
            chunks = [task[i:i+chunk_size] for i in range(0, len(task), chunk_size)]
        elif chunking_strategy == "word_blocks":
            chunks = [' '.join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
        else:
            chunks = ['. '.join(sentences[i:i+chunk_size]) + '.' for i in range(0, len(sentences), chunk_size)]
        results["chunks"] = chunks
        results["chunking_strategy"] = chunking_strategy
        if self.hub.summarizer and len(task) > 200:
            try:
                task_for_summary = task[:1000] if len(task) > 1000 else task
                summary = self.hub.summarizer(task_for_summary, max_length=100, min_length=30, do_sample=False)
                results["summary"] = summary[0]['summary_text']
            except Exception as e:
                logger.error(f"Summarization error: {e}")
                results["summary_error"] = str(e)
        stop_words = set(['the', 'a', 'an', 'and', 'in', 'on', 'at', 'to', 'for', 'of', 'with'])
        word_freq = {}
        for word in words:
            w = word.lower().strip('.,!?:;()-"\'')
            if w and w not in stop_words and len(w) > 1:
                word_freq[w] = word_freq.get(w, 0) + 1
        results["frequent_words"] = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:10]
        positive_words = set(['good', 'great', 'excellent', 'positive', 'happy', 'best', 'better', 'success'])
        negative_words = set(['bad', 'worst', 'terrible', 'negative', 'sad', 'problem', 'fail', 'issue'])
        pos_count = sum(1 for word in words if word.lower().strip('.,!?:;()-"\'') in positive_words)
        neg_count = sum(1 for word in words if word.lower().strip('.,!?:;()-"\'') in negative_words)
        sentiment = "possibly positive" if pos_count > neg_count and pos_count > 2 else ("possibly negative" if neg_count > pos_count and neg_count > 2 else "neutral or mixed")
        results["basic_sentiment"] = {"assessment": sentiment, "positive_word_count": pos_count, "negative_word_count": neg_count}
        self.memory.add_short_term({"task_preview": task[:100] + "..." if len(task) > 100 else task, "word_count": results["statistics"]["word_count"], "result": results})
        text_response = (
            f"Text Analysis Results:\n- {results['statistics']['word_count']} words, {results['statistics']['character_count']} characters\n"
            f"- Split into {len(chunks)} chunks using {chunking_strategy}\n"
        )
        if "summary" in results:
            text_response += f"\nSummary:\n{results['summary']}\n"
        if results["frequent_words"]:
            text_response += "\nMost frequent words:\n"
            for word, count in results["frequent_words"][:5]:
                text_response += f"- {word}: {count} occurrences\n"
        text_response += f"\nOverall tone appears {results['basic_sentiment']['assessment']}"
        results["text"] = text_response
        return results

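# Chunking policy used above: >5000 chars -> 500-char blocks, >200 words ->
# 50-word blocks, otherwise 5-sentence blocks. The sentiment check is a bare
# word-list heuristic, hence the hedged "possibly" labels in its output.
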
class DataAnalysisAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("data_analysis", hub)

    def process_task(self, task: str) -> Dict[str, Any]:
        logger.info(f"DataAnalysisAgent processing: {task}")
        file_path = None
        if "analyze" in task.lower() and ".csv" in task.lower():
            for word in task.split():
                if word.endswith('.csv'):
                    file_path = word
                    break
        if not file_path or not Path(file_path).exists():
            logger.info("No specific CSV file mentioned or file not found, creating sample data")
            if "time series" in task.lower():
                dates = pd.date_range(start='2023-01-01', periods=30, freq='D')
                df = pd.DataFrame({'date': dates, 'value': np.random.normal(100, 15, 30), 'trend': np.linspace(0, 20, 30) + np.random.normal(0, 2, 30)})
                file_path = "sample_timeseries.csv"
            elif "sales" in task.lower():
                products = ['ProductA', 'ProductB', 'ProductC', 'ProductD']
                regions = ['North', 'South', 'East', 'West']
                dates = pd.date_range(start='2023-01-01', periods=50, freq='D')
                data = []
                for _ in range(200):
                    data.append({'date': np.random.choice(dates), 'product': np.random.choice(products), 'region': np.random.choice(regions), 'units_sold': np.random.randint(10, 100), 'revenue': np.random.uniform(100, 1000)})
                df = pd.DataFrame(data)
                file_path = "sample_sales.csv"
            else:
                df = pd.DataFrame({
                    'A': np.random.normal(0, 1, 100),
                    'B': np.random.normal(5, 2, 100),
                    'C': np.random.uniform(-10, 10, 100),
                    'D': np.random.randint(0, 5, 100),
                    'label': np.random.choice(['X', 'Y', 'Z'], 100)
                })
                file_path = "sample_data.csv"
            df.to_csv(file_path, index=False)
            logger.info(f"Created sample data file: {file_path}")
        else:
            try:
                df = pd.read_csv(file_path)
                logger.info(f"Loaded existing file: {file_path}")
            except Exception as e:
                error_msg = f"Error loading CSV file {file_path}: {str(e)}"
                logger.error(error_msg)
                return {"text": error_msg, "error": str(e)}
        analysis_results = {}
        # Column groups are derived once up front so later sections do not
        # depend on an earlier try-block having succeeded.
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        categorical_cols = df.select_dtypes(exclude=[np.number]).columns
        try:
            analysis_results["summary_stats"] = df[numeric_cols].describe().to_dict()
            for col in categorical_cols:
                if df[col].nunique() < 10:
                    analysis_results[f"{col}_distribution"] = df[col].value_counts().to_dict()
        except Exception as e:
            logger.error(f"Error in basic statistics: {e}")
            analysis_results["stats_error"] = str(e)
        try:
            missing_values = df.isnull().sum().to_dict()
            analysis_results["missing_values"] = {k: v for k, v in missing_values.items() if v > 0}
        except Exception as e:
            logger.error(f"Error in missing values analysis: {e}")
            analysis_results["missing_values_error"] = str(e)
        try:
            if len(numeric_cols) > 1:
                analysis_results["correlations"] = df[numeric_cols].corr().to_dict()
        except Exception as e:
            logger.error(f"Error in correlation analysis: {e}")
            analysis_results["correlation_error"] = str(e)
        try:
            plt.figure(figsize=(10, 8))
            if len(numeric_cols) >= 2:
                plt.subplot(2, 1, 1)
                x_col, y_col = numeric_cols[0], numeric_cols[1]
                sample_df = df.sample(1000) if len(df) > 1000 else df
                if len(categorical_cols) > 0 and df[categorical_cols[0]].nunique() < 10:
                    cat_col = categorical_cols[0]
                    for category, group in sample_df.groupby(cat_col):
                        plt.scatter(group[x_col], group[y_col], label=category, alpha=0.6)
                    plt.legend()
                else:
                    plt.scatter(sample_df[x_col], sample_df[y_col], alpha=0.6)
                plt.xlabel(x_col)
                plt.ylabel(y_col)
                plt.title(f"Scatter Plot: {x_col} vs {y_col}")
                plt.subplot(2, 1, 2)
                if 'date' in df.columns or any('time' in col.lower() for col in df.columns):
                    date_col = [col for col in df.columns if 'date' in col.lower() or 'time' in col.lower()][0]
                    value_col = numeric_cols[0] if numeric_cols[0] != date_col else numeric_cols[1]
                    if not pd.api.types.is_datetime64_dtype(df[date_col]):
                        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
                    temp_df = df.dropna(subset=[date_col, value_col]).sort_values(date_col)
                    plt.plot(temp_df[date_col], temp_df[value_col])
                    plt.xlabel(date_col)
                    plt.ylabel(value_col)
                    plt.title(f"Time Series: {value_col} over {date_col}")
                    plt.xticks(rotation=45)
                else:
                    plt.hist(df[numeric_cols[0]].dropna(), bins=20, alpha=0.7)
                    plt.xlabel(numeric_cols[0])
                    plt.ylabel('Frequency')
                    plt.title(f"Distribution of {numeric_cols[0]}")
            else:
                if len(categorical_cols) > 0:
                    cat_col = categorical_cols[0]
                    df[cat_col].value_counts().plot(kind='bar')
                    plt.xlabel(cat_col)
                    plt.ylabel('Count')
                    plt.title(f"Counts by {cat_col}")
                    plt.xticks(rotation=45)
                else:
                    plt.hist(df[numeric_cols[0]].dropna(), bins=20)
                    plt.xlabel(numeric_cols[0])
                    plt.ylabel('Frequency')
                    plt.title(f"Distribution of {numeric_cols[0]}")
            plt.tight_layout()
            viz_path = f"{Path(file_path).stem}_viz.png"
            plt.savefig(viz_path)
            plt.close()
            analysis_results["visualization_path"] = viz_path
            analysis_results["visualization_created"] = True
            logger.info(f"Created visualization: {viz_path}")
        except Exception as e:
            logger.error(f"Error creating visualization: {e}")
            analysis_results["visualization_error"] = str(e)
            analysis_results["visualization_created"] = False
        insights = []
        try:
            for col in numeric_cols:
                q1 = df[col].quantile(0.25)
                q3 = df[col].quantile(0.75)
                iqr = q3 - q1
                outlier_count = ((df[col] < (q1 - 1.5 * iqr)) | (df[col] > (q3 + 1.5 * iqr))).sum()
                if outlier_count > 0:
                    insights.append(f"Found {outlier_count} potential outliers in '{col}'")
            if "correlations" in analysis_results:
                for col1, corr_dict in analysis_results["correlations"].items():
                    for col2, corr_val in corr_dict.items():
                        if col1 != col2 and abs(corr_val) > 0.7:
                            insights.append(f"Strong correlation ({corr_val:.2f}) between '{col1}' and '{col2}'")
            for col in categorical_cols:
                if df[col].nunique() < 10:
                    value_counts = df[col].value_counts()
                    most_common = value_counts.idxmax()
                    most_common_pct = value_counts.max() / value_counts.sum() * 100
                    if most_common_pct > 80:
                        insights.append(f"Imbalanced category in '{col}': '{most_common}' accounts for {most_common_pct:.1f}% of data")
            analysis_results["insights"] = insights
        except Exception as e:
            logger.error(f"Error extracting insights: {e}")
            analysis_results["insights_error"] = str(e)
        self.memory.add_short_term({"file": file_path, "columns": list(df.columns), "row_count": len(df), "analysis": analysis_results})
        if "sample" in file_path:
            self.memory.add_long_term(f"analysis:{file_path}", {"file": file_path, "type": "generated", "columns": list(df.columns), "row_count": len(df), "timestamp": pd.Timestamp.now().isoformat()})
        column_list = ", ".join(df.columns[:5]) + (", ..." if len(df.columns) > 5 else "")
        text_response = (
            f"Data Analysis Results for {file_path}\n- Dataset: {len(df)} rows x {len(df.columns)} columns ({column_list})\n"
        )
        if "missing_values" in analysis_results and analysis_results["missing_values"]:
            text_response += f"- Missing values found in {len(analysis_results['missing_values'])} columns\n"
        if insights:
            text_response += "\nKey Insights:\n"
            for i, insight in enumerate(insights[:5], 1):
                text_response += f"{i}. {insight}\n"
            if len(insights) > 5:
                text_response += f"... and {len(insights) - 5} more insights\n"
        text_response += f"\nVisualization saved to {viz_path}" if analysis_results.get("visualization_created") else "\nNo visualization created"
        analysis_results["text"] = text_response
        analysis_results["dataframe_shape"] = df.shape
        analysis_results["data_preview"] = df.head(5).to_dict()
        return analysis_results

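# Note on sample data: when the task names no readable CSV, the agent above
# fabricates one (time series, sales, or generic random columns), so any
# insights reported for those files describe synthetic data, not user data.
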
class CodingAssistantAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("coding_assistant", hub)
        self.code_snippets = {
            "file_operations": {
                "read_file": '''
def read_file(file_path):
    """Read a file and return its contents"""
    with open(file_path, 'r') as file:
        return file.read()
''',
                "write_file": '''
def write_file(file_path, content):
    """Write content to a file"""
    with open(file_path, 'w') as file:
        file.write(content)
    return True
'''
            },
            "data_processing": {
                "pandas_read_csv": '''
import pandas as pd
def load_csv(file_path):
    """Load a CSV file into a Pandas DataFrame"""
    return pd.read_csv(file_path)
''',
                "pandas_basic_stats": '''
def get_basic_stats(df):
    """Get basic statistics for a DataFrame"""
    numeric_stats = df.describe()
    categorical_columns = df.select_dtypes(include=['object']).columns
    categorical_stats = {col: df[col].value_counts().to_dict() for col in categorical_columns}
    return {
        'numeric': numeric_stats.to_dict(),
        'categorical': categorical_stats
    }
'''
            },
            "visualization": {
                "matplotlib_basic_plot": '''
import matplotlib.pyplot as plt
def create_basic_plot(data, x_col, y_col, title="Plot", kind="line"):
    """Create a basic plot using matplotlib"""
    plt.figure(figsize=(10, 6))
    if kind == "line":
        plt.plot(data[x_col], data[y_col])
    elif kind == "scatter":
        plt.scatter(data[x_col], data[y_col])
    elif kind == "bar":
        plt.bar(data[x_col], data[y_col])
    plt.title(title)
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    plt.tight_layout()
    plt.savefig(f"{title.lower().replace(' ', '_')}.png")
    plt.close()
    return f"{title.lower().replace(' ', '_')}.png"
'''
            },
            "web_scraping": {
                "requests_beautifulsoup": '''
import requests
from bs4 import BeautifulSoup
def scrape_webpage(url):
    """Scrape a webpage and extract text from paragraphs"""
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        paragraphs = soup.find_all('p')
        text = [p.get_text() for p in paragraphs]
        return {
            'title': soup.title.string if soup.title else "No title",
            'text': text,
            'url': url
        }
    except Exception as e:
        return {'error': str(e), 'url': url}
'''
            },
            "nlp": {
                "basic_text_analysis": '''
from collections import Counter
import re
def analyze_text(text):
    """Perform basic text analysis"""
    text = text.lower()
    words = re.findall(r'\w+', text)
    word_count = len(words)
    unique_words = len(set(words))
    stop_words = {'the', 'a', 'an', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'and', 'or'}
    word_freq = Counter([w for w in words if w not in stop_words and len(w) > 1])
    return {
        'word_count': word_count,
        'unique_words': unique_words,
        'avg_word_length': sum(len(w) for w in words) / word_count if word_count else 0,
        'most_common': word_freq.most_common(10)
    }
'''
            },
            "machine_learning": {
                "basic_classifier": '''
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
def train_basic_classifier(X, y, test_size=0.2, random_state=42):
    """Train a basic RandomForest classifier"""
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    model = RandomForestClassifier(n_estimators=100, random_state=random_state)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    return {
        'model': model,
        'accuracy': report['accuracy'],
        'classification_report': report,
        'feature_importance': dict(zip(range(X.shape[1]), model.feature_importances_))
    }
'''
            }
        }

    def process_task(self, task: str) -> Dict[str, Any]:
        logger.info(f"CodingAssistantAgent processing: {task}")
        task_lower = task.lower()
        keyword_mapping = {
            "file": "file_operations",
            "read file": "file_operations",
            "write file": "file_operations",
            "csv": "data_processing",
            "data": "data_processing",
            "pandas": "data_processing",
            "dataframe": "data_processing",
            "plot": "visualization",
            "chart": "visualization",
            "graph": "visualization",
            "visualize": "visualization",
            "matplotlib": "visualization",
            "scrape": "web_scraping",
            "web": "web_scraping",
            "html": "web_scraping",
            "beautifulsoup": "web_scraping",
            "text analysis": "nlp",
            "nlp": "nlp",
            "natural language": "nlp",
            "word count": "nlp",
            "text processing": "nlp",
            "machine learning": "machine_learning",
            "ml": "machine_learning",
            "model": "machine_learning",
            "predict": "machine_learning",
            "classifier": "machine_learning"
        }
        code_category = None
        function_name = None
        for keyword, category in keyword_mapping.items():
            if keyword in task_lower:
                code_category = category
                for func_name in self.code_snippets.get(category, {}):
                    natural_func = func_name.replace('_', ' ')
                    if natural_func in task_lower:
                        function_name = func_name
                        break
                break
        if not code_category:
            if any(word in task_lower for word in ["add", "sum", "calculate", "compute"]):
                code_category = "data_processing"
            elif any(word in task_lower for word in ["show", "display", "generate"]):
                code_category = "visualization"
        if code_category and not function_name and self.code_snippets.get(code_category):
            function_name = next(iter(self.code_snippets[code_category]))
        if not code_category:
            function_parts = [word for word in task_lower.split() if word not in ["a", "the", "an", "to", "for", "function", "code", "create", "make"]]
            func_name = "_".join(function_parts[:2]) if len(function_parts) >= 2 else "custom_function"
            custom_code = f"""
def {func_name}(input_data):
    # Custom function based on your request: '{task}'
    result = None
    # TODO: Implement specific logic based on requirements
    if isinstance(input_data, list):
        result = len(input_data)
    elif isinstance(input_data, str):
        result = input_data.upper()
    elif isinstance(input_data, (int, float)):
        result = input_data * 2
    return {{
        'input': input_data,
        'result': result,
        'status': 'processed'
    }}
"""
            result = {
                "text": f"I've created a custom function template based on your request:\n\n```python\n{custom_code}\n```\n\nThis is a starting point you can customize further.",
                "code": custom_code,
                "language": "python",
                "type": "custom"
            }
        else:
            code_snippet = self.code_snippets[code_category][function_name]
            result = {
                "text": f"Here's a {code_category.replace('_', ' ')} function for {function_name.replace('_', ' ')}:\n\n```python\n{code_snippet}\n```\n\nYou can customize this code.",
                "code": code_snippet,
                "language": "python",
                "category": code_category,
                "function": function_name
            }
        self.memory.add_short_term({"task": task, "code_category": code_category, "function_provided": function_name, "timestamp": pd.Timestamp.now().isoformat()})
        return result

class ImageProcessingAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("image_processing", hub)

    def process_task(self, task: Any) -> Dict[str, Any]:
        logger.info("ImageProcessingAgent processing task")
        image = None
        task_type = None
        if isinstance(task, Image.Image):
            image = task
            task_type = "direct_image"
        elif isinstance(task, str):
            if Path(task).exists() and Path(task).suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
                try:
                    image = Image.open(task)
                    task_type = "image_path"
                except Exception as e:
                    return {"text": f"Error loading image from {task}: {str(e)}", "error": str(e)}
            else:
                task_type = "text_instruction"
        elif isinstance(task, dict) and 'image' in task:
            if isinstance(task['image'], Image.Image):
                image = task['image']
            elif isinstance(task['image'], str) and Path(task['image']).exists():
                try:
                    image = Image.open(task['image'])
                except Exception as e:
                    return {"text": f"Error loading image from {task['image']}: {str(e)}", "error": str(e)}
            task_type = "dict_with_image"
        if task_type == "text_instruction" and not image:
            return {"text": "Please provide an image to process along with instructions."}
        if not image:
            return {"text": "No valid image provided for processing."}
        processing_type = "edge_detection"
        if task_type in ["text_instruction", "dict_with_image"] and isinstance(task, dict):
            instruction = task.get('instruction', '').lower()
            if 'blur' in instruction or 'smooth' in instruction:
                processing_type = "blur"
            elif 'edge' in instruction or 'contour' in instruction:
                processing_type = "edge_detection"
            elif 'gray' in instruction or 'greyscale' in instruction or 'black and white' in instruction:
                processing_type = "grayscale"
            elif 'bright' in instruction or 'contrast' in instruction:
                processing_type = "enhance"
            elif 'resize' in instruction or 'scale' in instruction:
                processing_type = "resize"
        try:
            img_array = np.array(image)
            if img_array.ndim == 3 and img_array.shape[-1] == 4:
                img_cv = cv2.cvtColor(img_array, cv2.COLOR_RGBA2BGR)
            else:
                img_cv = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
            processed_img = None
            processing_details = {"original_size": image.size}
            if processing_type == "edge_detection":
                gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
                edges = cv2.Canny(gray, 100, 200)
                processed_img = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
                processing_details["processing"] = "Edge detection using Canny"
            elif processing_type == "blur":
                processed_img = cv2.GaussianBlur(img_cv, (7, 7), 0)
                processing_details["processing"] = "Gaussian Blur"
            elif processing_type == "grayscale":
                processed_img = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
                processed_img = cv2.cvtColor(processed_img, cv2.COLOR_GRAY2BGR)
                processing_details["processing"] = "Grayscale conversion"
            elif processing_type == "enhance":
                lab = cv2.cvtColor(img_cv, cv2.COLOR_BGR2LAB)
                l, a, b = cv2.split(lab)
                clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
                cl = clahe.apply(l)
                limg = cv2.merge((cl, a, b))
                processed_img = cv2.cvtColor(limg, cv2.COLOR_LAB2BGR)
                processing_details["processing"] = "Contrast enhancement"
            elif processing_type == "resize":
                processed_img = cv2.resize(img_cv, (image.size[0] // 2, image.size[1] // 2))
                processing_details["processing"] = "Resized to half"
            else:
                processed_img = img_cv
                processing_details["processing"] = "No processing applied"
            processed_pil = Image.fromarray(cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB))
            return {"text": f"Image processing completed with {processing_details['processing']}.", "image": processed_pil, "details": processing_details}
        except Exception as e:
            error_msg = f"Error processing image: {str(e)}\n{traceback.format_exc()}"
            logger.error(error_msg)
            return {"text": f"Error processing image: {str(e)}", "error": str(e)}

class FileManagementAgent(IntelligentAgent):
    def __init__(self, hub: AgentHub):
        super().__init__("file_management", hub)

    def process_task(self, task: str) -> Dict[str, Any]:
        logger.info(f"FileManagementAgent processing: {task}")
        task_lower = task.lower()
        if any(word in task_lower for word in ["create", "make", "generate", "write"]):
            operation = "create"
        elif any(word in task_lower for word in ["read", "open", "show", "display", "content"]):
            operation = "read"
        elif any(word in task_lower for word in ["list", "find", "directory", "folder", "files in"]):
            operation = "list"
        elif any(word in task_lower for word in ["delete", "remove"]):
            operation = "delete"
        else:
            operation = "unknown"
        filename = None
        file_extensions = ['.txt', '.json', '.csv', '.md', '.py', '.html', '.js', '.css']
        words = task.split()
        for word in words:
            for ext in file_extensions:
                if ext in word.lower():
                    filename = word.strip(':"\'.,;')
                    break
            if filename:
                break
        if not filename:
            file_keywords = ["file", "named", "called", "filename"]
            for i, word in enumerate(words):
                if word.lower() in file_keywords and i < len(words) - 1:
                    potential_name = words[i+1].strip(':"\'.,;')
                    if '.' not in potential_name:
                        if "json" in task_lower:
                            potential_name += ".json"
                        elif "csv" in task_lower:
                            potential_name += ".csv"
                        elif "python" in task_lower or "py" in task_lower:
                            potential_name += ".py"
                        else:
                            potential_name += ".txt"
                    filename = potential_name
                    break
        if not filename:
            if "json" in task_lower:
                filename = f"data_{uuid.uuid4().hex[:6]}.json"
            elif "csv" in task_lower:
                filename = f"data_{uuid.uuid4().hex[:6]}.csv"
            elif "python" in task_lower or "py" in task_lower:
                filename = f"script_{uuid.uuid4().hex[:6]}.py"
            elif "log" in task_lower:
                filename = f"log_{uuid.uuid4().hex[:6]}.txt"
            else:
                filename = f"file_{uuid.uuid4().hex[:6]}.txt"
        result = {}
        if operation == "create":
            if filename.endswith('.json'):
                content = json.dumps({
                    "name": "Sample Data",
                    "description": task,
                    "created": pd.Timestamp.now().isoformat(),
                    "values": [1, 2, 3, 4, 5],
                    "metadata": {"source": "FileManagementAgent", "version": "1.0"}
                }, indent=2)
            elif filename.endswith('.csv'):
                content = "id,name,value,timestamp\n"
                for i in range(5):
                    content += f"{i+1},Item{i+1},{np.random.randint(1, 100)},{pd.Timestamp.now().isoformat()}\n"
            elif filename.endswith('.py'):
                content = f"""# Generated Python Script: {filename}
# Created: {pd.Timestamp.now().isoformat()}
# Description: {task}

def main():
    print("Hello from the FileManagementAgent!")
    data = [1, 2, 3, 4, 5]
    result = sum(data)
    print(f"Sample calculation: sum(data) = {{result}}")
    return result

if __name__ == "__main__":
    main()
"""
            else:
                content = f"File created by FileManagementAgent\nCreated: {pd.Timestamp.now().isoformat()}\nBased on request: {task}\n\nThis is sample content."
            try:
                with open(filename, 'w', encoding='utf-8') as f:
                    f.write(content)
                result = {"text": f"Successfully created file: {filename}", "operation": "create", "filename": filename, "size": len(content), "preview": content[:200] + "..." if len(content) > 200 else content}
                self.memory.add_short_term({"operation": "create", "filename": filename, "timestamp": pd.Timestamp.now().isoformat()})
                self.memory.add_long_term(f"file:{filename}", {"operation": "create", "type": Path(filename).suffix, "timestamp": pd.Timestamp.now().isoformat()})
            except Exception as e:
                error_msg = f"Error creating file {filename}: {str(e)}"
                logger.error(error_msg)
                result = {"text": error_msg, "error": str(e)}
        elif operation == "read":
            if not filename:
                result = {"text": "Please specify a filename to read."}
            elif not Path(filename).exists():
                result = {"text": f"File '{filename}' not found."}
            else:
                try:
                    with open(filename, 'r', encoding='utf-8') as f:
                        content = f.read()
                    result = {"text": f"Content of {filename}:\n\n{content}", "operation": "read", "filename": filename, "content": content, "size": len(content)}
                    self.memory.add_short_term({"operation": "read", "filename": filename, "timestamp": pd.Timestamp.now().isoformat()})
                except Exception as e:
                    error_msg = f"Error reading file {filename}: {str(e)}"
                    logger.error(error_msg)
                    result = {"text": error_msg, "error": str(e)}
        elif operation == "list":
            try:
                directory = "."
                for term in ["directory", "folder", "in"]:
                    if term in task_lower:
                        parts = task_lower.split(term)
                        if len(parts) > 1:
                            potential_dir = parts[1].strip().split()[0].strip(':"\'.,;')
                            if Path(potential_dir).exists() and Path(potential_dir).is_dir():
                                directory = potential_dir
                extension_filter = None
                for ext in file_extensions:
                    if ext in task_lower:
                        extension_filter = ext
                        break
                files = list(Path(directory).glob('*' + (extension_filter or '')))
                file_groups = {}
                for file in files:
                    file_groups.setdefault(file.suffix, []).append({
                        "name": file.name,
                        "size": file.stat().st_size,
                        "modified": pd.Timestamp(file.stat().st_mtime, unit='s').isoformat()
                    })
                response_text = f"Found {len(files)} files" + (f" with extension {extension_filter}" if extension_filter else "") + f" in {directory}:\n\n"
                for ext, group in file_groups.items():
                    response_text += f"{ext} files ({len(group)}):\n"
                    for file_info in sorted(group, key=lambda x: x["name"]):
                        size_kb = file_info["size"] / 1024
                        response_text += f"- {file_info['name']} ({size_kb:.1f} KB, modified: {file_info['modified']})\n"
                    response_text += "\n"
                result = {"text": response_text, "operation": "list", "directory": directory, "file_count": len(files), "files": file_groups}
                self.memory.add_short_term({"operation": "list", "directory": directory, "file_count": len(files), "timestamp": pd.Timestamp.now().isoformat()})
            except Exception as e:
                error_msg = f"Error listing files: {str(e)}"
                logger.error(error_msg)
                result = {"text": error_msg, "error": str(e)}
        elif operation == "delete":
            if not filename:
                result = {"text": "Please specify a filename to delete."}
            elif not Path(filename).exists():
                result = {"text": f"File '{filename}' not found."}
            else:
                try:
                    os.remove(filename)
                    result = {"text": f"Successfully deleted file: {filename}", "operation": "delete", "filename": filename}
                    self.memory.add_short_term({"operation": "delete", "filename": filename, "timestamp": pd.Timestamp.now().isoformat()})
                    self.memory.add_long_term(f"file:{filename}", {"operation": "delete", "timestamp": pd.Timestamp.now().isoformat()})
                except Exception as e:
                    error_msg = f"Error deleting file {filename}: {str(e)}"
                    logger.error(error_msg)
                    result = {"text": error_msg, "error": str(e)}
        else:
            result = {"text": f"Unknown operation requested in task: {task}"}
        return result

# ---------------------------
# Gradio Interface Setup
# ---------------------------
def create_agent_hub():
    hub = AgentHub()
    hub.register_agent("web_research", WebResearchAgent(hub))
    hub.register_agent("web_scraper", WebScraperAgent(hub))
    hub.register_agent("text_processing", TextProcessingAgent(hub))
    hub.register_agent("data_analysis", DataAnalysisAgent(hub))
    hub.register_agent("coding_assistant", CodingAssistantAgent(hub))
    hub.register_agent("image_processing", ImageProcessingAgent(hub))
    hub.register_agent("file_management", FileManagementAgent(hub))
    return hub

def create_gradio_interface():
    hub = create_agent_hub()

    def process_request(request_type, input_data, extra_data=""):
        try:
            if request_type == "chain":
                agent_sequence = [agent.strip() for agent in extra_data.split(",") if agent.strip()]
                return hub.chain_of_thought(input_data, agent_sequence)
            else:
                agent = hub.get_agent(request_type)
                if not agent:
                    return {"error": f"Unknown agent type: {request_type}"}
                return agent.process_task(input_data)
        except Exception as e:
            logger.error(f"Error processing request: {e}")
            return {"error": str(e)}

    with gr.Blocks(title="SmolAgents Toolbelt") as interface:
        gr.Markdown("# SmolAgents Toolbelt")
        gr.Markdown("A collection of specialized agents for various tasks with evolved logic.")
        with gr.Tabs():
            with gr.Tab("Single Agent"):
                agent_type = gr.Dropdown(
                    choices=["web_research", "web_scraper", "text_processing", "data_analysis", "coding_assistant", "image_processing", "file_management"],
                    label="Select Agent",
                    value="web_research"
                )
                with gr.Row():
                    input_text = gr.Textbox(label="Input", placeholder="Enter your request...")
                    extra_input = gr.Textbox(label="Extra (e.g., image path or additional info)", placeholder="Optional extra input...")
                output_text = gr.JSON(label="Output")
                process_btn = gr.Button("Process")
                process_btn.click(fn=process_request, inputs=[agent_type, input_text, extra_input], outputs=output_text)
            with gr.Tab("Chain of Thought"):
                chain_input = gr.Textbox(label="Input", placeholder="Enter your request for the chain...")
                chain_sequence = gr.Textbox(label="Agent Sequence", placeholder="Comma-separated agent names (e.g., text_processing,data_analysis)")
                chain_output = gr.JSON(label="Chain Output")
                chain_type = gr.State("chain")
                chain_btn = gr.Button("Process Chain")
                chain_btn.click(fn=process_request, inputs=[chain_type, chain_input, chain_sequence], outputs=chain_output)
            with gr.Tab("Help"):
                gr.Markdown("""
                ## Available Agents

                - **Web Research Agent**: Searches Wikipedia for information.
                - **Web Scraper Agent**: Scrapes content from provided URLs.
                - **Text Processing Agent**: Analyzes and processes text.
                - **Data Analysis Agent**: Performs data analysis and visualization.
                - **Coding Assistant Agent**: Generates code snippets.
                - **Image Processing Agent**: Processes images based on instructions.
                - **File Management Agent**: Handles file creation, reading, listing, and deletion.

                ### Usage
                1. Select an agent (or choose 'Chain of Thought' for a sequence).
                2. Enter your request.
                3. For chains, provide a comma-separated list of agent IDs.
                """)
    return interface

if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
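
# Deployment note (assumption, not verified against this Space's config):
# Hugging Face Spaces expects Gradio apps on port 7860, and share=True only
# matters for a temporary public link when running locally; it has no effect
# on Spaces itself.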