|
|
|
|
|
from twisted.internet import reactor, defer, task |
|
from agent import AutonomousWebAgent |
|
import random |
|
import logging |
|
import sys |
|
import time |
|
import codecs |
|
|
|
# True when the Google Colab package has already been imported into this
# interpreter, i.e. the script is running inside a Colab notebook.
IS_COLAB = 'google.colab' in sys.modules

# Options shared by both environments; handlers are only supplied explicitly
# outside Colab, where the log goes to a UTF-8 file and a UTF-8 wrapped stdout.
_logging_options = {
    'level': logging.INFO,
    'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
}
if not IS_COLAB:
    _logging_options['handlers'] = [
        logging.FileHandler("agent_training.log", encoding='utf-8'),
        logging.StreamHandler(codecs.getwriter('utf-8')(sys.stdout.buffer)),
    ]
logging.basicConfig(**_logging_options)

# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)
|
|
|
|
|
# Pool of search topics; each training episode draws one at random.
QUERIES = [
    "machine learning",
    "climate change",
    "renewable energy",
    "artificial intelligence",
    "quantum computing",
    "blockchain technology",
    "gene editing",
    "virtual reality",
    "space exploration",
    "cybersecurity",
    "autonomous vehicles",
    "Internet of Things",
    "3D printing",
    "nanotechnology",
    "bioinformatics",
    "augmented reality",
    "robotics",
    "data science",
    "neural networks",
    "cloud computing",
    "edge computing",
    "5G technology",
    "cryptocurrency",
    "natural language processing",
    "computer vision",
]
|
|
|
@defer.inlineCallbacks
def train_agent():
    """Train an ``AutonomousWebAgent`` over a fixed number of search episodes.

    Each episode draws a random entry from ``QUERIES``, waits for
    ``agent.search(query)`` (bounded by a 300-second timeout) and adds the
    result to a running reward total. An episode that times out or raises is
    logged and scored as -1. After every tenth episode the agent's target
    model update methods are called. Once all episodes are done, summary
    statistics are logged, the agent's save methods are called, and the
    reactor is stopped if it is still running.

    Returns:
        Deferred: fires with ``None`` when the generator finishes (this
        function is wrapped by ``defer.inlineCallbacks``).
    """
    state_size = 7
    action_size = 3
    num_options = 3

    agent_settings = dict(
        state_size=state_size,
        action_size=action_size,
        num_options=num_options,
        hidden_size=64,
        learning_rate=0.001,
        gamma=0.99,
        epsilon=1.0,
        epsilon_decay=0.995,
        epsilon_min=0.01,
        knowledge_base_path='knowledge_base.json',
    )
    agent = AutonomousWebAgent(**agent_settings)
    logger.info(f"Initialized AutonomousWebAgent with state_size={state_size}, action_size={action_size}, num_options={num_options}")

    num_episodes = 10
    search_timeout = 300       # seconds before a pending search is cancelled
    target_sync_interval = 10  # episodes between target-model updates
    total_training_reward = 0
    training_started = time.time()

    for episode in range(num_episodes):
        episodes_done = episode + 1
        query = random.choice(QUERIES)
        logger.info(f"Starting episode {episode + 1}/{num_episodes} with query: {query}")
        episode_started = time.time()

        try:
            pending_search = agent.search(query)
            pending_search.addTimeout(search_timeout, reactor)
            total_reward = yield pending_search
            total_training_reward += total_reward
            episode_duration = time.time() - episode_started
            logger.info(f"Episode {episode + 1}/{num_episodes}, Query: {query}, Total Reward: {total_reward}, Duration: {episode_duration:.2f} seconds")
        except Exception as e:
            # Timeouts get a short message; anything else is logged with its
            # traceback. Either way the episode is scored as -1.
            if isinstance(e, defer.TimeoutError):
                logger.error(f"Episode {episode + 1} timed out")
            else:
                logger.exception(f"Error in episode {episode + 1}: {str(e)}")
            total_reward = -1
            total_training_reward += total_reward

        if episodes_done % target_sync_interval == 0:
            logger.info(f"Updating target models at episode {episode + 1}")
            agent.update_worker_target_model()
            agent.update_manager_target_model()
            agent.manager.update_target_model()

        # Linear extrapolation of the remaining time from the pace so far.
        progress = episodes_done / num_episodes
        elapsed_time = time.time() - training_started
        if progress > 0:
            projected_total = elapsed_time / progress
        else:
            projected_total = 0
        remaining_time = projected_total - elapsed_time
        logger.info(f"Overall progress: {progress:.2%}, Elapsed time: {elapsed_time:.2f}s, Estimated remaining time: {remaining_time:.2f}s")

    total_training_time = time.time() - training_started
    average_reward = total_training_reward / num_episodes
    logger.info(f"Training completed. Total reward: {total_training_reward}, Average reward per episode: {average_reward:.2f}")
    logger.info(f"Total training time: {total_training_time:.2f} seconds")
    logger.info("Saving models.")

    agent.save_worker_model("worker_model.pth")
    agent.save_manager_model("manager_model.pth")
    agent.save("web_agent_model.pth")

    if reactor.running:
        logger.info("Stopping reactor")
        reactor.stop()
|
|
|
def main(is_colab=False):
    """Run agent training inside the Twisted reactor and block until it ends.

    Schedules ``train_agent`` on the reactor, logs any failure that escapes
    it, requests reactor shutdown once training has finished (successfully
    or not), and then starts the reactor.

    Args:
        is_colab: Value stored in the module-level ``IS_COLAB`` flag.
    """
    global IS_COLAB
    # Local import: only the shutdown helper below needs this exception type.
    from twisted.internet.error import ReactorNotRunning

    IS_COLAB = is_colab
    logger.info("Starting agent training")

    def _log_failure(failure):
        # An errback does not run inside an ``except`` block, so
        # ``exc_info=True`` has no active exception to report; log the
        # traceback carried by the Failure itself instead.
        logger.error("An error occurred: %s", failure.getTraceback())

    def _stop_reactor(_result):
        # ``reactor.stop()`` raises ReactorNotRunning when a stop has already
        # been requested (for example by the training routine itself).
        # Letting that escape would leave an unhandled error in the Deferred.
        try:
            reactor.stop()
        except ReactorNotRunning:
            logger.debug("Reactor stop already requested")

    d = task.deferLater(reactor, 0, train_agent)
    d.addErrback(_log_failure)
    d.addBoth(_stop_reactor)
    reactor.run()
|
|
|
# Script entry point: start training, passing along the Colab flag that was
# detected at import time.
if __name__ == "__main__":
    main(is_colab=IS_COLAB)
|
|