# train_agent.py
"""Training driver for the AutonomousWebAgent.

Runs a fixed number of search episodes under the Twisted reactor, logs
per-episode rewards and timing, and saves the trained models at the end.
"""

from twisted.internet import reactor, defer, task
from agent import AutonomousWebAgent
import random
import logging
import sys
import time
import codecs

# Detect Google Colab: the `google.colab` package is pre-imported in that runtime.
IS_COLAB = 'google.colab' in sys.modules


# Configure logging. Colab manages its own stdout encoding, so the explicit
# UTF-8 file/stream handlers below are only installed outside Colab.
if IS_COLAB:
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
else:
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        handlers=[
                            logging.FileHandler("agent_training.log", encoding='utf-8'),
                            logging.StreamHandler(codecs.getwriter('utf-8')(sys.stdout.buffer))
                        ])

logger = logging.getLogger(__name__)

# List of diverse queries
QUERIES = [
    "machine learning", "climate change", "renewable energy", "artificial intelligence",
    "quantum computing", "blockchain technology", "gene editing", "virtual reality",
    "space exploration", "cybersecurity", "autonomous vehicles", "Internet of Things",
    "3D printing", "nanotechnology", "bioinformatics", "augmented reality", "robotics",
    "data science", "neural networks", "cloud computing", "edge computing", "5G technology",
    "cryptocurrency", "natural language processing", "computer vision"
]
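
# Optional sketch (not used by the loop below): drawing uniformly from
# QUERIES can hand back-to-back episodes the same query. A hypothetical
# helper like this one avoids immediate repeats:
def sample_query(previous=None):
    """Pick a random query, skipping the previous episode's choice."""
    candidates = [q for q in QUERIES if q != previous] or QUERIES
    return random.choice(candidates)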

@defer.inlineCallbacks
def train_agent():
    # Updated state_size to 7 to match the feature extraction in AutonomousWebAgent
    state_size = 7  # word_count, link_count, header_count, semantic_similarity, image_count, script_count, css_count
    action_size = 3  # 0: Click Link, 1: Summarize, 2: RAG Generate
    num_options = 3  # 0: Search, 1: Summarize, 2: RAG Generate
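    # For reference, a state under the 7-feature layout above is a vector like
    # [word_count, link_count, header_count, semantic_similarity, image_count,
    #  script_count, css_count], e.g. [1842, 73, 12, 0.62, 15, 9, 4]
    # (illustrative values only).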

    # Initialize the AutonomousWebAgent with the required arguments
    agent = AutonomousWebAgent(
        state_size=state_size,
        action_size=action_size,
        num_options=num_options,  # number of high-level options for hierarchical RL (manager/worker)
        hidden_size=64,
        learning_rate=0.001,
        gamma=0.99,
        epsilon=1.0,
        epsilon_decay=0.995,
        epsilon_min=0.01,
        knowledge_base_path='knowledge_base.json'
    )
    logger.info(f"Initialized AutonomousWebAgent with state_size={state_size}, action_size={action_size}, num_options={num_options}")

    num_episodes = 10  # Adjust as needed
    total_training_reward = 0
    start_time = time.time()

    for episode in range(num_episodes):
        query = random.choice(QUERIES)
        logger.info(f"Starting episode {episode + 1}/{num_episodes} with query: {query}")
        episode_start_time = time.time()
        
        try:
            # Initiate the search process
            search_deferred = agent.search(query)
            search_deferred.addTimeout(300, reactor)  # 5-minute timeout; errbacks with defer.TimeoutError (handled below)
            total_reward = yield search_deferred
            total_training_reward += total_reward
            episode_duration = time.time() - episode_start_time
            logger.info(f"Episode {episode + 1}/{num_episodes}, Query: {query}, Total Reward: {total_reward}, Duration: {episode_duration:.2f} seconds")
        except defer.TimeoutError:
            logger.error(f"Episode {episode + 1} timed out")
            total_reward = -1  # Assign a negative reward for timeout
            total_training_reward += total_reward
        except Exception as e:
            logger.error(f"Error in episode {episode + 1}: {str(e)}", exc_info=True)
            total_reward = -1  # Assign a negative reward for errors
            total_training_reward += total_reward

        # Update target models periodically
        if (episode + 1) % 10 == 0:
            logger.info(f"Updating target models at episode {episode + 1}")
            agent.update_worker_target_model()
            agent.update_manager_target_model()
            agent.manager.update_target_model()

        # Log overall progress
        progress = (episode + 1) / num_episodes
        elapsed_time = time.time() - start_time
        estimated_total_time = elapsed_time / progress if progress > 0 else 0
        remaining_time = estimated_total_time - elapsed_time
        logger.info(f"Overall progress: {progress:.2%}, Elapsed time: {elapsed_time:.2f}s, Estimated remaining time: {remaining_time:.2f}s")

    total_training_time = time.time() - start_time
    average_reward = total_training_reward / num_episodes
    logger.info(f"Training completed. Total reward: {total_training_reward}, Average reward per episode: {average_reward:.2f}")
    logger.info(f"Total training time: {total_training_time:.2f} seconds")
    logger.info("Saving models.")

    # Save both Worker and Manager models
    agent.save_worker_model("worker_model.pth")
    agent.save_manager_model("manager_model.pth")
    agent.save("web_agent_model.pth")  # Assuming this saves additional components if needed
    
    if reactor.running:
        logger.info("Stopping reactor")
        reactor.stop()

def main(is_colab=False):
    # Note: logging is configured at import time, so flipping this flag here
    # only updates IS_COLAB for later readers; it does not reconfigure logging.
    global IS_COLAB
    IS_COLAB = is_colab
    logger.info("Starting agent training")
    d = task.deferLater(reactor, 0, train_agent)
    d.addErrback(lambda failure: logger.error("An error occurred: %s", failure.getTraceback()))
    # train_agent() stops the reactor itself on success; only stop it here if
    # an error path left it running, otherwise reactor.stop() would raise
    # ReactorNotRunning.
    d.addBoth(lambda _: reactor.stop() if reactor.running else None)
    reactor.run()
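
# A more idiomatic Twisted entry point, sketched for reference: task.react
# starts and stops the reactor itself (using it would require dropping the
# explicit reactor.stop() calls in train_agent and main above):
#
#     from twisted.internet import task
#     task.react(lambda _reactor: train_agent())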

if __name__ == "__main__":
    main(IS_COLAB)