CharlesZhang committed on
Commit
8f842da
·
1 Parent(s): 2ec5014

add reacher env and all mujoco envs now support COT, SPP, SELF-REFLEXION, EXE methods under L1&L3 setting.

Browse files
envs/__init__.py CHANGED
@@ -100,6 +100,11 @@ REGISTRY["RepresentedPong_basic_policies"] = [
100
 
101
  from .mujoco import invertedPendulum_translator, invertedPendulum_policies
102
  from .mujoco import invertedDoublePendulum_translator, invertedDoublePendulum_policies
 
 
 
 
 
103
  from .mujoco import hopper_translator, hopper_policies
104
  from .mujoco import walker2d_translator, walker2d_policies
105
 
@@ -109,16 +114,22 @@ from .mujoco import ant_translator, ant_policies
109
  REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
110
  REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
111
  REGISTRY["invertedPendulum_policies"] = [invertedPendulum_policies.pseudo_random_policy, invertedPendulum_policies.real_random_policy]
112
-
113
  REGISTRY["invertedDoublePendulum_init_translator"] = invertedDoublePendulum_translator.GameDescriber
114
  REGISTRY["invertedDoublePendulum_basic_translator"] = invertedDoublePendulum_translator.BasicStateSequenceTranslator
115
  REGISTRY["invertedDoublePendulum_policies"] = [invertedDoublePendulum_policies.pseudo_random_policy, invertedDoublePendulum_policies.real_random_policy]
116
 
117
 
 
 
 
 
 
 
 
 
118
  REGISTRY["hopper_init_translator"] = hopper_translator.GameDescriber
119
  REGISTRY["hopper_basic_translator"] = hopper_translator.BasicStateSequenceTranslator
120
  REGISTRY["hopper_policies"] = [hopper_policies.pseudo_random_policy, hopper_policies.real_random_policy]
121
-
122
  REGISTRY["walker2d_init_translator"] = walker2d_translator.GameDescriber
123
  REGISTRY["walker2d_basic_translator"] = walker2d_translator.BasicStateSequenceTranslator
124
  REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
 
100
 
101
  from .mujoco import invertedPendulum_translator, invertedPendulum_policies
102
  from .mujoco import invertedDoublePendulum_translator, invertedDoublePendulum_policies
103
+
104
+ from .mujoco import swimmer_translator, swimmer_policies
105
+
106
+ from .mujoco import reacher_translator, reacher_policies
107
+
108
  from .mujoco import hopper_translator, hopper_policies
109
  from .mujoco import walker2d_translator, walker2d_policies
110
 
 
114
  REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
115
  REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
116
  REGISTRY["invertedPendulum_policies"] = [invertedPendulum_policies.pseudo_random_policy, invertedPendulum_policies.real_random_policy]
 
117
  REGISTRY["invertedDoublePendulum_init_translator"] = invertedDoublePendulum_translator.GameDescriber
118
  REGISTRY["invertedDoublePendulum_basic_translator"] = invertedDoublePendulum_translator.BasicStateSequenceTranslator
119
  REGISTRY["invertedDoublePendulum_policies"] = [invertedDoublePendulum_policies.pseudo_random_policy, invertedDoublePendulum_policies.real_random_policy]
120
 
121
 
122
+ REGISTRY["swimmer_init_translator"] = swimmer_translator.GameDescriber
123
+ REGISTRY["swimmer_basic_translator"] = swimmer_translator.BasicStateSequenceTranslator
124
+ REGISTRY["swimmer_policies"] = [swimmer_policies.pseudo_random_policy, swimmer_policies.real_random_policy]
125
+
126
+ REGISTRY["reacher_init_translator"] = reacher_translator.GameDescriber
127
+ REGISTRY["reacher_basic_translator"] = reacher_translator.BasicStateSequenceTranslator
128
+ REGISTRY["reacher_policies"] = [reacher_policies.pseudo_random_policy, reacher_policies.real_random_policy]
129
+
130
  REGISTRY["hopper_init_translator"] = hopper_translator.GameDescriber
131
  REGISTRY["hopper_basic_translator"] = hopper_translator.BasicStateSequenceTranslator
132
  REGISTRY["hopper_policies"] = [hopper_policies.pseudo_random_policy, hopper_policies.real_random_policy]
 
133
  REGISTRY["walker2d_init_translator"] = walker2d_translator.GameDescriber
134
  REGISTRY["walker2d_basic_translator"] = walker2d_translator.BasicStateSequenceTranslator
135
  REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
envs/mujoco/reacher_policies.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import random
3
+
4
def pseudo_random_policy(state, pre_action):
    """Return a random two-component action, ignoring both inputs.

    Args:
        state: Current observation; not used.
        pre_action: Previously taken action; not used.

    Returns:
        A list of two floats, each drawn uniformly from [-1, 1).
    """
    return [2 * random.random() - 1 for _ in range(2)]


# Attach the description at definition time so that it can be read even
# before the policy has been called for the first time.
pseudo_random_policy.description = "Select action randomly"
9
+
10
+
11
def real_random_policy(state, pre_action=1):
    """Return a random two-component action, ignoring both inputs.

    Args:
        state: Current observation; not used.
        pre_action: Previously taken action; not used. Defaults to 1.

    Returns:
        A list of two floats, each drawn uniformly from [-1, 1).
    """
    return [2 * random.random() - 1 for _ in range(2)]


# Attach the description at definition time so that it can be read even
# before the policy has been called for the first time.
real_random_policy.description = "Select action with a random policy"
envs/mujoco/reacher_translator.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''Reacher
2
+ Action Space Box(-1.0, 1.0, (2,), float32)
3
+
4
+ Observation Space Box(-inf, inf, (11,), float64)
5
+ '''
6
class BasicLevelTranslator:
    """Turn one Reacher observation into an English description."""

    def __init__(self):
        pass

    def translate(self, state):
        """Describe a single observation.

        Args:
            state: Iterable of exactly 11 numbers, unpacked in this order:
                cosine of arm1, cosine of arm2, sine of arm1, sine of arm2,
                target x, target y, angular velocity of arm1, angular
                velocity of arm2, and the x/y/z fingertip-to-target
                differences.

        Returns:
            One string made of five sentences separated by single spaces,
            with every number formatted to two decimals.
        """
        (cos1, cos2, sin1, sin2,
         goal_x, goal_y, omega1, omega2,
         dx, dy, dz) = state

        sentences = [
            f"Arm1 has a cosine angle of {cos1:.2f} and a sine angle of {sin1:.2f}.",
            f"Arm2 has a cosine angle of {cos2:.2f} and a sine angle of {sin2:.2f}.",
            f"Target position is at ({goal_x:.2f}, {goal_y:.2f}).",
            f"Arm1's angular velocity is {omega1:.2f} rad/s, and Arm2's is {omega2:.2f} rad/s.",
            f"Vector difference between fingertip and target is ({dx:.2f}, {dy:.2f}, {dz:.2f}).",
        ]
        return " ".join(sentences)
21
+
22
class GameDescriber:
    """Provide the fixed English descriptions of the Reacher task."""

    _GOAL = (
        "The goal is to control a two-jointed robot arm to move its end "
        "effector (fingertip) close to a randomly spawned target."
    )

    _GAME = "".join([
        "In the Reacher game, you control a two-jointed robot arm. ",
        "The objective is to maneuver the arm's fingertip close to a target. ",
        "The observation space includes the cosine and sine of the arm angles, "
        "coordinates of the target, angular velocities of the arms, "
        "and the vector from the fingertip to the target. ",
        "The episode ends after 50 timesteps or if any state space value becomes non-finite. ",
        "Rewards are given based on the distance of the fingertip from the target "
        "and the magnitude of actions applied.",
    ])

    _ACTION = "".join([
        "Your next move: \n Please provide two numerical values representing "
        "the torques applied at the two hinge joints. ",
        "Each value should be within the range of [-1, 1].",
    ])

    def __init__(self, args):
        """Copy the relevant settings from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` and
                ``max_episode_len`` attributes.
        """
        local_only = args.is_only_local_obs == 1
        self.is_only_local_obs = local_only
        self.max_episode_len = args.max_episode_len
        # Both lookup tables are intentionally empty for this environment.
        self.action_desc_dict = {}
        self.reward_desc_dict = {}

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no termination text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state text is produced."""
        return ""

    def describe_goal(self):
        """Return the one-sentence goal statement."""
        return self._GOAL

    def describe_game(self):
        """Return the multi-sentence description of the game."""
        return self._GAME

    def describe_action(self):
        """Return the prompt asking for the two torque values."""
        return self._ACTION
49
+
50
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe a list of Reacher transitions, or only the latest state."""

    def translate(self, infos, is_current=False):
        """Translate transition records into text.

        Args:
            infos: Sequence of dicts. Each dict is read through the keys
                'state', 'action' (indexed at 0 and 1), 'reward' and
                'next_state'.
            is_current: When true, only the 'state' entry of the last
                record is described.

        Returns:
            A single string when ``is_current`` is true, otherwise a list
            with one description string per record.
        """
        describe_state = BasicLevelTranslator().translate

        if is_current:
            return describe_state(infos[-1]['state'])

        descriptions = []
        for step in infos:
            assert 'state' in step, "info should contain state information"

            before = describe_state(step['state'])
            move = (
                f"Take Action: Apply Torque at Joint 1: {step['action'][0]:.2f}, "
                f"Joint 2 Torque: {step['action'][1]:.2f}"
            )
            outcome = f"Result: Reward of {step['reward']:.2f}, "
            after = describe_state(step['next_state'])
            # The doubled backslash emits the two characters "\n" literally,
            # not a line break.
            descriptions.append(f"{before}.\\n {move} \\n {outcome} \\n Transit to {after}")
        return descriptions
envs/mujoco/swimmer_policies.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import random
3
+
4
def pseudo_random_policy(state, pre_action):
    """Return a random two-component action, ignoring both inputs.

    Args:
        state: Current observation; not used.
        pre_action: Previously taken action; not used.

    Returns:
        A list of two floats, each drawn uniformly from [-1, 1).
    """
    return [2 * random.random() - 1 for _ in range(2)]


# Attach the description at definition time so that it can be read even
# before the policy has been called for the first time.
pseudo_random_policy.description = "Select action randomly"
9
+
10
+
11
def real_random_policy(state, pre_action=1):
    """Return a random two-component action, ignoring both inputs.

    Args:
        state: Current observation; not used.
        pre_action: Previously taken action; not used. Defaults to 1.

    Returns:
        A list of two floats, each drawn uniformly from [-1, 1).
    """
    return [2 * random.random() - 1 for _ in range(2)]


# Attach the description at definition time so that it can be read even
# before the policy has been called for the first time.
real_random_policy.description = "Select action with a random policy"
envs/mujoco/swimmer_translator.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''Swimmer
2
+ Action Space Box(-1.0, 1.0, (2,), float32)
3
+
4
+ Observation Space Box(-inf, inf, (8,), float64)
5
+ '''
6
+
7
class BasicLevelTranslator:
    """Turn one Swimmer observation into a multi-line English description."""

    def translate(self, state):
        """Describe a single observation.

        Args:
            state: Indexable holding at least eight numbers; indices 0-7
                are read in order.

        Returns:
            Eight newline-separated lines, one per observed quantity, each
            value formatted to two decimals followed by its unit.
        """
        # (label, unit) for state[0] .. state[7], in index order.
        fields = (
            ("Angle of the front tip", "rad"),
            ("Angle of the first rotor", "rad"),
            ("Angle of the second rotor", "rad"),
            ("Velocity of the tip along the x-axis", "m/s"),
            ("Velocity of the tip along the y-axis", "m/s"),
            ("Angular velocity of front tip", "rad/s"),
            ("Angular velocity of the first rotor", "rad/s"),
            ("Angular velocity of the second rotor", "rad/s"),
        )
        lines = [
            f"{label}: {state[idx]:.2f} {unit}"
            for idx, (label, unit) in enumerate(fields)
        ]
        return "\n".join(lines)
20
+
21
class GameDescriber:
    """Provide the fixed English descriptions of the Swimmer task."""

    _GOAL = "".join([
        "The goal in the Swimmer environment is to move as fast as possible "
        "towards the right by applying torque to the rotors and utilizing "
        "fluid friction. ",
        "The swimmer consists of three or more segments connected by rotors, "
        "and the objective is to achieve efficient swimming motion.",
    ])

    _GAME = "".join([
        "In the Swimmer environment, you control a swimmer consisting of "
        "three or more segments connected by rotors. ",
        "Your goal is to make the swimmer move as fast as possible to the "
        "right in a two-dimensional pool. ",
        "You can achieve this by applying torques to the rotors and "
        "utilizing fluid friction. ",
        "The environment provides observations of the swimmer's angles, "
        "velocities, and angular velocities.",
    ])

    _ACTION = "".join([
        "Your next move: \nPlease provide a list of two numerical values, "
        "each within the range of [-1, 1], representing the torques to be "
        "applied to the two rotors of the swimmer. ",
        "These torques will help control the swimmer's movement and achieve "
        "efficient swimming.",
    ])

    def __init__(self, args):
        """Copy the relevant settings from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` and
                ``max_episode_len`` attributes.
        """
        local_only = args.is_only_local_obs == 1
        self.is_only_local_obs = local_only
        self.max_episode_len = args.max_episode_len
        # Both lookup tables are intentionally empty for this environment.
        self.action_desc_dict = {}
        self.reward_desc_dict = {}

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no termination text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state text is produced."""
        return ""

    def describe_goal(self):
        """Return the goal statement."""
        return self._GOAL

    def describe_game(self):
        """Return the multi-sentence description of the game."""
        return self._GAME

    def describe_action(self):
        """Return the prompt asking for the two rotor torques."""
        return self._ACTION
60
+
61
+
62
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe a list of Swimmer transitions, or only the latest state."""

    def translate(self, infos, is_current=False):
        """Translate transition records into text.

        Args:
            infos: Sequence of dicts. Each dict is read through the keys
                'state', 'action' (indexed at 0 and 1), 'reward' and
                'next_state'.
            is_current: When true, only the 'state' entry of the last
                record is described.

        Returns:
            A single string when ``is_current`` is true, otherwise a list
            with one newline-separated description per record.
        """
        describe_state = BasicLevelTranslator().translate

        if is_current:
            return describe_state(infos[-1]['state'])

        descriptions = []
        for step in infos:
            assert 'state' in step, "info should contain state information"
            before = describe_state(step['state'])
            move = "Torques Applied: " + (
                f"Rotor 1: {step['action'][0]:.2f}, Rotor 2: {step['action'][1]:.2f}"
            )
            outcome = f"Result: Reward of {step['reward']:.2f}"
            after = describe_state(step['next_state'])
            descriptions.append(
                "\n".join([before, move, outcome, "Transit to", after])
            )
        return descriptions
record_reflexion.csv CHANGED
@@ -10,5 +10,10 @@ FrozenLake-v1,1,expert,200.0
10
  MountainCarContinuous-v0,1,expert,200.0
11
  RepresentedBoxing-v0,1,expert,200.0
12
  RepresentedPong-v0,1,expert,200.0
13
- Ant-v4,1,expert,5000
 
 
 
 
 
14
 
 
10
  MountainCarContinuous-v0,1,expert,200.0
11
  RepresentedBoxing-v0,1,expert,200.0
12
  RepresentedPong-v0,1,expert,200.0
13
+ Ant-v4,1,expert,5000.2
14
+ HalfCheetah-v4,1,expert,12138.8
15
+ Hopper-v4,1,expert,3542.2
16
+ Walker2d-v4,1,expert,5000.0
17
+ Swimmer-v4,1,expert,44.4
18
+ Reacher-v4,1,expert,-2.6
19
 
test_atari.sh → shell/test_atari.sh RENAMED
File without changes
shell/test_mujoco_ant.sh CHANGED
@@ -1,6 +1,18 @@
1
 
2
  # Ant-v4
3
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  # REFLEXION
5
  python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
6
 
 
1
 
2
  # Ant-v4
3
 
4
+ # COT
5
+ python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
6
+
7
+ python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
8
+
9
+ # SPP
10
+ python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
11
+
12
+ python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
13
+
14
+
15
+
16
  # REFLEXION
17
  python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
18
 
shell/test_mujoco_invertedDoublePendulum.sh CHANGED
@@ -1,5 +1,16 @@
1
  # InvertedDoublePendulum-v4
2
 
 
 
 
 
 
 
 
 
 
 
 
3
  # REFLEXION
4
  python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
5
 
 
1
  # InvertedDoublePendulum-v4
2
 
3
+ # COT
4
+ python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
5
+
6
+ python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
+
8
+ # SPP
9
+ python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
10
+
11
+ python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
12
+
13
+
14
  # REFLEXION
15
  python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
16
 
shell/test_mujoco_invertedPendulum.sh CHANGED
@@ -1,16 +1,25 @@
1
  # InvertedPendulum-v4
2
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  # REFLEXION
4
  python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
5
 
6
  python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
7
 
8
- python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
9
 
10
 
11
  # exe
12
  python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
13
 
14
  python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
15
-
16
- python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
 
1
  # InvertedPendulum-v4
2
 
3
# COT
# The environment id matches the v4 id used by the rest of this script.
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1

python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller

# SPP
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1

python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
12
+
13
+
14
+
15
  # REFLEXION
16
  python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
17
 
18
  python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
19
 
 
20
 
21
 
22
  # exe
23
  python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
24
 
25
  python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
 
 
shell/test_mujoco_reacher.sh ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reacher-v4
2
+
3
+ # COT
4
+ python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
5
+
6
+ python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
+
8
+ # SPP
9
+ python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
10
+
11
+ python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
12
+
13
+
14
+ # REFLEXION
15
+ python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
16
+
17
+ python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
18
+
19
+ python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
20
+
21
+
22
+ # exe
23
+ python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
24
+
25
+ python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
26
+
27
+ python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
shell/test_mujoco_swimmer.sh ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Swimmer-v4
2
+
3
+ # COT
4
+ python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
5
+
6
+ python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
+
8
+ # SPP
9
+ python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
10
+
11
+ python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
12
+
13
+
14
+ # REFLEXION
15
+ python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
16
+
17
+ python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
18
+
19
+ python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
20
+
21
+
22
+ # exe
23
+ python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
24
+
25
+ python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
26
+
27
+ python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
shell/test_mujoco_walker2d.sh CHANGED
@@ -1,5 +1,17 @@
1
  # Walker2d-v4
2
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  # REFLEXION
4
  python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
5
 
 
1
  # Walker2d-v4
2
 
3
+ # COT
4
+ python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
5
+
6
+ python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
+
8
+ # SPP
9
+ python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
10
+
11
+ python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
12
+
13
+
14
+
15
  # REFLEXION
16
  python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
17
 
test_reflexion.sh → shell/test_reflexion.sh RENAMED
File without changes