Spaces:
Runtime error
Runtime error
Commit
·
8f842da
1
Parent(s):
2ec5014
Add the Reacher env; all MuJoCo envs now support the COT, SPP, Self-Reflexion, and EXE methods under the L1 and L3 settings.
Browse files- envs/__init__.py +13 -2
- envs/mujoco/reacher_policies.py +15 -0
- envs/mujoco/reacher_translator.py +67 -0
- envs/mujoco/swimmer_policies.py +15 -0
- envs/mujoco/swimmer_translator.py +80 -0
- record_reflexion.csv +6 -1
- test_atari.sh → shell/test_atari.sh +0 -0
- shell/test_mujoco_ant.sh +12 -0
- shell/test_mujoco_invertedDoublePendulum.sh +11 -0
- shell/test_mujoco_invertedPendulum.sh +12 -3
- shell/test_mujoco_reacher.sh +27 -0
- shell/test_mujoco_swimmer.sh +27 -0
- shell/test_mujoco_walker2d.sh +12 -0
- test_reflexion.sh → shell/test_reflexion.sh +0 -0
envs/__init__.py
CHANGED
@@ -100,6 +100,11 @@ REGISTRY["RepresentedPong_basic_policies"] = [
|
|
100 |
|
101 |
from .mujoco import invertedPendulum_translator, invertedPendulum_policies
|
102 |
from .mujoco import invertedDoublePendulum_translator, invertedDoublePendulum_policies
|
|
|
|
|
|
|
|
|
|
|
103 |
from .mujoco import hopper_translator, hopper_policies
|
104 |
from .mujoco import walker2d_translator, walker2d_policies
|
105 |
|
@@ -109,16 +114,22 @@ from .mujoco import ant_translator, ant_policies
|
|
109 |
REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
|
110 |
REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
|
111 |
REGISTRY["invertedPendulum_policies"] = [invertedPendulum_policies.pseudo_random_policy, invertedPendulum_policies.real_random_policy]
|
112 |
-
|
113 |
REGISTRY["invertedDoublePendulum_init_translator"] = invertedDoublePendulum_translator.GameDescriber
|
114 |
REGISTRY["invertedDoublePendulum_basic_translator"] = invertedDoublePendulum_translator.BasicStateSequenceTranslator
|
115 |
REGISTRY["invertedDoublePendulum_policies"] = [invertedDoublePendulum_policies.pseudo_random_policy, invertedDoublePendulum_policies.real_random_policy]
|
116 |
|
117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
REGISTRY["hopper_init_translator"] = hopper_translator.GameDescriber
|
119 |
REGISTRY["hopper_basic_translator"] = hopper_translator.BasicStateSequenceTranslator
|
120 |
REGISTRY["hopper_policies"] = [hopper_policies.pseudo_random_policy, hopper_policies.real_random_policy]
|
121 |
-
|
122 |
REGISTRY["walker2d_init_translator"] = walker2d_translator.GameDescriber
|
123 |
REGISTRY["walker2d_basic_translator"] = walker2d_translator.BasicStateSequenceTranslator
|
124 |
REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
|
|
|
100 |
|
101 |
from .mujoco import invertedPendulum_translator, invertedPendulum_policies
|
102 |
from .mujoco import invertedDoublePendulum_translator, invertedDoublePendulum_policies
|
103 |
+
|
104 |
+
from .mujoco import swimmer_translator, swimmer_policies
|
105 |
+
|
106 |
+
from .mujoco import reacher_translator, reacher_policies
|
107 |
+
|
108 |
from .mujoco import hopper_translator, hopper_policies
|
109 |
from .mujoco import walker2d_translator, walker2d_policies
|
110 |
|
|
|
114 |
REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
|
115 |
REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
|
116 |
REGISTRY["invertedPendulum_policies"] = [invertedPendulum_policies.pseudo_random_policy, invertedPendulum_policies.real_random_policy]
|
|
|
117 |
REGISTRY["invertedDoublePendulum_init_translator"] = invertedDoublePendulum_translator.GameDescriber
|
118 |
REGISTRY["invertedDoublePendulum_basic_translator"] = invertedDoublePendulum_translator.BasicStateSequenceTranslator
|
119 |
REGISTRY["invertedDoublePendulum_policies"] = [invertedDoublePendulum_policies.pseudo_random_policy, invertedDoublePendulum_policies.real_random_policy]
|
120 |
|
121 |
|
122 |
+
REGISTRY["swimmer_init_translator"] = swimmer_translator.GameDescriber
|
123 |
+
REGISTRY["swimmer_basic_translator"] = swimmer_translator.BasicStateSequenceTranslator
|
124 |
+
REGISTRY["swimmer_policies"] = [swimmer_policies.pseudo_random_policy, swimmer_policies.real_random_policy]
|
125 |
+
|
126 |
+
REGISTRY["reacher_init_translator"] = reacher_translator.GameDescriber
|
127 |
+
REGISTRY["reacher_basic_translator"] = reacher_translator.BasicStateSequenceTranslator
|
128 |
+
REGISTRY["reacher_policies"] = [reacher_policies.pseudo_random_policy, reacher_policies.real_random_policy]
|
129 |
+
|
130 |
REGISTRY["hopper_init_translator"] = hopper_translator.GameDescriber
|
131 |
REGISTRY["hopper_basic_translator"] = hopper_translator.BasicStateSequenceTranslator
|
132 |
REGISTRY["hopper_policies"] = [hopper_policies.pseudo_random_policy, hopper_policies.real_random_policy]
|
|
|
133 |
REGISTRY["walker2d_init_translator"] = walker2d_translator.GameDescriber
|
134 |
REGISTRY["walker2d_basic_translator"] = walker2d_translator.BasicStateSequenceTranslator
|
135 |
REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
|
envs/mujoco/reacher_policies.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import random
|
3 |
+
|
4 |
+
def pseudo_random_policy(state, pre_action):
    """Return a random two-element action for Reacher.

    Both ``state`` and ``pre_action`` are accepted but ignored. Each call
    also (re)sets ``pseudo_random_policy.description``.

    Returns:
        A list of two floats, each computed as ``2 * random.random() - 1``
        and therefore lying in ``[-1, 1)``.
    """
    # The description is attached to the function object on every call.
    pseudo_random_policy.description = "Select action randomly"

    torques = []
    for _ in range(2):
        torques.append(2 * random.random() - 1)
    return torques
|
9 |
+
|
10 |
+
|
11 |
+
def real_random_policy(state, pre_action=1):
    """Return a random two-element action for Reacher.

    Both ``state`` and ``pre_action`` are accepted but ignored. Each call
    also (re)sets ``real_random_policy.description``.

    Returns:
        A list of two floats, each computed as ``2 * random.random() - 1``
        and therefore lying in ``[-1, 1)``.
    """
    # The description is attached to the function object on every call.
    real_random_policy.description = "Select action with a random policy"

    torques = []
    for _ in range(2):
        torques.append(2 * random.random() - 1)
    return torques
|
envs/mujoco/reacher_translator.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''Reacher
|
2 |
+
Action Space Box(-1.0, 1.0, (2,), float32)
|
3 |
+
|
4 |
+
Observation Space Box(-inf, inf, (11,), float64)
|
5 |
+
'''
|
6 |
+
class BasicLevelTranslator:
    """Turn a single Reacher observation into an English description."""

    def __init__(self):
        """No state is kept; the translator is purely functional."""

    def translate(self, state):
        """Describe one observation.

        Args:
            state: An iterable of exactly 11 numbers, unpacked in the order
                cos(arm1), cos(arm2), sin(arm1), sin(arm2), target x,
                target y, angular velocity of arm1, angular velocity of
                arm2, and the x/y/z components of the fingertip-to-target
                difference.

        Returns:
            A single-line string of five sentences separated by spaces,
            with every number formatted to two decimals.
        """
        (cos1, cos2, sin1, sin2,
         goal_x, goal_y,
         omega1, omega2,
         dx, dy, dz) = state

        sentences = [
            f"Arm1 has a cosine angle of {cos1:.2f} and a sine angle of {sin1:.2f}.",
            f"Arm2 has a cosine angle of {cos2:.2f} and a sine angle of {sin2:.2f}.",
            f"Target position is at ({goal_x:.2f}, {goal_y:.2f}).",
            f"Arm1's angular velocity is {omega1:.2f} rad/s, and Arm2's is {omega2:.2f} rad/s.",
            f"Vector difference between fingertip and target is ({dx:.2f}, {dy:.2f}, {dz:.2f}).",
        ]
        return " ".join(sentences)
|
21 |
+
|
22 |
+
class GameDescriber:
    """Fixed text descriptions of the Reacher task (goal, rules, action format)."""

    def __init__(self, args):
        """Copy the settings this describer keeps from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` (compared against 1)
                and ``max_episode_len``.
        """
        self.is_only_local_obs = args.is_only_local_obs == 1
        self.max_episode_len = args.max_episode_len
        # Both lookup tables are left empty for this environment.
        self.action_desc_dict = dict()
        self.reward_desc_dict = dict()

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no termination text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state preview is produced."""
        return ""

    def describe_goal(self):
        """Return a one-sentence statement of the task goal."""
        goal = "The goal is to control a two-jointed robot arm to move its end effector (fingertip) close to a randomly spawned target."
        return goal

    def describe_game(self):
        """Return a paragraph describing the task, observations and rewards."""
        pieces = (
            "In the Reacher game, you control a two-jointed robot arm. The objective is to maneuver the arm's fingertip close to a target. ",
            "The observation space includes the cosine and sine of the arm angles, coordinates of the target, angular velocities of the arms, ",
            "and the vector from the fingertip to the target. The episode ends after 50 timesteps or if any state space value becomes non-finite. ",
            "Rewards are given based on the distance of the fingertip from the target and the magnitude of actions applied.",
        )
        return "".join(pieces)

    def describe_action(self):
        """Return the instruction telling the agent how to format its action."""
        pieces = (
            "Your next move: \n Please provide two numerical values representing the torques applied at the two hinge joints. ",
            "Each value should be within the range of [-1, 1].",
        )
        return "".join(pieces)
|
49 |
+
|
50 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe a sequence of Reacher transitions, or only the latest state."""

    def translate(self, infos, is_current=False):
        """Translate transition records into text.

        Args:
            infos: Sequence of dicts. Each record is read for the keys
                ``'state'``, ``'action'`` (indexed at 0 and 1), ``'reward'``
                and ``'next_state'``.
            is_current: When true, only ``infos[-1]['state']`` is described.

        Returns:
            A single string when ``is_current`` is true; otherwise a list
            with one description string per record.
        """
        describe_state = BasicLevelTranslator().translate

        if is_current:
            return describe_state(infos[-1]['state'])

        transcript = []
        for step in infos:
            assert 'state' in step, "info should contain state information"

            state_text = describe_state(step['state'])
            action_text = (
                f"Take Action: Apply Torque at Joint 1: {step['action'][0]:.2f}, "
                f"Joint 2 Torque: {step['action'][1]:.2f}"
            )
            reward_text = f"Result: Reward of {step['reward']:.2f}, "
            next_text = describe_state(step['next_state'])

            # NOTE(review): the separators below contain a literal backslash
            # followed by "n", not a newline character. Kept unchanged;
            # confirm whether that is intended.
            transcript.append("".join([
                state_text, ".\\n ",
                action_text, " \\n ",
                reward_text, " \\n Transit to ",
                next_text,
            ]))
        return transcript
|
envs/mujoco/swimmer_policies.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import random
|
3 |
+
|
4 |
+
def pseudo_random_policy(state, pre_action):
    """Return a random two-element action for Swimmer.

    Both ``state`` and ``pre_action`` are accepted but ignored. Each call
    also (re)sets ``pseudo_random_policy.description``.

    Returns:
        A list of two floats, each computed as ``2 * random.random() - 1``
        and therefore lying in ``[-1, 1)``.
    """
    # The description is attached to the function object on every call.
    pseudo_random_policy.description = "Select action randomly"

    first = 2 * random.random() - 1
    second = 2 * random.random() - 1
    return [first, second]
|
9 |
+
|
10 |
+
|
11 |
+
def real_random_policy(state, pre_action=1):
    """Return a random two-element action for Swimmer.

    Both ``state`` and ``pre_action`` are accepted but ignored. Each call
    also (re)sets ``real_random_policy.description``.

    Returns:
        A list of two floats, each computed as ``2 * random.random() - 1``
        and therefore lying in ``[-1, 1)``.
    """
    # The description is attached to the function object on every call.
    real_random_policy.description = "Select action with a random policy"

    first = 2 * random.random() - 1
    second = 2 * random.random() - 1
    return [first, second]
|
envs/mujoco/swimmer_translator.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''Swimmer
|
2 |
+
Action Space Box(-1.0, 1.0, (2,), float32)
|
3 |
+
|
4 |
+
Observation Space Box(-inf, inf, (8,), float64)
|
5 |
+
'''
|
6 |
+
|
7 |
+
class BasicLevelTranslator:
    """Turn a single Swimmer observation into a multi-line description."""

    def translate(self, state):
        """Describe one observation.

        Args:
            state: Indexable sequence; elements 0 through 7 are read.

        Returns:
            Eight lines joined by newlines, one per observation entry, each
            formatted to two decimals and followed by its unit.
        """
        # (label, unit) for observation entries 0..7, in order.
        fields = (
            ("Angle of the front tip", "rad"),
            ("Angle of the first rotor", "rad"),
            ("Angle of the second rotor", "rad"),
            ("Velocity of the tip along the x-axis", "m/s"),
            ("Velocity of the tip along the y-axis", "m/s"),
            ("Angular velocity of front tip", "rad/s"),
            ("Angular velocity of the first rotor", "rad/s"),
            ("Angular velocity of the second rotor", "rad/s"),
        )
        lines = []
        for idx, (label, unit) in enumerate(fields):
            # Index explicitly so a too-short state still raises IndexError.
            lines.append(f"{label}: {state[idx]:.2f} {unit}")
        return "\n".join(lines)
|
20 |
+
|
21 |
+
class GameDescriber:
    """Fixed text descriptions of the Swimmer task (goal, rules, action format)."""

    def __init__(self, args):
        """Copy the settings this describer keeps from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` (compared against 1)
                and ``max_episode_len``.
        """
        self.is_only_local_obs = args.is_only_local_obs == 1
        self.max_episode_len = args.max_episode_len
        # Both lookup tables are left empty for this environment.
        self.action_desc_dict = dict()
        self.reward_desc_dict = dict()

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no termination text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state preview is produced."""
        return ""

    def describe_goal(self):
        """Return a short statement of the task goal."""
        pieces = (
            "The goal in the Swimmer environment is to move as fast as possible towards the right ",
            "by applying torque to the rotors and utilizing fluid friction. The swimmer consists of ",
            "three or more segments connected by rotors, and the objective is to achieve efficient ",
            "swimming motion.",
        )
        return "".join(pieces)

    def describe_game(self):
        """Return a paragraph describing the task and its observations."""
        pieces = (
            "In the Swimmer environment, you control a swimmer consisting of three or more segments ",
            "connected by rotors. Your goal is to make the swimmer move as fast as possible to the right ",
            "in a two-dimensional pool. You can achieve this by applying torques to the rotors and utilizing ",
            "fluid friction. The environment provides observations of the swimmer's angles, velocities, ",
            "and angular velocities.",
        )
        return "".join(pieces)

    def describe_action(self):
        """Return the instruction telling the agent how to format its action."""
        pieces = (
            "Your next move: \nPlease provide a list of two numerical values, each within the range of [-1, 1], ",
            "representing the torques to be applied to the two rotors of the swimmer. These torques will help ",
            "control the swimmer's movement and achieve efficient swimming.",
        )
        return "".join(pieces)
|
59 |
+
)
|
60 |
+
|
61 |
+
|
62 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe a sequence of Swimmer transitions, or only the latest state."""

    def translate(self, infos, is_current=False):
        """Translate transition records into text.

        Args:
            infos: Sequence of dicts. Each record is read for the keys
                ``'state'``, ``'action'`` (indexed at 0 and 1), ``'reward'``
                and ``'next_state'``.
            is_current: When true, only ``infos[-1]['state']`` is described.

        Returns:
            A single string when ``is_current`` is true; otherwise a list
            with one newline-separated description per record.
        """
        describe_state = BasicLevelTranslator().translate

        if is_current:
            return describe_state(infos[-1]['state'])

        transcript = []
        for step in infos:
            assert 'state' in step, "info should contain state information"

            state_text = describe_state(step['state'])
            torque_text = "Torques Applied: " + "Rotor 1: {:.2f}, Rotor 2: {:.2f}".format(
                step['action'][0], step['action'][1]
            )
            reward_text = "Result: Reward of {:.2f}".format(step['reward'])
            next_text = describe_state(step['next_state'])

            transcript.append(
                "\n".join([state_text, torque_text, reward_text, "Transit to", next_text])
            )
        return transcript
|
record_reflexion.csv
CHANGED
@@ -10,5 +10,10 @@ FrozenLake-v1,1,expert,200.0
|
|
10 |
MountainCarContinuous-v0,1,expert,200.0
|
11 |
RepresentedBoxing-v0,1,expert,200.0
|
12 |
RepresentedPong-v0,1,expert,200.0
|
13 |
-
Ant-v4,1,expert,5000
|
|
|
|
|
|
|
|
|
|
|
14 |
|
|
|
10 |
MountainCarContinuous-v0,1,expert,200.0
|
11 |
RepresentedBoxing-v0,1,expert,200.0
|
12 |
RepresentedPong-v0,1,expert,200.0
|
13 |
+
Ant-v4,1,expert,5000.2
|
14 |
+
HalfCheetah-v4,1,expert,12138.8
|
15 |
+
Hopper-v4,1,expert,3542.2
|
16 |
+
Walker2d-v4,1,expert,5000.0
|
17 |
+
Swimmer-v4,1,expert,44.4
|
18 |
+
Reacher-v4,1,expert,-2.6
|
19 |
|
test_atari.sh → shell/test_atari.sh
RENAMED
File without changes
|
shell/test_mujoco_ant.sh
CHANGED
@@ -1,6 +1,18 @@
|
|
1 |
|
2 |
# Ant-v4
|
3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
# REFLEXION
|
5 |
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
6 |
|
|
|
1 |
|
2 |
# Ant-v4
|
3 |
|
4 |
+
# COT
|
5 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
6 |
+
|
7 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
8 |
+
|
9 |
+
# SPP
|
10 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
11 |
+
|
12 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
13 |
+
|
14 |
+
|
15 |
+
|
16 |
# REFLEXION
|
17 |
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
18 |
|
shell/test_mujoco_invertedDoublePendulum.sh
CHANGED
@@ -1,5 +1,16 @@
|
|
1 |
# InvertedDoublePendulum-v4
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
# REFLEXION
|
4 |
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
5 |
|
|
|
1 |
# InvertedDoublePendulum-v4
|
2 |
|
3 |
+
# COT
|
4 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
5 |
+
|
6 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
7 |
+
|
8 |
+
# SPP
|
9 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
10 |
+
|
11 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
12 |
+
|
13 |
+
|
14 |
# REFLEXION
|
15 |
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
16 |
|
shell/test_mujoco_invertedPendulum.sh
CHANGED
@@ -1,16 +1,25 @@
|
|
1 |
# InvertedPendulum-v4
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
# REFLEXION
|
4 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
5 |
|
6 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
7 |
|
8 |
-
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
9 |
|
10 |
|
11 |
# exe
|
12 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
13 |
|
14 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
15 |
-
|
16 |
-
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|
|
|
1 |
# InvertedPendulum-v4
|
2 |
|
3 |
+
# COT
|
4 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
5 |
+
|
6 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
7 |
+
|
8 |
+
# SPP
|
9 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
10 |
+
|
11 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
12 |
+
|
13 |
+
|
14 |
+
|
15 |
# REFLEXION
|
16 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
17 |
|
18 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
19 |
|
|
|
20 |
|
21 |
|
22 |
# exe
|
23 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
24 |
|
25 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
|
|
|
shell/test_mujoco_reacher.sh
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Reacher-v4
|
2 |
+
|
3 |
+
# COT
|
4 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
5 |
+
|
6 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
7 |
+
|
8 |
+
# SPP
|
9 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
10 |
+
|
11 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
12 |
+
|
13 |
+
|
14 |
+
# REFLEXION
|
15 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
16 |
+
|
17 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
18 |
+
|
19 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
20 |
+
|
21 |
+
|
22 |
+
# exe
|
23 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
24 |
+
|
25 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
26 |
+
|
27 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|
shell/test_mujoco_swimmer.sh
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Swimmer-v4
|
2 |
+
|
3 |
+
# COT
|
4 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
5 |
+
|
6 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
7 |
+
|
8 |
+
# SPP
|
9 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
10 |
+
|
11 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
12 |
+
|
13 |
+
|
14 |
+
# REFLEXION
|
15 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
16 |
+
|
17 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
18 |
+
|
19 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
20 |
+
|
21 |
+
|
22 |
+
# exe
|
23 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
24 |
+
|
25 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
26 |
+
|
27 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|
shell/test_mujoco_walker2d.sh
CHANGED
@@ -1,5 +1,17 @@
|
|
1 |
# Walker2d-v4
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
# REFLEXION
|
4 |
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
5 |
|
|
|
1 |
# Walker2d-v4
|
2 |
|
3 |
+
# COT
|
4 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
5 |
+
|
6 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
7 |
+
|
8 |
+
# SPP
|
9 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
10 |
+
|
11 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
12 |
+
|
13 |
+
|
14 |
+
|
15 |
# REFLEXION
|
16 |
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
17 |
|
test_reflexion.sh → shell/test_reflexion.sh
RENAMED
File without changes
|