resolve conflict for merge
- .gitignore +2 -1
- README.md +30 -0
- deciders/act.py +13 -10
- deciders/utils.py +1 -0
- envs/__init__.py +84 -0
- envs/atari/montezumarevenge_policies.py +142 -0
- envs/atari/montezumarevenge_translator.py +136 -0
- envs/atari/mspacman_policies.py +88 -0
- envs/atari/mspacman_translator.py +80 -0
- envs/mujoco/ant_translator.py +1 -2
- record_reflexion.csv +2 -0
- shell/test_atari.sh +5 -1
.gitignore
CHANGED

@@ -185,4 +185,5 @@ main_test*.sh
 main_jarvis.sh
 test*.py
 *.zip
-test_
+test_
+*.ipynb
README.md
CHANGED

@@ -86,6 +86,36 @@ pip install cython==0.29.37
 3. install gym[mujoco]
 `pip install gym[mujoco]`
 
+### Import Atari ROMs
+
+If you encounter the error `Unable to find game "[env_name]"` when running a script for Atari environments, it may be due to the absence of Atari ROMs in the `atari_py` package since version 0.2.7. To resolve this issue, you can manually download the ROMs and add them to Gym's registry.
+
+``` shell
+pip install gym[accept-rom-license]
+AutoROM --accept-license
+```
+
+Test with the following code
+
+```python
+import gym
+from atariari.benchmark.wrapper import AtariARIWrapper
+
+# Initialize the environment
+env = AtariARIWrapper(gym.make("MsPacmanNoFrameskip-v4"))
+obs = env.reset()
+
+# Perform a single step in the environment
+obs, reward, done, info = env.step(1)
+
+# Check the information provided by the environment (including labels and scores)
+print(info["labels"])
+```
+
+If everything runs smoothly, you have successfully imported the Atari ROMs and set up your environment.
+
+Reference: [StackOverflow answer](https://stackoverflow.com/a/68143504/38626)
+
 ### support new env
 We also support other new env using Gym format, for new env you need to
 1. Translate your Gym env to TextGym env, make `<your_env>_translator.py, <your_env>policies.py`, put them into `./envs/`, and add your env in `./envs/__init__.py`.
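The "support new env" instructions above are easiest to follow with a concrete shape in mind. Below is a minimal sketch of a `<your_env>_translator.py` / `<your_env>_policies.py` pair, modeled on the Ms. Pac-Man files added in this commit; the `MyGame` names and the state layout are illustrative assumptions, not part of the repository.

```python
# mygame_translator.py / mygame_policies.py -- minimal sketch (hypothetical names).
# The state indexing below is an assumption used only for illustration.

class BasicLevelTranslator:
    def translate(self, state):
        # Turn the numeric state vector into a natural-language description.
        x, y = state[0], state[1]
        return f"The agent is at position ({x}, {y})."


def dedicated_1_policy(state, pre_action=1):
    def get_description():
        return "Always select action 1 which does NOOP (no operation)"
    dedicated_1_policy.description = get_description()
    return 1

# Registration in ./envs/__init__.py would then follow the existing pattern:
# REGISTRY["RepresentedMyGame_init_translator"] = mygame_translator.GameDescriber
# REGISTRY["RepresentedMyGame_basic_translator"] = mygame_translator.BasicStateSequenceTranslator
# REGISTRY["RepresentedMyGame_basic_policies"] = [mygame_policies.dedicated_1_policy]
```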
deciders/act.py
CHANGED

@@ -11,7 +11,7 @@ from memory.env_history import EnvironmentHistory
 import tiktoken
 import json
 import re
-from .utils import run_chain
+from .utils import run_chain, get_completion, get_chat
 from gym.spaces import Discrete
 
 class RandomAct():
@@ -156,12 +156,15 @@ class NaiveAct(gpt):
         prompt = f"{game_description}\n{goal_description}\n{fewshot_examples}\nCurrent {state_description}\n{action_description} "
         prompt += "Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Your Action is: "
         print(f"prompt is {prompt}")
-        res =
-
-
-
-
-
+        # res = get_chat(prompt, self.args.api_type, self.args.gpt_version, self.temperature, self.max_tokens)
+        res = get_chat(prompt, api_type=self.args.api_type, model=self.args.gpt_version, engine=self.args.gpt_version, temperature=self.temperature, max_tokens=self.max_tokens)
+        # openai.ChatCompletion.create(
+        #     engine=self.args.gpt_version,
+        #     # model=self.args.gpt_version,
+        #     prompt=prompt,
+        #     temperature=self.temperature,
+        #     max_tokens=self.max_tokens,
+        # )
         return prompt, res
 
     def _add_history_before_action(self, game_description, goal_description, state_description):
@@ -210,8 +213,8 @@ class NaiveAct(gpt):
         my_mem += f"{self.env_history.get_histories(self.mem_num)}"
 
 
-        prompt,
-        action_str =
+        prompt, response = self.response(state_description, action_description, env_info, game_description, goal_description, my_mem)
+        action_str = response
         print(f'my anwser is {action_str}')
         action = self.parser.parse(response).action
         self._add_history_after_action(action)
@@ -219,7 +222,7 @@ class NaiveAct(gpt):
         self.logger.info(f'The optimal action is: {action}.')
         if env_info.get('history'):
             self.logger.info(f'History: {history_to_str(env_info["history"])}')
-        return action, prompt,
+        return action, prompt, response, 0, 0
 
     def _read_mem(self, ):
         memory = self.memory
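The act.py change above routes the completion call through `get_chat` from `deciders/utils.py`, passing both `model=` and `engine=` so the same call site can serve the public OpenAI API and Azure deployments. The helper itself is not shown in this diff; the following is only a sketch of a compatible implementation, assuming the legacy `openai` 0.x `ChatCompletion` API, and the real code in `deciders/utils.py` may differ.

```python
# Sketch of a get_chat helper compatible with the call site above (assumption:
# openai-python 0.x; the actual implementation in deciders/utils.py may differ).
import openai

def get_chat(prompt, api_type="azure", model="gpt-3.5-turbo",
             engine="gpt-35-turbo-0301", temperature=0.0, max_tokens=256):
    kwargs = dict(
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    if api_type == "azure":
        kwargs["engine"] = engine  # Azure deployments are addressed by deployment (engine) name
    else:
        kwargs["model"] = model    # the public endpoint is addressed by model name
    response = openai.ChatCompletion.create(**kwargs)
    return response.choices[0]["message"]["content"]
```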
deciders/utils.py
CHANGED

@@ -54,6 +54,7 @@ def get_completion(prompt: str, api_type: str = "azure", engine: str = "gpt-35-t
         temperature=temperature,
         # request_timeout = 1
     )
+    import pdb; pdb.set_trace()
    return response.choices[0]["message"]["content"]
 
 # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
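The hunk header shows `get_completion(prompt: str, api_type: str = "azure", engine: str = "gpt-35-t…")`, so the helper can be called with just a prompt once API credentials are configured. A minimal call is sketched below; note that the `pdb.set_trace()` added in this commit pauses every call until the debugger is continued.

```python
# Minimal call using the defaults visible in the hunk header (assumes the openai
# credentials/endpoint are already configured elsewhere in the repo). The
# breakpoint added in this commit stops execution here until you continue with `c`.
from deciders.utils import get_completion

text = get_completion("Summarize the current game state in one sentence.")
print(text)
```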
envs/__init__.py
CHANGED

@@ -14,6 +14,8 @@ from .toy_text import frozenlake_translator, frozenlake_policies
 
 from .atari import register_environments
 from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
+from .atari import mspacman_policies, mspacman_translator
+from .atari import montezumarevenge_policies, montezumarevenge_translator
 register_environments()
 
 
@@ -95,6 +97,88 @@ REGISTRY["RepresentedPong_basic_policies"] = [
     Pong_policies.dedicated_6_policy,
 ]
 
+REGISTRY["RepresentedMsPacman_init_translator"] = mspacman_translator.GameDescriber
+REGISTRY["RepresentedMsPacman_basic_translator"] = mspacman_translator.BasicStateSequenceTranslator
+REGISTRY["RepresentedMsPacman_basic_policies"] = [
+    mspacman_policies.real_random_policy,
+    mspacman_policies.pseudo_random_policy,
+    mspacman_policies.dedicated_1_policy,
+    mspacman_policies.dedicated_2_policy,
+    mspacman_policies.dedicated_3_policy,
+    mspacman_policies.dedicated_4_policy,
+    mspacman_policies.dedicated_5_policy,
+    mspacman_policies.dedicated_6_policy,
+    mspacman_policies.dedicated_7_policy,
+    mspacman_policies.dedicated_8_policy,
+    mspacman_policies.dedicated_9_policy,
+]
+
+REGISTRY["RepresentedMontezumaRevenge_init_translator"] = montezumarevenge_translator.GameDescriber
+REGISTRY["RepresentedMontezumaRevenge_basic_translator"] = montezumarevenge_translator.BasicStateSequenceTranslator
+REGISTRY["RepresentedMontezumaRevenge_basic_policies"] = [
+    montezumarevenge_policies.real_random_policy,
+    montezumarevenge_policies.pseudo_random_policy,
+    montezumarevenge_policies.dedicated_1_policy,
+    montezumarevenge_policies.dedicated_2_policy,
+    montezumarevenge_policies.dedicated_3_policy,
+    montezumarevenge_policies.dedicated_4_policy,
+    montezumarevenge_policies.dedicated_5_policy,
+    montezumarevenge_policies.dedicated_6_policy,
+    montezumarevenge_policies.dedicated_7_policy,
+    montezumarevenge_policies.dedicated_8_policy,
+    montezumarevenge_policies.dedicated_9_policy,
+    montezumarevenge_policies.dedicated_10_policy,
+    montezumarevenge_policies.dedicated_11_policy,
+    montezumarevenge_policies.dedicated_12_policy,
+    montezumarevenge_policies.dedicated_13_policy,
+    montezumarevenge_policies.dedicated_14_policy,
+    montezumarevenge_policies.dedicated_15_policy,
+    montezumarevenge_policies.dedicated_16_policy,
+    montezumarevenge_policies.dedicated_17_policy,
+    montezumarevenge_policies.dedicated_18_policy,
+]
+
+REGISTRY["RepresentedMsPacman_init_translator"] = mspacman_translator.GameDescriber
+REGISTRY["RepresentedMsPacman_basic_translator"] = mspacman_translator.BasicStateSequenceTranslator
+REGISTRY["RepresentedMsPacman_basic_policies"] = [
+    mspacman_policies.real_random_policy,
+    mspacman_policies.pseudo_random_policy,
+    mspacman_policies.dedicated_1_policy,
+    mspacman_policies.dedicated_2_policy,
+    mspacman_policies.dedicated_3_policy,
+    mspacman_policies.dedicated_4_policy,
+    mspacman_policies.dedicated_5_policy,
+    mspacman_policies.dedicated_6_policy,
+    mspacman_policies.dedicated_7_policy,
+    mspacman_policies.dedicated_8_policy,
+    mspacman_policies.dedicated_9_policy,
+]
+
+REGISTRY["RepresentedMontezumaRevenge_init_translator"] = montezumarevenge_translator.GameDescriber
+REGISTRY["RepresentedMontezumaRevenge_basic_translator"] = montezumarevenge_translator.BasicStateSequenceTranslator
+REGISTRY["RepresentedMontezumaRevenge_basic_policies"] = [
+    montezumarevenge_policies.real_random_policy,
+    montezumarevenge_policies.pseudo_random_policy,
+    montezumarevenge_policies.dedicated_1_policy,
+    montezumarevenge_policies.dedicated_2_policy,
+    montezumarevenge_policies.dedicated_3_policy,
+    montezumarevenge_policies.dedicated_4_policy,
+    montezumarevenge_policies.dedicated_5_policy,
+    montezumarevenge_policies.dedicated_6_policy,
+    montezumarevenge_policies.dedicated_7_policy,
+    montezumarevenge_policies.dedicated_8_policy,
+    montezumarevenge_policies.dedicated_9_policy,
+    montezumarevenge_policies.dedicated_10_policy,
+    montezumarevenge_policies.dedicated_11_policy,
+    montezumarevenge_policies.dedicated_12_policy,
+    montezumarevenge_policies.dedicated_13_policy,
+    montezumarevenge_policies.dedicated_14_policy,
+    montezumarevenge_policies.dedicated_15_policy,
+    montezumarevenge_policies.dedicated_16_policy,
+    montezumarevenge_policies.dedicated_17_policy,
+    montezumarevenge_policies.dedicated_18_policy,
+]
+
 ## For mujoco env
 
 
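Note that the MsPacman and MontezumaRevenge entries are assigned twice in this hunk; the second block simply overwrites the first, so the duplication is harmless. The sketch below shows how these keys are presumably consumed elsewhere in the repo; the lookup site itself is an assumption, only the key names come from this diff.

```python
# Looking up the translators and policies registered above by environment name.
from envs import REGISTRY

env_name = "RepresentedMsPacman"
game_describer_cls   = REGISTRY[f"{env_name}_init_translator"]   # GameDescriber
state_translator_cls = REGISTRY[f"{env_name}_basic_translator"]  # BasicStateSequenceTranslator
expert_policies      = REGISTRY[f"{env_name}_basic_policies"]    # list of policy functions

print(state_translator_cls.__name__, len(expert_policies))
```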
envs/atari/montezumarevenge_policies.py
ADDED

@@ -0,0 +1,142 @@
+import numpy as np
+
+
+def dedicated_1_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 1 which does NOOP (no operation)"
+    dedicated_1_policy.description = get_description()
+    return 1
+
+
+def dedicated_2_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 2 which hits the enemy"
+    dedicated_2_policy.description = get_description()
+    return 2
+
+
+def dedicated_3_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 3 which moves the agent up"
+    dedicated_3_policy.description = get_description()
+    return 3
+
+
+def dedicated_4_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 4 which moves the agent right"
+    dedicated_4_policy.description = get_description()
+    return 4
+
+
+def dedicated_5_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 5 which moves the agent left"
+    dedicated_5_policy.description = get_description()
+    return 5
+
+
+def pseudo_random_policy(state, pre_action):
+    def get_description():
+        return "Select an action among 1 to 18 alternatively"
+    pseudo_random_policy.description = get_description()
+    return pre_action % 18 + 1
+
+
+def real_random_policy(state, pre_action=1):
+    def get_description():
+        return "Select action with a random policy"
+    real_random_policy.description = get_description()
+    return np.random.choice(range(0, 18)) + 1
+
+
+# Complete set of dedicated action policies
+def dedicated_6_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 6 which moves the agent down"
+    dedicated_6_policy.description = get_description()
+    return 6
+
+
+def dedicated_7_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 7 which moves the agent up and to the right"
+    dedicated_7_policy.description = get_description()
+    return 7
+
+
+def dedicated_8_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 8 which moves the agent up and to the left"
+    dedicated_8_policy.description = get_description()
+    return 8
+
+
+def dedicated_9_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 9 which moves the agent down and to the right"
+    dedicated_9_policy.description = get_description()
+    return 9
+
+
+def dedicated_10_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 10 which moves the agent down and to the left"
+    dedicated_10_policy.description = get_description()
+    return 10
+
+
+def dedicated_11_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 11 which moves the agent up while hitting the enemy"
+    dedicated_11_policy.description = get_description()
+    return 11
+
+
+def dedicated_12_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 12 which moves the agent right while hitting the enemy"
+    dedicated_12_policy.description = get_description()
+    return 12
+
+
+def dedicated_13_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 13 which moves the agent left while hitting the enemy"
+    dedicated_13_policy.description = get_description()
+    return 13
+
+
+def dedicated_14_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 14 which moves the agent down while hitting the enemy"
+    dedicated_14_policy.description = get_description()
+    return 14
+
+
+def dedicated_15_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 15 which moves the agent up and to the right while hitting the enemy"
+    dedicated_15_policy.description = get_description()
+    return 15
+
+
+def dedicated_16_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 16 which moves the agent up and to the left while hitting the enemy"
+    dedicated_16_policy.description = get_description()
+    return 16
+
+
+def dedicated_17_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 17 which moves the agent down and to the right while hitting the enemy"
+    dedicated_17_policy.description = get_description()
+    return 17
+
+
+def dedicated_18_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 18 which moves the agent down and to the left while hitting the enemy"
+    dedicated_18_policy.description = get_description()
+    return 18
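These helpers all follow the same shape: calling a policy returns a fixed action id and, as a side effect, attaches a human-readable `description` to the function object. A short usage illustration (the `None` state is just a placeholder; the dedicated policies ignore it):

```python
from envs.atari import montezumarevenge_policies as mp

action = mp.dedicated_4_policy(state=None)
print(action)                             # 4
print(mp.dedicated_4_policy.description)  # "Always select action 4 which moves the agent right"

# pseudo_random_policy cycles through the 18 actions based on the previous one:
print(mp.pseudo_random_policy(None, pre_action=18))  # 18 % 18 + 1 == 1
```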
envs/atari/montezumarevenge_translator.py
ADDED

@@ -0,0 +1,136 @@
+class BasicLevelTranslator:
+    def __init__(self):
+        self.player_direction_map = {
+            72: "facing left",
+            40: "facing left, climbing down ladder/rope",
+            24: "facing left, climbing up ladder/rope",
+            128: "facing right",
+            32: "facing right, climbing down ladder/rope",
+            16: "facing right, climbing up ladder/rope",
+        }
+
+    def translate(self, state):
+        (
+            room_number, player_x, player_y, player_direction, enemy_skull_x, enemy_skull_y,
+            key_monster_x, key_monster_y, level, num_lives, items_in_inventory_count,
+            room_state, score_0, score_1, score_2
+        ) = state
+
+        player_dir = self.player_direction_map.get(player_direction, "unknown direction")
+        picked_up_items = "None"
+
+        if items_in_inventory_count > 0:
+            items = [
+                ("Key", "Opens locked doors.", 1),
+                ("Ankh", "Freeze enemies.", 2),
+                ("Gem", "Extra bonus points.", 4),
+                ("Torch", "Lights up dark rooms.", 8),
+                ("Sword", "Vanquishes certain enemies.", 16),
+            ]
+
+            picked_up_items = ""
+            for name, desc, val in items:
+                if items_in_inventory_count & val == val:
+                    picked_up_items += f"{name} ({desc}), "
+            picked_up_items = picked_up_items[:-2]
+
+        res = f"""Room Number: {room_number}
+Player Position: ({player_x}, {player_y})
+Player Direction: {player_dir}
+Enemy Skull Position: ({enemy_skull_x}, {enemy_skull_y})
+Key Monster Position: ({key_monster_x}, {key_monster_y})
+Level: {level}
+Remaining Lives: {num_lives}
+Items in Inventory: {picked_up_items if picked_up_items else "None"}
+Room State (Mapped Based on Room Number): {room_state}
+Current Score: {score_0}{score_1}{score_2}\n"""
+        return res
+
+
+class GameDescriber:
+    def __init__(self, args):
+        self.is_only_local_obs = args.is_only_local_obs == 1
+        self.max_episode_len = args.max_episode_len
+        self.action_desc_dict = {
+        }
+        self.reward_desc_dict = {
+        }
+
+    def describe_goal(self):
+        return ("The goal is to guide PANAMA JOE safely to reach Montezuma's fantastic treasure. "
+                "Avoid danger, collect special tools and rewards, and navigate through the chambers of the emperor's fortress.")
+
+    def describe_game(self):
+        return ("""In Montezuma's Revenge, you control a fearless adventurer named PANAMA JOE who aims to navigate through a maze
+of death-dealing chambers within Emperor Montezuma's fortress. PANAMA JOE can walk, climb, and jump in the game. In each room of the
+maze, there are several dangers, including various creatures such as skulls, snakes, spiders, and bats, as well as several deadly room
+fixtures like fire pits, conveyor belts, disappearing floors, laser gates, floor spikes, and laser walls.
+
+PANAMA JOE can act on several elements within the game environment. Some items in the game are:
+1. Keys: Essential to open locked doors, allowing access to other rooms and deeper exploration.
+2. Ankhs: Freeze all Killer Creatures in the room for 6.5 seconds, during which they can't move or kill.
+3. Gems: Extra bonus points when collected.
+4. Torches: Light up dark rooms, making it easier to navigate through threats.
+5. Swords: Used to defeat certain enemies, by contact with the tip of the sword.
+
+The game's ultimate goal is to reach the fabulous Treasure Room containing Montezuma's treasure while amassing as many points as
+possible and keeping PANAMA JOE alive through the challenges. The game ends when you lose all of your PANAMA JOEs, with a maximum
+of 6 lives.""")
+
+    def translate_terminate_state(self, state, episode_len, max_episode_len):
+        return ""
+
+    def translate_potential_next_state(self, state, action):
+        return ""
+
+    def describe_action(self):
+        actions = {
+            1: "No Operation",
+            2: "Move Right",
+            3: "Move Left",
+            4: "Move Down",
+            5: "Move Up",
+            6: "Move Right + Climb Down",
+            7: "Move Left + Climb Down",
+            8: "Move Right + Climb Up",
+            9: "Move Left + Climb Up",
+            10: "Jump",
+            11: "Jump Right",
+            12: "Jump Left",
+            13: "Jump Down",
+            14: "Jump Up",
+            15: "Jump Right + Climb Down",
+            16: "Jump Left + Climb Down",
+            17: "Jump Right + Climb Up",
+            18: "Jump Left + Climb Up",
+        }
+
+        description = "Your Next Move:\n"
+        for action_number, action_name in actions.items():
+            description += f"{action_number}: {action_name}\n"
+
+        description += "Please choose an action from the list above."
+        return description
+
+class BasicStateSequenceTranslator(BasicLevelTranslator):
+    def __init__(self):
+        super().__init__()
+
+    def translate(self, infos, is_current=False):
+        descriptions = []
+        if is_current:
+            state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
+            return state_desc
+        for i, info in enumerate(infos):
+            assert 'state' in info, "info should contain state information"
+
+            state_desc = BasicLevelTranslator().translate(info['state'])
+            action_desc = f'Take Action: {["No Operation", "Move Right", "Move Left", "Move Down", "Move Up", "Move Right + Climb Down", "Move Left + Climb Down", "Move Right + Climb Up", "Move Left + Climb Up", "Jump", "Jump Right", "Jump Left", "Jump Down", "Jump Up", "Jump Right + Climb Down", "Jump Left + Climb Down", "Jump Right + Climb Up", "Jump Left + Climb Up"][info["action"]]} ({info["action"]}).'
+            reward_desc = f"Result: Reward of {info['reward']}"
+            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
+            descriptions.append(f"{state_desc}\n"
+                                f"{action_desc}\n"
+                                f"{reward_desc}\n"
+                                f"Transit to {next_state_desc}\n")
+
+        return descriptions
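`BasicLevelTranslator.translate` expects a 15-field state in the unpacking order shown above. A sketch of how that tuple might be assembled from `AtariARIWrapper`'s `info["labels"]` dict follows; the label names are an assumption inferred from the unpacking order, not verified against the wrapper.

```python
# Hypothetical mapping from AtariARIWrapper labels to the 15-tuple used above.
FIELDS = [
    "room_number", "player_x", "player_y", "player_direction",
    "enemy_skull_x", "enemy_skull_y", "key_monster_x", "key_monster_y",
    "level", "num_lives", "items_in_inventory_count",
    "room_state", "score_0", "score_1", "score_2",
]

def labels_to_state(labels: dict) -> tuple:
    """Order the labelled RAM values into the tuple expected by translate()."""
    return tuple(labels[name] for name in FIELDS)

# Usage, following the README example:
#   obs, reward, done, info = env.step(1)
#   print(BasicLevelTranslator().translate(labels_to_state(info["labels"])))
```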
envs/atari/mspacman_policies.py
ADDED

@@ -0,0 +1,88 @@
+import numpy as np
+
+
+def dedicated_1_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 1 which does NOOP (no operation)"
+
+    dedicated_1_policy.description = get_description()
+    return 1
+
+
+def dedicated_2_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 2 which hits the enemy"
+
+    dedicated_2_policy.description = get_description()
+    return 2
+
+
+def dedicated_3_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 3 which moves the agent up"
+
+    dedicated_3_policy.description = get_description()
+    return 3
+
+
+def dedicated_4_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 4 which moves the agent right"
+
+    dedicated_4_policy.description = get_description()
+    return 4
+
+
+def dedicated_5_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 5 which moves the agent left"
+
+    dedicated_5_policy.description = get_description()
+    return 5
+
+
+def pseudo_random_policy(state, pre_action):
+    def get_description():
+        return "Select an action among 1 to 9 alternatively"
+    pseudo_random_policy.description = get_description()
+    return pre_action % 9 + 1
+
+
+def real_random_policy(state, pre_action=1):
+    def get_description():
+        return "Select action with a random policy"
+    real_random_policy.description = get_description()
+    return np.random.choice(range(0, 9)) + 1
+
+
+# Complete set of dedicated action policies
+def dedicated_6_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 6 which moves the agent down"
+
+    dedicated_6_policy.description = get_description()
+    return 6
+
+
+def dedicated_7_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 7 which moves the agent up and to the right"
+
+    dedicated_7_policy.description = get_description()
+    return 7
+
+
+def dedicated_8_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 8 which moves the agent up and to the left"
+
+    dedicated_8_policy.description = get_description()
+    return 8
+
+
+def dedicated_9_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 9 which moves the agent down and to the right"
+
+    dedicated_9_policy.description = get_description()
+    return 9
envs/atari/mspacman_translator.py
ADDED

@@ -0,0 +1,80 @@
+class BasicLevelTranslator:
+    def __init__(self):
+        pass
+
+    def translate(self, state):
+        x, y = state[8], state[9]
+        ghosts = [(state[0], state[4]), (state[1], state[5]), (state[2], state[6]), (state[3], state[7])]
+        ghost_directions = ["UP", "RIGHT", "LEFT", "DOWN"]
+
+        direction = ghost_directions[int(state[13])]
+        eaten_dots = state[14]
+        score = state[15]
+        lives = state[16]
+        ghosts_count = state[12]
+
+        fruit_x, fruit_y = state[10], state[11]
+        fruit_present = fruit_x != 0 or fruit_y != 0
+
+        player_state = f"Ms. Pac-Man is at position ({x}, {y}), facing {direction} with {lives} lives left. {eaten_dots} dots have been eaten so far and the current score is {score}. The game has {ghosts_count} ghosts."
+
+        ghost_states = []
+        for i, (gx, gy) in enumerate(ghosts):
+            ghost_name = ["Sue", "Inky", "Pinky", "Blinky"][i]
+            ghost_states.append(f"{ghost_name} the ghost is at position ({gx}, {gy})")
+        ghost_state_str = " ".join(ghost_states)
+
+        fruit_state = f"A fruit is present at position ({fruit_x}, {fruit_y})" if fruit_present else "No fruit is currently present on the screen."
+
+        result = f"{player_state} {fruit_state} {ghost_state_str}"
+        return result
+
+
+class GameDescriber:
+    def __init__(self, args):
+        self.is_only_local_obs = args.is_only_local_obs == 1
+        self.max_episode_len = args.max_episode_len
+        self.action_desc_dict = {
+        }
+        self.reward_desc_dict = {
+        }
+
+    def describe_goal(self):
+        return "The goal of Ms. Pac-Man is to score as many points as possible while avoiding the ghosts."
+
+    def describe_game(self):
+        return "In the Ms. Pac-Man game, you control Ms. Pac-Man, moving around a maze, eating dots to score points. "\
+               "There are also special bonus items, such as fruit and pretzels, that appear for a limited time and award "\
+               "extra points. Ghosts chase Ms. Pac-Man around the maze, but she can eat an energy pill to temporarily "\
+               "turn the ghosts vulnerable and eat them for extra points. The game ends when you lose all your lives. "\
+               "Score 10,000 points and earn a bonus life."
+
+    def translate_terminate_state(self, state, episode_len, max_episode_len):
+        return ""
+
+    def translate_potential_next_state(self, state, action):
+        return ""
+
+    def describe_action(self):
+        return "Your Next Move: \n Please choose an action. Each value corresponds to a directional input as follows: "\
+               "1 - NOOP, 2 - UP, 3 - RIGHT, 4 - LEFT, 5 - DOWN, 6 - UPRIGHT, 7 - UPLEFT, 8 - DOWNRIGHT, 9 - DOWNLEFT. "\
+               "Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4, 5, 6, 7, 8, 9]."
+
+class BasicStateSequenceTranslator(BasicLevelTranslator):
+    def __init__(self):
+        super().__init__()
+
+    def translate(self, infos, is_current=False):
+        descriptions = []
+        if is_current:
+            state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
+            return state_desc
+        for i, info in enumerate(infos):
+            assert 'state' in info, "info should contain state information"
+
+            state_desc = BasicLevelTranslator().translate(info['state'])
+            action_desc = f"Take Action: {['NOOP', 'UP', 'RIGHT', 'LEFT', 'DOWN', 'UPRIGHT', 'UPLEFT', 'DOWNRIGHT', 'DOWNLEFT'][info['action']]} ({info['action']})."
+            reward_desc = f"Result: Reward of {info['reward']}, "
+            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
+            descriptions.append(f"{state_desc}.\\n {action_desc} \\n {reward_desc} \\n Transit to {next_state_desc}")
+        return descriptions
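A quick smoke test for the Ms. Pac-Man translators with a hand-made 17-element state vector (ghost x/y pairs, player x/y, fruit x/y, ghost count, direction index, dots eaten, score, lives, as inferred from the indexing in `translate`; the concrete numbers below are made up):

```python
from envs.atari.mspacman_translator import BasicLevelTranslator, BasicStateSequenceTranslator

state      = [50, 60, 70, 80, 40, 45, 50, 55, 88, 98, 0, 0, 4, 1, 30, 210, 3]
next_state = [52, 58, 72, 78, 42, 47, 52, 57, 90, 98, 0, 0, 4, 1, 31, 220, 3]

# Single-state description
print(BasicLevelTranslator().translate(state))

# Transition description for one (state, action, reward, next_state) record
infos = [{"state": state, "action": 2, "reward": 10, "next_state": next_state}]
for desc in BasicStateSequenceTranslator().translate(infos):
    print(desc)
```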
envs/mujoco/ant_translator.py
CHANGED

@@ -99,8 +99,7 @@ class GameDescriber:
 
     def describe_action(self):
         return (
-            "Your next move: \n Please
-            "representing the torques to be applied at the eight hinge joints of the ant."
+            "Your next move: \n Please choose your action which applies torques at the eight hinge joints of the ant. It should be a list of eight numerical values and each value is within the range of [-1,1]."
         )
 
 class BasicStateSequenceTranslator(BasicLevelTranslator):
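Per the rewritten `describe_action`, an Ant-v4 action is a list of eight torques, each in [-1, 1]. A minimal illustration with arbitrary values, using the classic Gym step API as in the README example:

```python
import gym
import numpy as np

env = gym.make("Ant-v4")
obs = env.reset()

# Eight hinge-joint torques, each clipped to [-1, 1]; the values are arbitrary.
action = np.clip([0.5, -0.2, 0.0, 0.3, -0.7, 0.1, 0.9, -0.4], -1.0, 1.0)
obs, reward, done, info = env.step(action)
print(reward)
```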
record_reflexion.csv
CHANGED

@@ -10,6 +10,8 @@ FrozenLake-v1,1,expert,200.0
 MountainCarContinuous-v0,1,expert,200.0
 RepresentedBoxing-v0,1,expert,200.0
 RepresentedPong-v0,1,expert,200.0
+RepresentedMsPacman-v0,1,expert,10000.0
+RepresentedMontezumaRevenge-v0,1,expert,10000.0
 Ant-v4,1,expert,5000.2
 HalfCheetah-v4,1,expert,12138.8
 Hopper-v4,1,expert,3542.2
shell/test_atari.sh
CHANGED

@@ -1,2 +1,6 @@
 python main_reflexion.py --env_name RepresentedBoxing-v0 --init_summarizer RepresentedBoxing_init_translator --curr_summarizer RepresentedBoxing_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
-python main_reflexion.py --env_name RepresentedPong-v0 --init_summarizer RepresentedPong_init_translator --curr_summarizer RepresentedPong_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
+python main_reflexion.py --env_name RepresentedPong-v0 --init_summarizer RepresentedPong_init_translator --curr_summarizer RepresentedPong_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
+
+python main_reflexion.py --env_name RepresentedMsPacman-v0 --init_summarizer RepresentedMsPacman_init_translator --curr_summarizer RepresentedMsPacman_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
+
+python main_reflexion.py --env_name RepresentedMontezumaRevenge-v0 --init_summarizer RepresentedMontezumaRevenge_init_translator --curr_summarizer RepresentedMontezumaRevenge_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0