Jarvis-K committed on
Commit 50deaa3 · 2 Parent(s): 6299d2b 841d805

resolve conflict for merge

.gitignore CHANGED
@@ -185,4 +185,5 @@ main_test*.sh
 main_jarvis.sh
 test*.py
 *.zip
-test_
+test_
+*.ipynb
README.md CHANGED
@@ -86,6 +86,36 @@ pip install cython==0.29.37
 3. install gym[mujoco]
 `pip install gym[mujoco]`
 
+### Import Atari ROMs
+
+If you encounter the error `Unable to find game "[env_name]"` when running a script for an Atari environment, it is likely because the `atari_py` package no longer ships the Atari ROMs as of version 0.2.7. To resolve this issue, download the ROMs and add them to Gym's registry:
+
+```shell
+pip install gym[accept-rom-license]
+AutoROM --accept-license
+```
+
+Test with the following code:
+
+```python
+import gym
+from atariari.benchmark.wrapper import AtariARIWrapper
+
+# Initialize the environment
+env = AtariARIWrapper(gym.make("MsPacmanNoFrameskip-v4"))
+obs = env.reset()
+
+# Perform a single step in the environment
+obs, reward, done, info = env.step(1)
+
+# Check the information provided by the environment (including labels and scores)
+print(info["labels"])
+```
+
+If everything runs smoothly, you have successfully imported the Atari ROMs and set up your environment.
+
+Reference: [StackOverflow answer](https://stackoverflow.com/a/68143504/38626)
+
 ### support new env
 We also support other new env using Gym format, for new env you need to
 1. Translate your Gym env to TextGym env, make `<your_env>_translator.py, <your_env>policies.py`, put them into `./envs/`, and add your env in `./envs/__init__.py`.
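For the `### support new env` step above, the Ms. Pac-Man and Montezuma's Revenge files added later in this commit (`envs/atari/mspacman_translator.py`, `envs/atari/mspacman_policies.py`, and the `REGISTRY` entries in `envs/__init__.py`) show the expected pattern. The sketch below is a hypothetical, minimal `myenv_translator.py` / `myenv_policies.py` pair; the names `myenv` and `RepresentedMyEnv` are illustrative only and not part of this commit.

```python
# Hypothetical sketch only -- loosely mirrors the pattern of the mspacman_* files added in this commit.

# envs/myenv_translator.py
class BasicLevelTranslator:
    def translate(self, state):
        # Turn a raw observation into a natural-language description.
        return f"The current state vector is {state}."

class GameDescriber:
    def __init__(self, args):
        self.is_only_local_obs = args.is_only_local_obs == 1
        self.max_episode_len = args.max_episode_len
        self.action_desc_dict = {}
        self.reward_desc_dict = {}

    def describe_goal(self):
        return "One sentence describing the objective of your env."

    def describe_game(self):
        return "A short description of the rules of your env."

    def describe_action(self):
        return "List the valid action numbers and what they do."

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        return ""

    def translate_potential_next_state(self, state, action):
        return ""

class BasicStateSequenceTranslator(BasicLevelTranslator):
    def translate(self, infos, is_current=False):
        if is_current:
            return BasicLevelTranslator().translate(infos[-1]['state'])
        return [BasicLevelTranslator().translate(info['state']) for info in infos]

# envs/myenv_policies.py
import numpy as np

def real_random_policy(state, pre_action=1):
    real_random_policy.description = "Select action with a random policy"
    return np.random.choice(range(0, 9)) + 1

# envs/__init__.py (registration)
# REGISTRY["RepresentedMyEnv_init_translator"] = myenv_translator.GameDescriber
# REGISTRY["RepresentedMyEnv_basic_translator"] = myenv_translator.BasicStateSequenceTranslator
# REGISTRY["RepresentedMyEnv_basic_policies"] = [myenv_policies.real_random_policy]
```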
deciders/act.py CHANGED
@@ -11,7 +11,7 @@ from memory.env_history import EnvironmentHistory
 import tiktoken
 import json
 import re
-from .utils import run_chain
+from .utils import run_chain, get_completion, get_chat
 from gym.spaces import Discrete
 
 class RandomAct():
@@ -156,12 +156,15 @@ class NaiveAct(gpt):
         prompt = f"{game_description}\n{goal_description}\n{fewshot_examples}\nCurrent {state_description}\n{action_description} "
         prompt += "Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Your Action is: "
         print(f"prompt is {prompt}")
-        res = openai.Completion.create(
-            engine=self.args.gpt_version,
-            prompt=prompt,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-        )
+        # res = get_chat(prompt, self.args.api_type, self.args.gpt_version, self.temperature, self.max_tokens)
+        res = get_chat(prompt, api_type=self.args.api_type, model=self.args.gpt_version, engine=self.args.gpt_version, temperature=self.temperature, max_tokens=self.max_tokens)
+        # openai.ChatCompletion.create(
+        #     engine=self.args.gpt_version,
+        #     # model=self.args.gpt_version,
+        #     prompt=prompt,
+        #     temperature=self.temperature,
+        #     max_tokens=self.max_tokens,
+        # )
         return prompt, res
 
     def _add_history_before_action(self, game_description, goal_description, state_description):
@@ -210,8 +213,8 @@ class NaiveAct(gpt):
         my_mem += f"{self.env_history.get_histories(self.mem_num)}"
 
 
-        prompt, res = self.response(state_description, action_description, env_info, game_description, goal_description, my_mem)
-        action_str = res.choices[0].text.strip()
+        prompt, response = self.response(state_description, action_description, env_info, game_description, goal_description, my_mem)
+        action_str = response
         print(f'my anwser is {action_str}')
         action = self.parser.parse(response).action
         self._add_history_after_action(action)
@@ -219,7 +222,7 @@ class NaiveAct(gpt):
         self.logger.info(f'The optimal action is: {action}.')
         if env_info.get('history'):
             self.logger.info(f'History: {history_to_str(env_info["history"])}')
-        return action, prompt, res, 0, 0
+        return action, prompt, response, 0, 0
 
     def _read_mem(self, ):
         memory = self.memory
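For orientation, `get_chat` is the new helper imported from `deciders/utils.py` above; only `get_completion` is visible in the next hunk, so the following is a rough sketch of what a `get_chat` matching this call site could look like, not the repository's actual implementation (the Azure/OpenAI branching and the default values are assumptions):

```python
# Hypothetical sketch of a get_chat helper matching the call site above;
# the real implementation lives in deciders/utils.py and may differ.
import openai  # assumes the pre-1.0 openai SDK, as used elsewhere in this repo

def get_chat(prompt, api_type="azure", model="gpt-3.5-turbo",
             engine="gpt-35-turbo", temperature=0.0, max_tokens=256):
    kwargs = dict(
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    if api_type == "azure":
        # Azure deployments are addressed by engine (deployment) name.
        kwargs["engine"] = engine
    else:
        kwargs["model"] = model
    response = openai.ChatCompletion.create(**kwargs)
    return response.choices[0]["message"]["content"]
```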
deciders/utils.py CHANGED
@@ -54,6 +54,7 @@ def get_completion(prompt: str, api_type: str = "azure", engine: str = "gpt-35-t
         temperature=temperature,
         # request_timeout = 1
     )
+    import pdb; pdb.set_trace()
     return response.choices[0]["message"]["content"]
 
 # @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
envs/__init__.py CHANGED
@@ -14,6 +14,8 @@ from .toy_text import frozenlake_translator, frozenlake_policies
 
 from .atari import register_environments
 from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
+from .atari import mspacman_policies, mspacman_translator
+from .atari import montezumarevenge_policies, montezumarevenge_translator
 register_environments()
 
 
@@ -95,6 +97,88 @@ REGISTRY["RepresentedPong_basic_policies"] = [
     Pong_policies.dedicated_6_policy,
 ]
 
+REGISTRY["RepresentedMsPacman_init_translator"] = mspacman_translator.GameDescriber
+REGISTRY["RepresentedMsPacman_basic_translator"] = mspacman_translator.BasicStateSequenceTranslator
+REGISTRY["RepresentedMsPacman_basic_policies"] = [
+    mspacman_policies.real_random_policy,
+    mspacman_policies.pseudo_random_policy,
+    mspacman_policies.dedicated_1_policy,
+    mspacman_policies.dedicated_2_policy,
+    mspacman_policies.dedicated_3_policy,
+    mspacman_policies.dedicated_4_policy,
+    mspacman_policies.dedicated_5_policy,
+    mspacman_policies.dedicated_6_policy,
+    mspacman_policies.dedicated_7_policy,
+    mspacman_policies.dedicated_8_policy,
+    mspacman_policies.dedicated_9_policy,
+]
+
+REGISTRY["RepresentedMontezumaRevenge_init_translator"] = montezumarevenge_translator.GameDescriber
+REGISTRY["RepresentedMontezumaRevenge_basic_translator"] = montezumarevenge_translator.BasicStateSequenceTranslator
+REGISTRY["RepresentedMontezumaRevenge_basic_policies"] = [
+    montezumarevenge_policies.real_random_policy,
+    montezumarevenge_policies.pseudo_random_policy,
+    montezumarevenge_policies.dedicated_1_policy,
+    montezumarevenge_policies.dedicated_2_policy,
+    montezumarevenge_policies.dedicated_3_policy,
+    montezumarevenge_policies.dedicated_4_policy,
+    montezumarevenge_policies.dedicated_5_policy,
+    montezumarevenge_policies.dedicated_6_policy,
+    montezumarevenge_policies.dedicated_7_policy,
+    montezumarevenge_policies.dedicated_8_policy,
+    montezumarevenge_policies.dedicated_9_policy,
+    montezumarevenge_policies.dedicated_10_policy,
+    montezumarevenge_policies.dedicated_11_policy,
+    montezumarevenge_policies.dedicated_12_policy,
+    montezumarevenge_policies.dedicated_13_policy,
+    montezumarevenge_policies.dedicated_14_policy,
+    montezumarevenge_policies.dedicated_15_policy,
+    montezumarevenge_policies.dedicated_16_policy,
+    montezumarevenge_policies.dedicated_17_policy,
+    montezumarevenge_policies.dedicated_18_policy,
+]
+
+REGISTRY["RepresentedMsPacman_init_translator"] = mspacman_translator.GameDescriber
+REGISTRY["RepresentedMsPacman_basic_translator"] = mspacman_translator.BasicStateSequenceTranslator
+REGISTRY["RepresentedMsPacman_basic_policies"] = [
+    mspacman_policies.real_random_policy,
+    mspacman_policies.pseudo_random_policy,
+    mspacman_policies.dedicated_1_policy,
+    mspacman_policies.dedicated_2_policy,
+    mspacman_policies.dedicated_3_policy,
+    mspacman_policies.dedicated_4_policy,
+    mspacman_policies.dedicated_5_policy,
+    mspacman_policies.dedicated_6_policy,
+    mspacman_policies.dedicated_7_policy,
+    mspacman_policies.dedicated_8_policy,
+    mspacman_policies.dedicated_9_policy,
+]
+
+REGISTRY["RepresentedMontezumaRevenge_init_translator"] = montezumarevenge_translator.GameDescriber
+REGISTRY["RepresentedMontezumaRevenge_basic_translator"] = montezumarevenge_translator.BasicStateSequenceTranslator
+REGISTRY["RepresentedMontezumaRevenge_basic_policies"] = [
+    montezumarevenge_policies.real_random_policy,
+    montezumarevenge_policies.pseudo_random_policy,
+    montezumarevenge_policies.dedicated_1_policy,
+    montezumarevenge_policies.dedicated_2_policy,
+    montezumarevenge_policies.dedicated_3_policy,
+    montezumarevenge_policies.dedicated_4_policy,
+    montezumarevenge_policies.dedicated_5_policy,
+    montezumarevenge_policies.dedicated_6_policy,
+    montezumarevenge_policies.dedicated_7_policy,
+    montezumarevenge_policies.dedicated_8_policy,
+    montezumarevenge_policies.dedicated_9_policy,
+    montezumarevenge_policies.dedicated_10_policy,
+    montezumarevenge_policies.dedicated_11_policy,
+    montezumarevenge_policies.dedicated_12_policy,
+    montezumarevenge_policies.dedicated_13_policy,
+    montezumarevenge_policies.dedicated_14_policy,
+    montezumarevenge_policies.dedicated_15_policy,
+    montezumarevenge_policies.dedicated_16_policy,
+    montezumarevenge_policies.dedicated_17_policy,
+    montezumarevenge_policies.dedicated_18_policy,
+]
+
 ## For mujoco env
 
 
envs/atari/montezumarevenge_policies.py ADDED
@@ -0,0 +1,142 @@
+import numpy as np
+
+
+def dedicated_1_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 1 which does NOOP (no operation)"
+    dedicated_1_policy.description = get_description()
+    return 1
+
+
+def dedicated_2_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 2 which hits the enemy"
+    dedicated_2_policy.description = get_description()
+    return 2
+
+
+def dedicated_3_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 3 which moves the agent up"
+    dedicated_3_policy.description = get_description()
+    return 3
+
+
+def dedicated_4_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 4 which moves the agent right"
+    dedicated_4_policy.description = get_description()
+    return 4
+
+
+def dedicated_5_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 5 which moves the agent left"
+    dedicated_5_policy.description = get_description()
+    return 5
+
+
+def pseudo_random_policy(state, pre_action):
+    def get_description():
+        return "Select an action among 1 to 18 alternatively"
+    pseudo_random_policy.description = get_description()
+    return pre_action % 18 + 1
+
+
+def real_random_policy(state, pre_action=1):
+    def get_description():
+        return "Select action with a random policy"
+    real_random_policy.description = get_description()
+    return np.random.choice(range(0, 18)) + 1
+
+
+# Complete set of dedicated action policies
+def dedicated_6_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 6 which moves the agent down"
+    dedicated_6_policy.description = get_description()
+    return 6
+
+
+def dedicated_7_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 7 which moves the agent up and to the right"
+    dedicated_7_policy.description = get_description()
+    return 7
+
+
+def dedicated_8_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 8 which moves the agent up and to the left"
+    dedicated_8_policy.description = get_description()
+    return 8
+
+
+def dedicated_9_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 9 which moves the agent down and to the right"
+    dedicated_9_policy.description = get_description()
+    return 9
+
+
+def dedicated_10_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 10 which moves the agent down and to the left"
+    dedicated_10_policy.description = get_description()
+    return 10
+
+
+def dedicated_11_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 11 which moves the agent up while hitting the enemy"
+    dedicated_11_policy.description = get_description()
+    return 11
+
+
+def dedicated_12_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 12 which moves the agent right while hitting the enemy"
+    dedicated_12_policy.description = get_description()
+    return 12
+
+
+def dedicated_13_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 13 which moves the agent left while hitting the enemy"
+    dedicated_13_policy.description = get_description()
+    return 13
+
+
+def dedicated_14_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 14 which moves the agent down while hitting the enemy"
+    dedicated_14_policy.description = get_description()
+    return 14
+
+
+def dedicated_15_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 15 which moves the agent up and to the right while hitting the enemy"
+    dedicated_15_policy.description = get_description()
+    return 15
+
+
+def dedicated_16_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 16 which moves the agent up and to the left while hitting the enemy"
+    dedicated_16_policy.description = get_description()
+    return 16
+
+
+def dedicated_17_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 17 which moves the agent down and to the right while hitting the enemy"
+    dedicated_17_policy.description = get_description()
+    return 17
+
+
+def dedicated_18_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 18 which moves the agent down and to the left while hitting the enemy"
+    dedicated_18_policy.description = get_description()
+    return 18
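An aside on the pattern used by these policy functions: each one attaches its `description` string to the function object only when it is called, so code that needs the description typically invokes the policy once first. A small, hypothetical usage sketch (not part of this commit):

```python
# Hypothetical usage sketch; the dedicated policies ignore `state` and return a fixed action.
from envs.atari import montezumarevenge_policies as mr_policies

action = mr_policies.dedicated_3_policy(state=None, pre_action=1)  # always returns 3
print(mr_policies.dedicated_3_policy.description)  # set as a side effect of the call above
```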
envs/atari/montezumarevenge_translator.py ADDED
@@ -0,0 +1,136 @@
+class BasicLevelTranslator:
+    def __init__(self):
+        self.player_direction_map = {
+            72: "facing left",
+            40: "facing left, climbing down ladder/rope",
+            24: "facing left, climbing up ladder/rope",
+            128: "facing right",
+            32: "facing right, climbing down ladder/rope",
+            16: "facing right, climbing up ladder/rope",
+        }
+
+    def translate(self, state):
+        (
+            room_number, player_x, player_y, player_direction, enemy_skull_x, enemy_skull_y,
+            key_monster_x, key_monster_y, level, num_lives, items_in_inventory_count,
+            room_state, score_0, score_1, score_2
+        ) = state
+
+        player_dir = self.player_direction_map.get(player_direction, "unknown direction")
+        picked_up_items = "None"
+
+        if items_in_inventory_count > 0:
+            items = [
+                ("Key", "Opens locked doors.", 1),
+                ("Ankh", "Freeze enemies.", 2),
+                ("Gem", "Extra bonus points.", 4),
+                ("Torch", "Lights up dark rooms.", 8),
+                ("Sword", "Vanquishes certain enemies.", 16),
+            ]
+
+            picked_up_items = ""
+            for name, desc, val in items:
+                if items_in_inventory_count & val == val:
+                    picked_up_items += f"{name} ({desc}), "
+            picked_up_items = picked_up_items[:-2]
+
+        res = f"""Room Number: {room_number}
+Player Position: ({player_x}, {player_y})
+Player Direction: {player_dir}
+Enemy Skull Position: ({enemy_skull_x}, {enemy_skull_y})
+Key Monster Position: ({key_monster_x}, {key_monster_y})
+Level: {level}
+Remaining Lives: {num_lives}
+Items in Inventory: {picked_up_items if picked_up_items else "None"}
+Room State (Mapped Based on Room Number): {room_state}
+Current Score: {score_0}{score_1}{score_2}\n"""
+        return res
+
+
+class GameDescriber:
+    def __init__(self, args):
+        self.is_only_local_obs = args.is_only_local_obs == 1
+        self.max_episode_len = args.max_episode_len
+        self.action_desc_dict = {
+        }
+        self.reward_desc_dict = {
+        }
+
+    def describe_goal(self):
+        return ("The goal is to guide PANAMA JOE safely to reach Montezuma's fantastic treasure. "
+                "Avoid danger, collect special tools and rewards, and navigate through the chambers of the emperor's fortress.")
+
+    def describe_game(self):
+        return ("""In Montezuma's Revenge, you control a fearless adventurer named PANAMA JOE who aims to navigate through a maze
+of death-dealing chambers within Emperor Montezuma's fortress. PANAMA JOE can walk, climb, and jump in the game. In each room of the
+maze, there are several dangers, including various creatures such as skulls, snakes, spiders, and bats, as well as several deadly room
+fixtures like fire pits, conveyor belts, disappearing floors, laser gates, floor spikes, and laser walls.
+
+PANAMA JOE can act on several elements within the game environment. Some items in the game are:
+1. Keys: Essential to open locked doors, allowing access to other rooms and deeper exploration.
+2. Ankhs: Freeze all Killer Creatures in the room for 6.5 seconds, during which they can't move or kill.
+3. Gems: Extra bonus points when collected.
+4. Torches: Light up dark rooms, making it easier to navigate through threats.
+5. Swords: Used to defeat certain enemies, by contact with the tip of the sword.
+
+The game's ultimate goal is to reach the fabulous Treasure Room containing Montezuma's treasure while amassing as many points as
+possible and keeping PANAMA JOE alive through the challenges. The game ends when you lose all of your PANAMA JOEs, with a maximum
+of 6 lives.""")
+
+    def translate_terminate_state(self, state, episode_len, max_episode_len):
+        return ""
+
+    def translate_potential_next_state(self, state, action):
+        return ""
+
+    def describe_action(self):
+        actions = {
+            1: "No Operation",
+            2: "Move Right",
+            3: "Move Left",
+            4: "Move Down",
+            5: "Move Up",
+            6: "Move Right + Climb Down",
+            7: "Move Left + Climb Down",
+            8: "Move Right + Climb Up",
+            9: "Move Left + Climb Up",
+            10: "Jump",
+            11: "Jump Right",
+            12: "Jump Left",
+            13: "Jump Down",
+            14: "Jump Up",
+            15: "Jump Right + Climb Down",
+            16: "Jump Left + Climb Down",
+            17: "Jump Right + Climb Up",
+            18: "Jump Left + Climb Up",
+        }
+
+        description = "Your Next Move:\n"
+        for action_number, action_name in actions.items():
+            description += f"{action_number}: {action_name}\n"
+
+        description += "Please choose an action from the list above."
+        return description
+
+class BasicStateSequenceTranslator(BasicLevelTranslator):
+    def __init__(self):
+        super().__init__()
+
+    def translate(self, infos, is_current=False):
+        descriptions = []
+        if is_current:
+            state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
+            return state_desc
+        for i, info in enumerate(infos):
+            assert 'state' in info, "info should contain state information"
+
+            state_desc = BasicLevelTranslator().translate(info['state'])
+            action_desc = f'Take Action: {["No Operation", "Move Right", "Move Left", "Move Down", "Move Up", "Move Right + Climb Down", "Move Left + Climb Down", "Move Right + Climb Up", "Move Left + Climb Up", "Jump", "Jump Right", "Jump Left", "Jump Down", "Jump Up", "Jump Right + Climb Down", "Jump Left + Climb Down", "Jump Right + Climb Up", "Jump Left + Climb Up"][info["action"]]} ({info["action"]}).'
+            reward_desc = f"Result: Reward of {info['reward']}"
+            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
+            descriptions.append(f"{state_desc}\n"
+                                f"{action_desc}\n"
+                                f"{reward_desc}\n"
+                                f"Transit to {next_state_desc}\n")
+
+        return descriptions
envs/atari/mspacman_policies.py ADDED
@@ -0,0 +1,88 @@
+import numpy as np
+
+
+def dedicated_1_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 1 which does NOOP (no operation)"
+
+    dedicated_1_policy.description = get_description()
+    return 1
+
+
+def dedicated_2_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 2 which hits the enemy"
+
+    dedicated_2_policy.description = get_description()
+    return 2
+
+
+def dedicated_3_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 3 which moves the agent up"
+
+    dedicated_3_policy.description = get_description()
+    return 3
+
+
+def dedicated_4_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 4 which moves the agent right"
+
+    dedicated_4_policy.description = get_description()
+    return 4
+
+
+def dedicated_5_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 5 which moves the agent left"
+
+    dedicated_5_policy.description = get_description()
+    return 5
+
+
+def pseudo_random_policy(state, pre_action):
+    def get_description():
+        return "Select an action among 1 to 9 alternatively"
+    pseudo_random_policy.description = get_description()
+    return pre_action % 9 + 1
+
+
+def real_random_policy(state, pre_action=1):
+    def get_description():
+        return "Select action with a random policy"
+    real_random_policy.description = get_description()
+    return np.random.choice(range(0, 9)) + 1
+
+
+# Complete set of dedicated action policies
+def dedicated_6_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 6 which moves the agent down"
+
+    dedicated_6_policy.description = get_description()
+    return 6
+
+
+def dedicated_7_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 7 which moves the agent up and to the right"
+
+    dedicated_7_policy.description = get_description()
+    return 7
+
+
+def dedicated_8_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 8 which moves the agent up and to the left"
+
+    dedicated_8_policy.description = get_description()
+    return 8
+
+
+def dedicated_9_policy(state, pre_action=1):
+    def get_description():
+        return "Always select action 9 which moves the agent down and to the right"
+
+    dedicated_9_policy.description = get_description()
+    return 9
envs/atari/mspacman_translator.py ADDED
@@ -0,0 +1,80 @@
+class BasicLevelTranslator:
+    def __init__(self):
+        pass
+
+    def translate(self, state):
+        x, y = state[8], state[9]
+        ghosts = [(state[0], state[4]), (state[1], state[5]), (state[2], state[6]), (state[3], state[7])]
+        ghost_directions = ["UP", "RIGHT", "LEFT", "DOWN"]
+
+        direction = ghost_directions[int(state[13])]
+        eaten_dots = state[14]
+        score = state[15]
+        lives = state[16]
+        ghosts_count = state[12]
+
+        fruit_x, fruit_y = state[10], state[11]
+        fruit_present = fruit_x != 0 or fruit_y != 0
+
+        player_state = f"Ms. Pac-Man is at position ({x}, {y}), facing {direction} with {lives} lives left. {eaten_dots} dots have been eaten so far and the current score is {score}. The game has {ghosts_count} ghosts."
+
+        ghost_states = []
+        for i, (gx, gy) in enumerate(ghosts):
+            ghost_name = ["Sue", "Inky", "Pinky", "Blinky"][i]
+            ghost_states.append(f"{ghost_name} the ghost is at position ({gx}, {gy})")
+        ghost_state_str = " ".join(ghost_states)
+
+        fruit_state = f"A fruit is present at position ({fruit_x}, {fruit_y})" if fruit_present else "No fruit is currently present on the screen."
+
+        result = f"{player_state} {fruit_state} {ghost_state_str}"
+        return result
+
+
+class GameDescriber:
+    def __init__(self, args):
+        self.is_only_local_obs = args.is_only_local_obs == 1
+        self.max_episode_len = args.max_episode_len
+        self.action_desc_dict = {
+        }
+        self.reward_desc_dict = {
+        }
+
+    def describe_goal(self):
+        return "The goal of Ms. Pac-Man is to score as many points as possible while avoiding the ghosts."
+
+    def describe_game(self):
+        return "In the Ms. Pac-Man game, you control Ms. Pac-Man, moving around a maze, eating dots to score points. "\
+               "There are also special bonus items, such as fruit and pretzels, that appear for a limited time and award "\
+               "extra points. Ghosts chase Ms. Pac-Man around the maze, but she can eat an energy pill to temporarily "\
+               "turn the ghosts vulnerable and eat them for extra points. The game ends when you lose all your lives. "\
+               "Score 10,000 points and earn a bonus life."
+
+    def translate_terminate_state(self, state, episode_len, max_episode_len):
+        return ""
+
+    def translate_potential_next_state(self, state, action):
+        return ""
+
+    def describe_action(self):
+        return "Your Next Move: \n Please choose an action. Each value corresponds to a directional input as follows: "\
+               "1 - NOOP, 2 - UP, 3 - RIGHT, 4 - LEFT, 5 - DOWN, 6 - UPRIGHT, 7 - UPLEFT, 8 - DOWNRIGHT, 9 - DOWNLEFT. "\
+               "Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4, 5, 6, 7, 8, 9]."
+
+class BasicStateSequenceTranslator(BasicLevelTranslator):
+    def __init__(self):
+        super().__init__()
+
+    def translate(self, infos, is_current=False):
+        descriptions = []
+        if is_current:
+            state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
+            return state_desc
+        for i, info in enumerate(infos):
+            assert 'state' in info, "info should contain state information"
+
+            state_desc = BasicLevelTranslator().translate(info['state'])
+            action_desc = f"Take Action: {['NOOP', 'UP', 'RIGHT', 'LEFT', 'DOWN', 'UPRIGHT', 'UPLEFT', 'DOWNRIGHT', 'DOWNLEFT'][info['action']]} ({info['action']})."
+            reward_desc = f"Result: Reward of {info['reward']}, "
+            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
+            descriptions.append(f"{state_desc}.\\n {action_desc} \\n {reward_desc} \\n Transit to {next_state_desc}")
+        return descriptions
envs/mujoco/ant_translator.py CHANGED
@@ -99,8 +99,7 @@ class GameDescriber:
 
     def describe_action(self):
         return (
-            "Your next move: \n Please provide a list of eight numerical values, each within the range of [-1,1], "
-            "representing the torques to be applied at the eight hinge joints of the ant."
+            "Your next move: \n Please choose your action, which applies torques at the eight hinge joints of the ant. It should be a list of eight numerical values, each within the range of [-1,1]."
        )
 
 class BasicStateSequenceTranslator(BasicLevelTranslator):
record_reflexion.csv CHANGED
@@ -10,6 +10,8 @@ FrozenLake-v1,1,expert,200.0
 MountainCarContinuous-v0,1,expert,200.0
 RepresentedBoxing-v0,1,expert,200.0
 RepresentedPong-v0,1,expert,200.0
+RepresentedMsPacman-v0,1,expert,10000.0
+RepresentedMontezumaRevenge-v0,1,expert,10000.0
 Ant-v4,1,expert,5000.2
 HalfCheetah-v4,1,expert,12138.8
 Hopper-v4,1,expert,3542.2
shell/test_atari.sh CHANGED
@@ -1,2 +1,6 @@
 python main_reflexion.py --env_name RepresentedBoxing-v0 --init_summarizer RepresentedBoxing_init_translator --curr_summarizer RepresentedBoxing_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
-python main_reflexion.py --env_name RepresentedPong-v0 --init_summarizer RepresentedPong_init_translator --curr_summarizer RepresentedPong_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
+python main_reflexion.py --env_name RepresentedPong-v0 --init_summarizer RepresentedPong_init_translator --curr_summarizer RepresentedPong_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
+
+python main_reflexion.py --env_name RepresentedMsPacman-v0 --init_summarizer RepresentedMsPacman_init_translator --curr_summarizer RepresentedMsPacman_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
+
+python main_reflexion.py --env_name RepresentedMontezumaRevenge-v0 --init_summarizer RepresentedMontezumaRevenge_init_translator --curr_summarizer RepresentedMontezumaRevenge_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0