hzxwonder committed · Commit 65ee2b8 · 1 Parent(s): 8c8cf65
This view is limited to 50 files because the commit contains too many changes.
Files changed (50):
  1. deciders/__init__.py +2 -10
  2. deciders/act.py +22 -20
  3. deciders/cot.py +1 -1
  4. deciders/{jarvis.py → exe.py} +29 -24
  5. deciders/jarvis_without_insights.py +0 -179
  6. deciders/jarvis_without_shortmem.py +0 -182
  7. deciders/jarvis_without_suggestions.py +0 -180
  8. deciders/jarvis_without_summary.py +0 -179
  9. deciders/pal.py +0 -149
  10. deciders/reflexion.py +3 -3
  11. deciders/self_consistency.py +2 -2
  12. deciders/selfask.py +1 -1
  13. deciders/spp.py +1 -1
  14. deciders/utils.py +12 -17
  15. distillers/guider.py +12 -43
  16. distillers/self_reflection.py +10 -4
  17. distillers/traj_prompt_summarizer.py +20 -6
  18. draw_overall_performance.py +0 -59
  19. environment.yml → environment.yaml +96 -115
  20. envs/classic_control/acrobot_policies.py +1 -1
  21. envs/toy_text/few_shot_examples/cliffwalking_l2.json +0 -0
  22. envs/toy_text/few_shot_examples/cliffwalking_l4.json +0 -0
  23. gen_examples.sh +0 -55
  24. gen_few_shots_examples.py +0 -269
  25. main_merge.py +0 -365
  26. main_merge.sh +0 -123
  27. main_reflexion.py +17 -67
  28. memory/env_history.py +3 -2
  29. prompts/__init__.py +10 -41
  30. prompts/task_irrelevant/prompts.py +1 -1
  31. prompts/task_relevant/box2d/LunarLander.py +1 -77
  32. prompts/task_relevant/classic_control/acrobot.py +1 -89
  33. prompts/task_relevant/classic_control/cartpole.py +1 -67
  34. prompts/task_relevant/classic_control/mountaincar.py +1 -72
  35. prompts/task_relevant/classic_control/mountaincarContinuous.py +1 -72
  36. prompts/task_relevant/toy_text/blackjack.py +1 -67
  37. prompts/task_relevant/toy_text/cliffwalking.py +1 -75
  38. prompts/task_relevant/toy_text/frozenlake.py +1 -50
  39. prompts/task_relevant/toy_text/taxi.py +1 -78
  40. record_reflexion.csv +11 -0
  41. requirements.txt +0 -98
  42. shell/test_acrobot.sh +12 -12
  43. shell/test_blackjack.sh +11 -11
  44. shell/test_cartpole.sh +11 -18
  45. shell/test_cliffwalking.sh +12 -12
  46. shell/{test_jarvis.sh → test_exe.sh} +40 -40
  47. shell/test_frozenlake.sh +12 -12
  48. shell/test_jarvis_woi.sh +0 -55
  49. shell/test_jarvis_wosh.sh +0 -55
  50. shell/test_jarvis_wosug.sh +0 -55
deciders/__init__.py CHANGED
@@ -1,26 +1,18 @@
 
 from .act import NaiveAct, RandomAct
 from .selfask import SelfAskAct
-from .pal import PAL
 from .cot import ChainOfThought
 from .self_consistency import SelfConsistency
 from .spp import SPP
 from .reflexion import Reflexion
-from .jarvis import Jarvis
-from .jarvis_without_insights import JarvisWithoutInsight
-from .jarvis_without_suggestions import JarvisWithoutSuggestions
-from .jarvis_without_shortmem import JarvisWithoutShortMem
+from .exe import EXE
 
 REGISTRY = {}
 REGISTRY['random_actor'] = RandomAct
 REGISTRY['naive_actor'] = NaiveAct
 REGISTRY['selfask_actor'] = SelfAskAct
-REGISTRY['pal_actor'] = PAL
 REGISTRY['cot_actor'] = ChainOfThought
 REGISTRY['self_consistency_actor'] = SelfConsistency
 REGISTRY['spp_actor'] = SPP
 REGISTRY['reflexion_actor'] = Reflexion
-REGISTRY['jarvis_actor'] = Jarvis
-REGISTRY['jarvis_actor_woi'] = JarvisWithoutInsight
-REGISTRY['jarvis_actor_wosug'] = JarvisWithoutSuggestions
-REGISTRY['jarvis_actor_wosh'] = JarvisWithoutShortMem
+REGISTRY['exe_actor'] = EXE
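A minimal usage sketch (not part of the commit) of the dispatch pattern this registry enables: an entry point presumably resolves a decider class from a CLI name such as exe_actor and instantiates it. The stub class and None arguments below are placeholders, not the real EXE constructor signature.

class _StubDecider:
    # Placeholder standing in for a decider class such as deciders.exe.EXE.
    def __init__(self, action_space, args, prompts, distiller):
        self.action_space = action_space

REGISTRY = {'exe_actor': _StubDecider}

decider_cls = REGISTRY['exe_actor']            # name -> class lookup
decider = decider_cls(None, None, None, None)  # real callers pass env/config objects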
deciders/act.py CHANGED
@@ -6,7 +6,7 @@ from loguru import logger
 from .parser import PARSERS
 from langchain.output_parsers import PydanticOutputParser
 from langchain.output_parsers import OutputFixingParser
-from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
+from langchain.chat_models import AzureChatOpenAI
 from memory.env_history import EnvironmentHistory
 import tiktoken
 import json
@@ -21,7 +21,7 @@ class RandomAct():
         return self.action_space.sample()+1, '', '', '', 0, 0
 
 class NaiveAct(gpt):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.0, max_tokens=512, logger=None):
+    def __init__(self, action_space, args, prompts, distiller, temperature=0.0, max_tokens=2048, logger=None):
         self.action_space = action_space
         self.temperature = temperature
         self.action_desc_dict = args.action_desc_dict
@@ -48,7 +48,7 @@ class NaiveAct(gpt):
             self.memory = self.summarized_fewshot_example
         if args.use_short_mem == 1:
             self.use_short_mem = True
-            self.mem_num = self.args.trajectories_num
+            self.mem_num = self.args.short_mem_num
         else:
             self.use_short_mem = False
             self.mem_num = 0
@@ -74,10 +74,11 @@ class NaiveAct(gpt):
         self.env_history.reset()
 
     def clear_mem(self):
+        self.update_mem()
         self.pre_memory = []
         self.post_memory = []
         self.is_first = True
-        self._update_mem(None)
+        self.env_history.reset()
 
 
     def _parser_initialization(self):
@@ -87,16 +88,15 @@ class NaiveAct(gpt):
         else:
             num_action = 1
 
-        # autofixing_chat = AzureChatOpenAI(
-        #     openai_api_type=openai.api_type,
-        #     openai_api_version=openai.api_version,
-        #     openai_api_base=openai.api_base,
-        #     openai_api_key=openai.api_key,
-        #     deployment_name="gpt-35-turbo",
-        #     temperature=self.temperature,
-        #     max_tokens=self.max_tokens
-        # )
-        autofixing_chat = ChatOpenAI(temperature=0, openai_api_key=openai.api_key)
+        autofixing_chat = AzureChatOpenAI(
+            openai_api_type=openai.api_type,
+            openai_api_version=openai.api_version,
+            openai_api_base=openai.api_base,
+            openai_api_key=openai.api_key,
+            deployment_name=self.args.gpt_version,
+            temperature=self.temperature,
+            max_tokens=self.max_tokens
+        )
 
         parser = PydanticOutputParser(pydantic_object=PARSERS[num_action])
         autofixing_parser = OutputFixingParser.from_llm(
@@ -127,13 +127,14 @@ class NaiveAct(gpt):
         for i, transition in enumerate(traj):
             traj_text += transition['observation']
             traj_text += f"> {transition['action']}"
+            traj_text += f"{transition.get('reward','')}\n"
             one_traj_token = self.num_tokens_from_string(traj_text)
-            if one_traj_token > 5000:
+            if one_traj_token > self.args.max_query_tokens:
                 max_step_num = i+1
                 break
             traj_text += f"Your performance is: {transition['cum_reward']}"
         if not max_step_num:
-            max_step_num = 200
+            max_step_num = self.args.max_episode_len
         self.summarized_fewshot_example = self.distiller.generate_from_file(json_file,max_step_num=max_step_num)
 
     def response(self, state_description, action_description, env_info, game_description=None, goal_description=None, fewshot_examples=None):
@@ -155,10 +156,11 @@ class NaiveAct(gpt):
         self.game_description = game_description
         self.goal_description = goal_description
         self.env_history.add("observation", state_description)
-        # print(self.env_history)
-        if len(self.env_history) >= 2:
+
+        # limit the token used, or it may exceed the max token
+        if len(self.env_history):
             one_history_token = self.num_tokens_from_string(self.env_history.get_one_history())
-            self.env_history.set_history(6000 // one_history_token)
+            self.env_history.set_history(self.args.max_query_tokens // one_history_token)
 
     def act(self, state_description, action_description, env_info, game_description=None, goal_description=None, logfile=None):
         self._add_history_before_action(game_description, goal_description, state_description)
@@ -192,7 +194,7 @@ class NaiveAct(gpt):
         if self.use_short_mem:
             if len(self.env_history) > 1:
                 my_mem += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                my_mem += f"\nBelow are the latest {min(self.args.short_mem_num,len(self.env_history)//2)} historical data entries:\n"
+                my_mem += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
                 my_mem += f"{self.env_history.get_histories(self.mem_num)}"
 
         while asking_round < 3:
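The common thread in these act.py changes is a configurable token budget: the hard-coded 5000/6000 limits become args.max_query_tokens, and the short-memory window is sized as max_query_tokens // one_history_token. A self-contained sketch of that calculation, assuming tiktoken's cl100k_base encoding and an illustrative budget value; the sample string is an invented observation/action entry, not one from the repo.

import tiktoken

def history_window(one_history: str, max_query_tokens: int = 5000) -> int:
    # How many history entries of this size fit inside the query budget.
    enc = tiktoken.get_encoding("cl100k_base")
    tokens_per_entry = len(enc.encode(one_history))
    return max_query_tokens // max(tokens_per_entry, 1)

# Example: a short CartPole-style observation/action pair.
print(history_window("Current state: cart at 0.1, pole angle 0.02\n> push right (2)"))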
deciders/cot.py CHANGED
@@ -88,7 +88,7 @@ class ChainOfThought(NaiveAct):
             if len(self.env_history) > 1:
                 if not suffix_flag:
                     human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                human_template += f"\nBelow are the latest {self.args.short_mem_num} historical data entries:\n"
+                human_template += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
                 human_template += f"{self.env_history.get_histories(self.mem_num)}"
         human_template += '\nNext is the observation that the agent gets:\nCurrent {state_description}\n'
         human_template += 'Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Here is the action description below:\n{action_description}\n'
deciders/{jarvis.py → exe.py} RENAMED
@@ -1,6 +1,6 @@
 import openai
 from .misc import history_to_str
-from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
+from langchain.chat_models import AzureChatOpenAI
 from langchain.prompts.chat import (
     PromptTemplate,
     ChatPromptTemplate,
@@ -19,7 +19,7 @@ from loguru import logger
 
 
 
-class Jarvis(NaiveAct):
+class EXE(NaiveAct):
     def __init__(self, action_space, args, prompts, distiller, temperature=0., max_tokens=None, logger=None, fixed_suggestion=None, fixed_insight=None):
         super().__init__(action_space, args, prompts, distiller, temperature, max_tokens, logger)
         self.pre_memory = []
@@ -30,8 +30,7 @@
         self.goal_description = args.goal_description
         self.action_description = args.action_description
         self.action_desc_dict = args.action_desc_dict
-        self.mem_num = args.trajectories_num
-        self.temperature = temperature
+        self.mem_num = args.short_mem_num
         self.fixed_suggestion = fixed_suggestion
         self.fixed_insight = fixed_insight
         self._update_mem(None)
@@ -50,10 +49,12 @@
         self._update_mem(traj)
 
     def clear_mem(self):
+        self.update_mem()
         self.pre_memory = []
         self.post_memory = []
         self.is_first = True
-        self._update_mem(None)
+        self.env_history.reset()
+        # self._update_mem(None)
 
     def _update_mem(self, traj):
         if self.memory:
@@ -82,6 +83,7 @@
         insight_str += f"{self.insight}\n"
         suggestion_str = "The suggestions are listed below:" + self.pre_memory[-1]
         return insight_str + suggestion_str
+
     def act(
         self,
         state_description,
@@ -94,7 +96,15 @@
         self.game_description = game_description
         self.goal_description = goal_description
         self.env_history.add("observation", state_description)
-        chat = ChatOpenAI(temperature=0.5, openai_api_key=openai.api_key, model=self.args.gpt_version)
+        chat = AzureChatOpenAI(
+            openai_api_type=openai.api_type,
+            openai_api_version=openai.api_version,
+            openai_api_base=openai.api_base,
+            openai_api_key=openai.api_key,
+            deployment_name=self.args.gpt_version,
+            temperature=self.temperature,
+            max_tokens=self.max_tokens,
+        )
         # print(self.logger)
         reply_format_description = \
             "Your response should choose an optimal action from valid action list, and terminated with following format: "
@@ -107,8 +117,8 @@
         for examples in self.irr_few_shot_examples:
             template += "\nQuestion: \n" + examples['question'] + "Answer: \n" + examples['answer']
 
-        template += "\n\nNow you are in the task."
-        template += " {game_description} {action_description} {goal_description}"
+        template += "\n\nNow you are in the task.\n"
+        template += " {game_description}\n{action_description}\n{goal_description}"
         template += "You are observing something and " \
             "you need to choose the optimal action acoordingly."
         template += 'Response and interact using the format: {reply_format_description}{format_instructions}\n'
@@ -116,7 +126,7 @@
         template += self._read_mem()
         system_message_prompt = SystemMessagePromptTemplate.from_template(template)
 
-        short_memory_template = HumanMessagePromptTemplate.from_template("{history}")
+        short_memory_template = HumanMessagePromptTemplate.from_template("{history}\nNext is the observation that the agent gets:\n{state_description}Please select an optimal action to gain higher rewards based on the current state and history. The action description is below: {action_description}. Please think step by step.")
         chat_prompt = ChatPromptTemplate.from_messages(
             [system_message_prompt, short_memory_template])
         if self.logger:
@@ -130,12 +140,7 @@
             handler = FileCallbackHandler(logfile)
         total_tokens, total_cost = 0, 0
         max_think_times = 1
-        # TODO: ADD REACT Support
-        # print(str(self.env_history))
-        if self.use_short_mem:
-            my_history = str(self.env_history)
-        else:
-            my_history = ""
+
        for i_think in range(max_think_times):
             # chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=True)
             chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=False)
@@ -145,11 +150,11 @@
                     game_description=game_description,
                     goal_description=goal_description,
                     action_description=action_description,
-                    # state_description = self.env_history.get_last_history(),
-                    history=self.env_history.get_histories_with_last(self.mem_num),
+                    state_description = self.env_history.get_last_history(),
+                    history=self.env_history.get_histories(self.mem_num),
                     format_instructions=self.parser.get_format_instructions(),
                     reply_format_description=reply_format_description,
-                    max_token=3000
+                    max_token=self.max_tokens
                 )
 
                 total_tokens += cb.total_tokens
@@ -166,12 +171,12 @@
             self.logger.info(f'History: {history_to_str(env_info["history"])}')
         text_prompt = chat_prompt.format_messages(
             game_description=game_description,
-            goal_description=goal_description,
-            action_description=action_description,
-            # state_description = self.env_history.get_last_history(),
-            history=self.env_history.get_histories_with_last(self.mem_num),
-            format_instructions=self.parser.get_format_instructions(),
-            reply_format_description=reply_format_description,
+            goal_description=goal_description,
+            action_description=action_description,
+            state_description = self.env_history.get_last_history(),
+            history=self.env_history.get_histories(self.mem_num),
+            format_instructions=self.parser.get_format_instructions(),
+            reply_format_description=reply_format_description,
         )
         text_prompt = f'{text_prompt[0].content}\n{text_prompt[1].content}'
         return action, text_prompt, response, total_tokens, total_cost
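For reference, a stripped-down sketch of the two-message prompt EXE.act now builds: a system message carrying the task text plus distilled memory, and a human message carrying recent history and the latest observation. The langchain imports match those in the diff; the template strings and sample values below are abbreviated stand-ins for the full ones above, not the repo's actual data.

from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

system_message_prompt = SystemMessagePromptTemplate.from_template(
    "Now you are in the task.\n{game_description}\n{action_description}\n{goal_description}")
short_memory_template = HumanMessagePromptTemplate.from_template(
    "{history}\nNext is the observation that the agent gets:\n{state_description}")
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, short_memory_template])

messages = chat_prompt.format_messages(
    game_description="CartPole: balance a pole on a moving cart.",
    action_description="1 = push left, 2 = push right.",
    goal_description="Keep the pole upright for as long as possible.",
    history="Observation: pole angle 0.01\n> 2",
    state_description="pole angle 0.03, cart velocity 0.10",
)
print(messages[0].content, messages[1].content, sep="\n---\n")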
deciders/jarvis_without_insights.py DELETED
@@ -1,179 +0,0 @@
-import openai
-from .misc import history_to_str
-from langchain.chat_models import AzureChatOpenAI
-from langchain.prompts.chat import (
-    PromptTemplate,
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-)
-from langchain.prompts.few_shot import FewShotPromptTemplate
-from langchain import LLMChain
-from loguru import logger
-from langchain.callbacks import FileCallbackHandler
-from langchain.callbacks import get_openai_callback
-from .act import NaiveAct
-from memory.env_history import EnvironmentHistory
-import tiktoken
-from .utils import run_chain
-
-
-class JarvisWithoutInsight(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens)
-        self.pre_memory = []
-        self.post_memory = []
-        self.is_first = True
-        self.num_trails = args.num_trails
-        self.game_description = args.game_description
-        self.goal_description = args.goal_description
-        self.action_description = args.action_description
-        self._update_mem(None)
-
-    def update_mem(self,):
-        traj = self.game_description
-        traj += self.goal_description
-        max_step_num = min(14000 // self.num_tokens_from_string(self.env_history.get_one_history()),200)
-        traj += self.env_history.get_histories(max_step_num)
-        self._update_mem(traj)
-
-    def _update_mem(self, traj):
-        if not self.is_first:
-            summary = self.distiller.generate_summary(traj, self.post_memory)
-            self.post_memory.append(summary)
-            self.insight = self.distiller.generate_insight(self.post_memory)
-        else:
-            self.is_first = False
-        suggestion = self.distiller.generate_suggestion(self.game_description, self.goal_description, self.action_description, self.pre_memory, self.post_memory, self.num_trails)
-        self.pre_memory.append(suggestion)
-        self.env_history.reset()
-
-    def _read_mem(self, ):
-        insight_str = ""
-        suggestion_str = "The suggestions are listed below:" + self.pre_memory[-1]
-        return insight_str + suggestion_str
-
-    def act(
-        self,
-        state_description,
-        action_description,
-        env_info,
-        game_description,
-        goal_description,
-        logfile=None,
-    ):
-        self.game_description = game_description
-        self.goal_description = goal_description
-        self.env_history.add("observation", state_description)
-        chat = AzureChatOpenAI(
-            openai_api_type=openai.api_type,
-            openai_api_version=openai.api_version,
-            openai_api_base=openai.api_base,
-            openai_api_key=openai.api_key,
-            deployment_name=self.args.gpt_version,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-        )
-        reply_format_description = \
-            "Your response should choose an optimal action from valid action list, and terminated with following format: "
-        # only task relevant examplesA
-        template = "Now you are completing a task. "
-        template += "You need to carefully understand the description of the game. "
-        # TODO: few shot example handle
-        if self.irr_few_shot_examples:
-            template += "Here are some examples of how you should completing a task."
-            for examples in self.irr_few_shot_examples:
-                template += "\nQuestion: \n" + examples['question'] + "Answer: \n" + examples['answer']
-
-        if self.fewshot_example:
-            if self.expert_knowledge:
-                template += "Here, I will provide you with some expert knowledge to help you better understand the rules of the task."
-                template += self.expert_knowledge + '\n'
-            template += "Next are some examples: "
-        system_message_prompt = SystemMessagePromptTemplate.from_template(template)
-
-        human_template = ""
-        human_template += "\n\nNow you are in the task.\n"
-        human_template += "{game_description}\n{action_description}\n{goal_description}\n"
-        human_template += "You are observing something and " \
-            "you need to choose the optimal action acoordingly. "
-        human_template += 'Response and interact using the format: {reply_format_description}{format_instructions}\n'
-        human_template += self._read_mem()
-        human_template += "\n\nHere are some history states listed below:\n"
-
-        fewshot_example_prompt = PromptTemplate(
-            input_variables=["question", "answer"],
-            template="Question: \n{question}\n{answer}"
-        )
-        human_message_prompt = FewShotPromptTemplate(
-            examples=self.fewshot_example,
-            example_prompt=fewshot_example_prompt,
-            suffix=human_template,
-            input_variables=[
-                'game_description', 'goal_description',
-                'action_description', 'reply_format_description'],
-            partial_variables={'format_instructions': self.parser.get_format_instructions()}
-        )
-        human_message_prompt = HumanMessagePromptTemplate(prompt=human_message_prompt)
-
-        short_memory_template = HumanMessagePromptTemplate.from_template("{history} Please select an action based on the current game state:")
-
-        chat_prompt = ChatPromptTemplate.from_messages(
-            [system_message_prompt, human_message_prompt, short_memory_template])
-
-
-        if logfile:
-            # logger.remove()
-            if self.first_call:
-                logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' not in x['message'])
-                self.first_call = False
-            handler = FileCallbackHandler(logfile)
-        total_tokens, total_cost = 0, 0
-        max_think_times = 1
-        # TODO: ADD REACT Support
-        # print(str(self.env_history))
-        if self.use_short_mem:
-            my_history = str(self.env_history)
-        else:
-            my_history = ""
-        for i_think in range(max_think_times):
-            chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=False)
-            with get_openai_callback() as cb:
-                response = run_chain(
-                    chain,
-                    game_description=game_description,
-                    goal_description=goal_description,
-                    action_description=action_description,
-                    history=str(self.env_history),
-                    format_instructions=self.parser.get_format_instructions(),
-                    reply_format_description=reply_format_description,
-                    max_token = 3000
-                )
-
-                total_tokens += cb.total_tokens
-                total_cost += cb.total_cost
-        action = self.parser.parse(response).action
-
-        text_prompt = chat_prompt.format_messages(
-            game_description=game_description,
-            goal_description=goal_description,
-            action_description=action_description,
-            history=str(self.env_history),
-            format_instructions=self.parser.get_format_instructions(),
-            reply_format_description=reply_format_description,
-        )
-        texts = ""
-        for text in text_prompt:
-            texts += text.content + "\n"
-
-        self._add_history_after_action(action)
-        logger.info(f'The GPT response is: {response}.')
-        logger.info(f'The optimal action is: {action}.')
-        if self.pre_memory:
-            logger.info(f'The suggestion is: {self.pre_memory[-1]}.')
-        if self.post_memory:
-            logger.info(f'The summary is: {self.post_memory[-1]}.')
-        if env_info.get('history'):
-            logger.info(f'History: {history_to_str(env_info["history"])}')
-
-        return action, texts, response, logger, total_tokens, total_cost
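All four deleted ablation classes share the trajectory-memory cycle visible in _update_mem above: summarize the finished episode, distill an insight from the summaries, then generate a fresh suggestion for the next episode. A schematic sketch with a stub distiller (the real implementations live under distillers/); the stub's method bodies and return strings are invented for illustration.

class StubDistiller:
    # Mirrors the three distiller calls made by _update_mem.
    def generate_summary(self, traj, post_memory):
        return f"summary of a {len(traj)}-char trajectory"
    def generate_insight(self, post_memory):
        return "insight distilled from all summaries so far"
    def generate_suggestion(self):
        return "suggestion for the next episode"

distiller = StubDistiller()
pre_memory, post_memory, is_first = [], [], True
for traj in ["episode 1 text", "episode 2 text"]:
    if not is_first:  # the very first call (traj=None in the repo) skips summarization
        post_memory.append(distiller.generate_summary(traj, post_memory))
        insight = distiller.generate_insight(post_memory)
    is_first = False
    pre_memory.append(distiller.generate_suggestion())
print(pre_memory, post_memory)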
deciders/jarvis_without_shortmem.py DELETED
@@ -1,182 +0,0 @@
-import openai
-from .misc import history_to_str
-from langchain.chat_models import AzureChatOpenAI
-from langchain.prompts.chat import (
-    PromptTemplate,
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-)
-from langchain.prompts.few_shot import FewShotPromptTemplate
-from langchain import LLMChain
-from loguru import logger
-from langchain.callbacks import FileCallbackHandler
-from langchain.callbacks import get_openai_callback
-from .act import NaiveAct
-from memory.env_history import EnvironmentHistory
-import tiktoken
-from .utils import run_chain
-
-
-class JarvisWithoutShortMem(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens)
-        self.pre_memory = []
-        self.post_memory = []
-        self.is_first = True
-        self.num_trails = args.num_trails
-        self.game_description = args.game_description
-        self.goal_description = args.goal_description
-        self.action_description = args.action_description
-        self._update_mem(None)
-
-    def update_mem(self,):
-        traj = self.game_description
-        traj += self.goal_description
-        max_step_num = min(14000 // self.num_tokens_from_string(self.env_history.get_one_history()),200)
-        traj += self.env_history.get_histories(max_step_num)
-        self._update_mem(traj)
-
-    def _update_mem(self, traj):
-        if not self.is_first:
-            summary = self.distiller.generate_summary(traj, self.post_memory)
-            self.post_memory.append(summary)
-            self.insight = self.distiller.generate_insight(self.post_memory)
-        else:
-            self.is_first = False
-        suggestion = self.distiller.generate_suggestion(self.game_description, self.goal_description, self.action_description, self.pre_memory, self.post_memory, self.num_trails)
-        self.pre_memory.append(suggestion)
-        self.env_history.reset()
-
-    def _read_mem(self, ):
-        insight_str = ""
-        if len(self.post_memory) > 0:
-            insight_str += "The insights of the game are listed below: "
-            insight_str += f"{self.insight}\n"
-        suggestion_str = "The suggestions are listed below:" + self.pre_memory[-1]
-        return insight_str + suggestion_str
-
-    def act(
-        self,
-        state_description,
-        action_description,
-        env_info,
-        game_description,
-        goal_description,
-        logfile=None,
-    ):
-        self.game_description = game_description
-        self.goal_description = goal_description
-        self.env_history.add("observation", state_description)
-        chat = AzureChatOpenAI(
-            openai_api_type=openai.api_type,
-            openai_api_version=openai.api_version,
-            openai_api_base=openai.api_base,
-            openai_api_key=openai.api_key,
-            deployment_name=self.args.gpt_version,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-        )
-        reply_format_description = \
-            "Your response should choose an optimal action from valid action list, and terminated with following format: "
-        # only task relevant examplesA
-        template = "Now you are completing a task. "
-        template += "You need to carefully understand the description of the game. "
-        # TODO: few shot example handle
-        if self.irr_few_shot_examples:
-            template += "Here are some examples of how you should completing a task."
-            for examples in self.irr_few_shot_examples:
-                template += "\nQuestion: \n" + examples['question'] + "Answer: \n" + examples['answer']
-
-        if self.fewshot_example:
-            if self.expert_knowledge:
-                template += "Here, I will provide you with some expert knowledge to help you better understand the rules of the task."
-                template += self.expert_knowledge + '\n'
-            template += "Next are some examples: "
-        system_message_prompt = SystemMessagePromptTemplate.from_template(template)
-
-        human_template = ""
-        human_template += "\n\nNow you are in the task.\n"
-        human_template += "{game_description}\n{action_description}\n{goal_description}\n"
-        human_template += "You are observing something and " \
-            "you need to choose the optimal action acoordingly. "
-        human_template += 'Response and interact using the format: {reply_format_description}{format_instructions}\n'
-        human_template += self._read_mem()
-        human_template += "\n\nHere are some history states listed below:\n"
-
-        fewshot_example_prompt = PromptTemplate(
-            input_variables=["question", "answer"],
-            template="Question: \n{question}\n{answer}"
-        )
-        human_message_prompt = FewShotPromptTemplate(
-            examples=self.fewshot_example,
-            example_prompt=fewshot_example_prompt,
-            suffix=human_template,
-            input_variables=[
-                'game_description', 'goal_description',
-                'action_description', 'reply_format_description'],
-            partial_variables={'format_instructions': self.parser.get_format_instructions()}
-        )
-        human_message_prompt = HumanMessagePromptTemplate(prompt=human_message_prompt)
-
-        short_memory_template = HumanMessagePromptTemplate.from_template("{history} Please select an action based on the current game state:")
-
-        chat_prompt = ChatPromptTemplate.from_messages(
-            [system_message_prompt, human_message_prompt, short_memory_template])
-
-
-        if logfile:
-            # logger.remove()
-            if self.first_call:
-                logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' not in x['message'])
-                self.first_call = False
-            handler = FileCallbackHandler(logfile)
-        total_tokens, total_cost = 0, 0
-        max_think_times = 1
-        # TODO: ADD REACT Support
-        # print(str(self.env_history))
-        if self.use_short_mem:
-            my_history = str(self.env_history)
-        else:
-            my_history = ""
-        for i_think in range(max_think_times):
-            chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=False)
-            with get_openai_callback() as cb:
-                response = run_chain(
-                    chain,
-                    game_description=game_description,
-                    goal_description=goal_description,
-                    action_description=action_description,
-                    history=self.env_history.get_last_history(),
-                    format_instructions=self.parser.get_format_instructions(),
-                    reply_format_description=reply_format_description,
-                    max_token = 3000
-                )
-
-                total_tokens += cb.total_tokens
-                total_cost += cb.total_cost
-        action = self.parser.parse(response).action
-
-        text_prompt = chat_prompt.format_messages(
-            game_description=game_description,
-            goal_description=goal_description,
-            action_description=action_description,
-            history=self.env_history.get_last_history(),
-            format_instructions=self.parser.get_format_instructions(),
-            reply_format_description=reply_format_description,
-        )
-        texts = ""
-        for text in text_prompt:
-            texts += text.content + "\n"
-
-        self._add_history_after_action(action)
-        logger.info(f'The GPT response is: {response}.')
-        logger.info(f'The optimal action is: {action}.')
-        if self.pre_memory:
-            logger.info(f'The suggestion is: {self.pre_memory[-1]}.')
-        if self.post_memory:
-            logger.info(f'The summary is: {self.post_memory[-1]}.')
-        if env_info.get('history'):
-            logger.info(f'History: {history_to_str(env_info["history"])}')
-
-        return action, texts, response, logger, total_tokens, total_cost
deciders/jarvis_without_suggestions.py DELETED
@@ -1,180 +0,0 @@
-import openai
-from .misc import history_to_str
-from langchain.chat_models import AzureChatOpenAI
-from langchain.prompts.chat import (
-    PromptTemplate,
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-)
-from langchain.prompts.few_shot import FewShotPromptTemplate
-from langchain import LLMChain
-from loguru import logger
-from langchain.callbacks import FileCallbackHandler
-from langchain.callbacks import get_openai_callback
-from .act import NaiveAct
-from memory.env_history import EnvironmentHistory
-import tiktoken
-from .utils import run_chain
-
-
-class JarvisWithoutSuggestions(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens)
-        self.pre_memory = []
-        self.post_memory = []
-        self.is_first = True
-        self.num_trails = args.num_trails
-        self.game_description = args.game_description
-        self.goal_description = args.goal_description
-        self.action_description = args.action_description
-        self._update_mem(None)
-
-    def update_mem(self,):
-        traj = self.game_description
-        traj += self.goal_description
-        max_step_num = min(14000 // self.num_tokens_from_string(self.env_history.get_one_history()),200)
-        traj += self.env_history.get_histories(max_step_num)
-        self._update_mem(traj)
-
-    def _update_mem(self, traj):
-        if not self.is_first:
-            summary = self.distiller.generate_summary(traj, self.post_memory)
-            self.post_memory.append(summary)
-            self.insight = self.distiller.generate_insight(self.post_memory)
-        else:
-            self.is_first = False
-        suggestion = self.distiller.generate_suggestion(self.game_description, self.goal_description, self.action_description, self.pre_memory, self.post_memory, self.num_trails)
-        self.pre_memory.append(suggestion)
-        self.env_history.reset()
-
-    def _read_mem(self, ):
-        insight_str = ""
-        if len(self.post_memory) > 0:
-            insight_str += "The insights of the game are listed below: "
-            insight_str += f"{self.insight}\n"
-        suggestion_str = "\n"
-        return insight_str + suggestion_str
-
-    def act(
-        self,
-        state_description,
-        action_description,
-        env_info,
-        game_description,
-        goal_description,
-        logfile=None,
-    ):
-        self.game_description = game_description
-        self.goal_description = goal_description
-        self.env_history.add("observation", state_description)
-        chat = AzureChatOpenAI(
-            openai_api_type=openai.api_type,
-            openai_api_version=openai.api_version,
-            openai_api_base=openai.api_base,
-            openai_api_key=openai.api_key,
-            deployment_name=self.args.gpt_version,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-        )
-        reply_format_description = \
-            "Your response should choose an optimal action from valid action list, and terminated with following format: "
-        # only task relevant examplesA
-        template = "Now you are completing a task. "
-        template += "You need to carefully understand the description of the game. "
-        # TODO: few shot example handle
-        if self.irr_few_shot_examples:
-            template += "Here are some examples of how you should completing a task."
-            for examples in self.irr_few_shot_examples:
-                template += "\nQuestion: \n" + examples['question'] + "Answer: \n" + examples['answer']
-
-        if self.fewshot_example:
-            if self.expert_knowledge:
-                template += "Here, I will provide you with some expert knowledge to help you better understand the rules of the task."
-                template += self.expert_knowledge + '\n'
-            template += "Next are some examples: "
-        system_message_prompt = SystemMessagePromptTemplate.from_template(template)
-
-        human_template = ""
-        human_template += "\n\nNow you are in the task.\n"
-        human_template += "{game_description}\n{action_description}\n{goal_description}\n"
-        human_template += "You are observing something and " \
-            "you need to choose the optimal action acoordingly. "
-        human_template += 'Response and interact using the format: {reply_format_description}{format_instructions}\n'
-        human_template += self._read_mem()
-        human_template += "\n\nHere are some history states listed below:\n"
-
-        fewshot_example_prompt = PromptTemplate(
-            input_variables=["question", "answer"],
-            template="Question: \n{question}\n{answer}"
-        )
-        human_message_prompt = FewShotPromptTemplate(
-            examples=self.fewshot_example,
-            example_prompt=fewshot_example_prompt,
-            suffix=human_template,
-            input_variables=[
-                'game_description', 'goal_description',
-                'action_description', 'reply_format_description'],
-            partial_variables={'format_instructions': self.parser.get_format_instructions()}
-        )
-        human_message_prompt = HumanMessagePromptTemplate(prompt=human_message_prompt)
-
-        short_memory_template = HumanMessagePromptTemplate.from_template("{history} Please select an action based on the current game state:")
-
-        chat_prompt = ChatPromptTemplate.from_messages(
-            [system_message_prompt, human_message_prompt, short_memory_template])
-
-
-        if logfile:
-            # logger.remove()
-            if self.first_call:
-                logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' not in x['message'])
-                self.first_call = False
-            handler = FileCallbackHandler(logfile)
-        total_tokens, total_cost = 0, 0
-        max_think_times = 1
-        # TODO: ADD REACT Support
-        # print(str(self.env_history))
-        if self.use_short_mem:
-            my_history = str(self.env_history)
-        else:
-            my_history = ""
-        for i_think in range(max_think_times):
-            chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=False)
-            with get_openai_callback() as cb:
-                response = run_chain(
-                    chain,
-                    game_description=game_description,
-                    goal_description=goal_description,
-                    action_description=action_description,
-                    history=str(self.env_history),
-                    format_instructions=self.parser.get_format_instructions(),
-                    reply_format_description=reply_format_description,
-                    max_token = 3000
-                )
-
-                total_tokens += cb.total_tokens
-                total_cost += cb.total_cost
-        action = self.parser.parse(response).action
-
-        text_prompt = chat_prompt.format_messages(
-            game_description=game_description,
-            goal_description=goal_description,
-            action_description=action_description,
-            history=str(self.env_history),
-            format_instructions=self.parser.get_format_instructions(),
-            reply_format_description=reply_format_description,
-        )
-        texts = ""
-        for text in text_prompt:
-            texts += text.content + "\n"
-
-        self._add_history_after_action(action)
-        logger.info(f'The GPT response is: {response}.')
-        logger.info(f'The optimal action is: {action}.')
-        if self.post_memory:
-            logger.info(f'The summary is: {self.post_memory[-1]}.')
-        if env_info.get('history'):
-            logger.info(f'History: {history_to_str(env_info["history"])}')
-
-        return action, texts, response, logger, total_tokens, total_cost
deciders/jarvis_without_summary.py DELETED
@@ -1,179 +0,0 @@
-import openai
-from .misc import history_to_str
-from langchain.chat_models import AzureChatOpenAI
-from langchain.prompts.chat import (
-    PromptTemplate,
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-)
-from langchain.prompts.few_shot import FewShotPromptTemplate
-from langchain import LLMChain
-from loguru import logger
-from langchain.callbacks import FileCallbackHandler
-from langchain.callbacks import get_openai_callback
-from .act import NaiveAct
-from memory.env_history import EnvironmentHistory
-import tiktoken
-
-
-class Jarvis(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens)
-        self.pre_memory = []
-        self.post_memory = []
-        self.is_first = True
-        self.num_trails = args.num_trails
-        self.game_description = args.game_description
-        self.goal_description = args.goal_description
-        self.action_description = args.action_description
-        self._update_mem(None)
-
-    def update_mem(self,):
-        traj = self.game_description
-        traj += self.goal_description
-        max_step_num = min(14000 // self.num_tokens_from_string(self.env_history.get_one_history()),200)
-        traj += self.env_history.get_histories(max_step_num)
-        self._update_mem(traj)
-
-    def _update_mem(self, traj):
-        if not self.is_first:
-            summary = self.distiller.generate_summary(traj, self.post_memory)
-            self.post_memory.append(summary)
-            self.insight = self.distiller.generate_insight(self.post_memory)
-        else:
-            self.is_first = False
-        suggestion = self.distiller.generate_suggestion(self.game_description, self.goal_description, self.action_description, self.pre_memory, self.post_memory, self.num_trails)
-        self.pre_memory.append(suggestion)
-        self.env_history.reset()
-
-    def _read_mem(self, ):
-        insight_str = ""
-        if len(self.post_memory) > 0:
-            insight_str += "The insights of the game are listed below: "
-            insight_str += f"{self.insight}\n"
-        suggestion_str = "The suggestions are listed below:" + self.pre_memory[-1]
-        return insight_str + suggestion_str
-
-    def act(
-        self,
-        state_description,
-        action_description,
-        env_info,
-        game_description,
-        goal_description,
-        logfile=None,
-    ):
-        self.game_description = game_description
-        self.goal_description = goal_description
-        self.env_history.add("observation", state_description)
-        chat = AzureChatOpenAI(
-            openai_api_type=openai.api_type,
-            openai_api_version=openai.api_version,
-            openai_api_base=openai.api_base,
-            openai_api_key=openai.api_key,
-            deployment_name=self.args.gpt_version,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-        )
-        reply_format_description = \
-            "Your response should choose an optimal action from valid action list, and terminated with following format: "
-        # only task relevant examplesA
-        template = "Now you are completing a task. "
-        template += "You need to carefully understand the description of the game. "
-        # TODO: few shot example handle
-        if self.irr_few_shot_examples:
-            template += "Here are some examples of how you should completing a task."
-            for examples in self.irr_few_shot_examples:
-                template += "\nQuestion: \n" + examples['question'] + "Answer: \n" + examples['answer']
-
-        if self.fewshot_example:
-            if self.expert_knowledge:
-                template += "Here, I will provide you with some expert knowledge to help you better understand the rules of the task."
-                template += self.expert_knowledge + '\n'
-            template += "Next are some examples: "
-        system_message_prompt = SystemMessagePromptTemplate.from_template(template)
-
-        human_template = ""
-        human_template += "\n"
-        human_template += "{game_description}\n{action_description}\n{goal_description}\n"
-        human_template += "You are observing something and " \
-            "you need to choose the optimal action acoordingly. "
-        human_template += 'Response and interact using the format: {reply_format_description}{format_instructions}\n'
-        human_template += self._read_mem()
-        human_template += "\n\nHere are some history states listed below:\n"
-
-        fewshot_example_prompt = PromptTemplate(
-            input_variables=["question", "answer"],
-            template="Question: \n{question}\n{answer}"
-        )
-        human_message_prompt = FewShotPromptTemplate(
-            examples=self.fewshot_example,
-            example_prompt=fewshot_example_prompt,
-            suffix=human_template,
-            input_variables=[
-                'game_description', 'goal_description',
-                'action_description', 'reply_format_description'],
-            partial_variables={'format_instructions': self.parser.get_format_instructions()}
-        )
-        human_message_prompt = HumanMessagePromptTemplate(prompt=human_message_prompt)
-
-        short_memory_template = HumanMessagePromptTemplate.from_template("{history} Please select an action based on the current game state. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or perform any prohibited actions. Here's the action description below: \n {action_description}\n")
-
-        chat_prompt = ChatPromptTemplate.from_messages(
-            [system_message_prompt, human_message_prompt, short_memory_template])
-
-        if logfile:
-            # logger.remove()
-            if self.first_call:
-                logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' not in x['message'])
-                self.first_call = False
-            handler = FileCallbackHandler(logfile)
-        total_tokens, total_cost = 0, 0
-        max_think_times = 1
-        # TODO: ADD REACT Support
-        # print(str(self.env_history))
-        if self.use_short_mem:
-            my_history = str(self.env_history)
-        else:
-            my_history = ""
-        for i_think in range(max_think_times):
-            chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=False)
-            with get_openai_callback() as cb:
-                response = chain.run(
-                    game_description=game_description,
-                    goal_description=goal_description,
-                    action_description=action_description,
-                    history=self.env_history.get_histories(11),
-                    format_instructions=self.parser.get_format_instructions(),
-                    reply_format_description=reply_format_description,
-                    max_token = 3000
-                )
-
-                total_tokens += cb.total_tokens
-                total_cost += cb.total_cost
-        action = self.parser.parse(response).action
-
-        text_prompt = chat_prompt.format_messages(
-            game_description=game_description,
-            goal_description=goal_description,
-            action_description=action_description,
-            history=self.env_history.get_histories(11),
-            format_instructions=self.parser.get_format_instructions(),
-            reply_format_description=reply_format_description,
-        )
-        texts = ""
-        for text in text_prompt:
-            texts += text.content + "\n"
-
-        self._add_history_after_action(action)
-        logger.info(f'The GPT response is: {response}.')
-        logger.info(f'The optimal action is: {action}.')
-        if self.pre_memory:
-            logger.info(f'The suggestion is: {self.pre_memory[-1]}.')
-        if self.post_memory:
-            logger.info(f'The summary is: {self.post_memory[-1]}.')
-        if env_info.get('history'):
-            logger.info(f'History: {history_to_str(env_info["history"])}')
-
-        return action, texts, response, logger, total_tokens, total_cost
deciders/pal.py DELETED
@@ -1,149 +0,0 @@
-import openai
-from .misc import history_to_str
-from langchain.chat_models import AzureChatOpenAI
-from langchain.prompts.chat import (
-    PromptTemplate,
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-)
-from langchain.prompts.few_shot import FewShotPromptTemplate
-from langchain import LLMChain
-from loguru import logger
-from langchain.callbacks import FileCallbackHandler
-from langchain.callbacks import get_openai_callback
-from .act import NaiveAct
-from .utils import run_chain
-
-def get_last_n_lines(text, n):
-    lines = text.splitlines()
-    return '\n'.join(lines[-n:])
-
-class PAL(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None, logger=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens, logger)
-
-    def act(
-        self,
-        state_description,
-        action_description,
-        env_info,
-        game_description,
-        goal_description,
-        logfile=None,
-    ):
-        self._add_history_before_action(game_description, goal_description, state_description)
-        chat = AzureChatOpenAI(
-            openai_api_type=openai.api_type,
-            openai_api_version=openai.api_version,
-            openai_api_base=openai.api_base,
-            openai_api_key=openai.api_key,
-            deployment_name=self.args.gpt_version,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens
-        )
-
-        suffix_flag = False
-        reply_format_description = \
-            "Your response should choose an optimal action from a valid action list and terminate with the following format: "
-
-        # System Message
-        human_template = "Now, you are completing a challenging task. You must carefully understand the Program-aided Language method you will use and apply it to the following task.\n"
-
-        # task-irrelevant SystemMessage
-        if self.irr_few_shot_examples:
-            human_template += 'In the following example, I shall present a set of question and answer with the Program-aided Language method. Please adhere to the format and reasoning of the provided response when addressing the subsequent task.\n'
-            for i, examples in enumerate(self.irr_few_shot_examples):
-                human_template += f"\nExample {i+1}:\n"
-                human_template += "Question: \n" + examples['question'] + "\nAnswer: \n" + examples['answer']
-
-        # task-irrelevant few shot if have
-        if self.irr_few_shot_examples:
-            human_template += "\nMoving forward, I will describe the task, the goal, and the actions you may execute. Please pay close attention to comprehend the information presented below.\n"
-
-        if self.fewshot_example:
-            human_template += "I will describe the task, the goal, and the actions you may execute. Please pay close attention to comprehend the information presented below."
-        # print(fewshot_example_prompt.format(**fewshot_examples[0]))
-        human_template += '\nTask Description: {game_description} \n'
-        human_template += 'Goal Description: {goal_description}\n'
-        human_template += 'Actions Description: {action_description}\n'
-
-        if self.fewshot_example:
-            human_template += "Here, I will provide you with some guidance to help you better understand the rules of the task. Next are some examples: "
-            for i, examples in enumerate(self.fewshot_example):
-                human_template += f"\nExample {i+1}:\n"
-                human_template += "Question: \n" + examples['question'] + "\nAnswer: \n" + examples['answer']
-
-        if self.prompt_level in [2, 3, 4]:
-            if self.memory:
-                human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.\n'
-                suffix_flag = True
-                if self.prompt_level == 2:
-                    human_template += 'I have collected a few trajectories from a random policy, and the summaries are listed below.'
-                elif self.prompt_level == 3:
-                    human_template += 'I have collected a few trajectories before, and the summaries are listed below.'
-                elif self.prompt_level == 4:
-                    human_template += 'I have collected a few trajectories from an expert policy, and the summaries are listed below.'
-                human_template += self._read_mem() + "\n"
-
-        if self.use_short_mem:
-            if len(self.env_history) > 1:
-                if not suffix_flag:
-                    human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                human_template += f"\nBelow are the latest {min(self.args.short_mem_num,len(self.env_history)//2)} historical data entries:\n"
-                human_template += f"{self.env_history.get_histories(self.mem_num)}"
-        human_template += '\nNext is the observation that the agent gets:\nCurrent {state_description}\n'
-        human_template += 'Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Here is the action description below:\n{action_description}\n'
-        human_template += 'Please generate Python program as answers to given questions, similar to the provided examples.\n'
-        human_template += 'And You should calculate the final result based on the program ,not just give a code script alone!\n'
-
-        human_message_prompt = PromptTemplate(
-            template=human_template,
-            input_variables=[
-                'state_description', 'goal_description', 'game_description',
-                'action_description'],
-        )
-
-        human_message_prompt = HumanMessagePromptTemplate(prompt=human_message_prompt)
-
-        chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
-
-        if not self.logger:
-            logger.remove()
-            self.logger = logger.add(logfile, colorize=True, enqueue=True)
-        handler = FileCallbackHandler(logfile)
-
-        chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=False)
-
-        with get_openai_callback() as cb:
-            response = run_chain(
-                chain,
-                game_description=game_description,
-                state_description=state_description,
-                goal_description=goal_description,
-                action_description=action_description,
-            )
-            total_tokens = cb.total_tokens
-            total_cost = cb.total_cost
-        _response = get_last_n_lines(response, 2)
-
-
-        action = self.parser.parse(_response).action
-
-        text_prompt = chat_prompt.format_messages(
-            game_description=game_description,
-            state_description=state_description,
-            goal_description=goal_description,
-            action_description=action_description,
-        )
-        texts = ""
-        for text in text_prompt:
-            texts += text.content + "\n"
-
-        self._add_history_after_action(action)
-        self.logger.info(f'The GPT response is: {response}.')
-        self.logger.info(f'The optimal action is: {action}.')
-        if env_info.get('history'):
-            self.logger.info(f'History: {history_to_str(env_info["history"])}')
-
-        return action, texts, response, total_tokens, total_cost
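For reference, the deleted PAL decider asked the model for a Python program and then parsed only the tail of the reply via get_last_n_lines above. A minimal sketch of that extraction step (the sample reply text is invented for illustration):

    def get_last_n_lines(text, n):
        # Keep only the last n lines of the model reply, where PAL
        # expects the computed result to appear.
        return '\n'.join(text.splitlines()[-n:])

    # Hypothetical model reply: a program followed by its computed answer.
    reply = "def solve():\n    return 2\nresult = solve()\nThe final answer is: 2"
    print(get_last_n_lines(reply, 2))  # last two lines, fed to the parser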
deciders/reflexion.py CHANGED
@@ -31,7 +31,7 @@ class Reflexion(NaiveAct):
         traj = self.game_description
         traj += self.goal_description
         one_history_token = self.num_tokens_from_string(self.env_history.get_one_history())
-        history_num = 4000 // one_history_token
+        history_num = self.args.max_query_tokens // one_history_token
         traj += self.env_history.get_histories_with_last(history_num)
         self._update_mem(traj)
 
@@ -109,7 +109,7 @@ class Reflexion(NaiveAct):
             if len(self.env_history) > 1:
                 if not suffix_flag:
                     human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                human_template += f"\nBelow are the latest {self.mem_num} historical data entries:\n"
+                human_template += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
                 human_template += f"{self.env_history.get_histories(self.mem_num)}"
         human_template += '\nNext is the observation that the agent gets:\nCurrent {state_description}\n'
         human_template += 'Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Here is the action description below:\n{action_description}\n'
@@ -150,7 +150,7 @@ class Reflexion(NaiveAct):
                 action_description=action_description,
                 format_instructions=self.parser.get_format_instructions(),
                 reply_format_description=reply_format_description,
-                max_token = 3000
+                max_token = self.max_tokens
             )
 
             total_tokens += cb.total_tokens
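The 4000 -> args.max_query_tokens change above makes the history budget configurable instead of hard-coded. A minimal sketch of the budgeting idea, using tiktoken (pinned in environment.yaml); the sample entry and budget value are invented:

    import tiktoken

    def num_tokens_from_string(s: str, encoding_name: str = "cl100k_base") -> int:
        # Count tokens so the number of history entries can be budgeted
        # against the model's context window.
        return len(tiktoken.get_encoding(encoding_name).encode(s))

    one_history = "Current state: ...\nAction: 1\nReward: 0\n"  # illustrative entry
    max_query_tokens = 5000                                      # assumed CLI value
    history_num = max_query_tokens // max(num_tokens_from_string(one_history), 1)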
deciders/self_consistency.py CHANGED
@@ -64,7 +64,7 @@ class SelfConsistency(NaiveAct):
 
         if self.fewshot_example:
             human_template += "I will describe the task, the goal, and the actions you may execute. Please pay close attention to comprehend the information presented below."
-        # print(fewshot_example_prompt.format(**fewshot_examples[0]))
+
         human_template += '\nTask Description: {game_description} \n'
         human_template += 'Goal Description: {goal_description}\n'
         human_template += 'Actions Description: {action_description}\n'
@@ -91,7 +91,7 @@ class SelfConsistency(NaiveAct):
             if len(self.env_history) > 1:
                 if not suffix_flag:
                     human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                human_template += f"\nBelow are the latest {self.args.short_mem_num} historical data entries:\n"
+                human_template += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
                 human_template += f"{self.env_history.get_histories(self.mem_num)}"
         human_template += '\nNext is the observation that the agent gets:\nCurrent {state_description}\n'
         human_template += 'Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Here is the action description below:\n{action_description}\n'
deciders/selfask.py CHANGED
@@ -88,7 +88,7 @@ class SelfAskAct(NaiveAct):
             if len(self.env_history) > 1:
                 if not suffix_flag:
                     human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                human_template += f"\nBelow are the latest {self.args.short_mem_num} historical data entries:\n"
+                human_template += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
                 human_template += f"{self.env_history.get_histories(self.mem_num)}"
         human_template += '\nNext is the observation that the agent gets:\nCurrent {state_description}\n'
         human_template += 'Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Here is the action description below:\n{action_description}\n'
deciders/spp.py CHANGED
@@ -81,7 +81,7 @@ class SPP(NaiveAct):
             if len(self.env_history) > 1:
                 if not suffix_flag:
                     human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                human_template += f"\nBelow are the latest {self.args.short_mem_num} historical data entries:\n"
+                human_template += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
                 human_template += f"{self.env_history.get_histories(self.mem_num)}"
         human_template += '\nNext is the observation that the agent gets:\nCurrent {state_description}\n'
         human_template += 'Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Here is the action description below:\n{action_description}\n'
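The same one-line guard lands in reflexion.py, self_consistency.py, selfask.py, and spp.py: the prompt previously claimed a fixed number of history entries even when fewer had been collected. A small illustrative sketch of why the min() matters (the sample values are invented):

    env_history = ["obs/act pair 1", "obs/act pair 2"]  # only 2 entries so far
    mem_num = 10

    # Before: the header promises 10 entries while only 2 follow it.
    header_before = f"Below are the latest {mem_num} historical data entries:"
    # After: the header matches what is actually appended to the prompt.
    header_after = f"Below are the latest {min(mem_num, len(env_history))} historical data entries:"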
deciders/utils.py CHANGED
@@ -1,7 +1,6 @@
 import os
 import sys
 import openai
-from openai import OpenAI
 from tenacity import (
     retry,
     stop_after_attempt,  # type: ignore
@@ -27,10 +26,8 @@
 
 @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
 def get_completion(prompt: str, engine: str = "gpt-35-turbo", temperature: float = 0.0, max_tokens: int = 256, stop_strs: Optional[List[str]] = None) -> str:
-
-    client = OpenAI(api_key=openai.api_key)
-    response = client.chat.completions.create(
-        model=engine,
+    response = openai.Completion.create(
+        engine=engine,
         prompt=prompt,
         temperature=temperature,
         max_tokens=max_tokens,
@@ -42,7 +39,7 @@ def get_completion(prompt: str, engine: str = "gpt-35-turbo", temperature: float
     )
     return response.choices[0].text
 
-# @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
+@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
 def get_chat(prompt: str, model: str = "gpt-35-turbo", engine: str = "gpt-35-turbo", temperature: float = 0.0, max_tokens: int = 256, stop_strs: Optional[List[str]] = None, is_batched: bool = False) -> str:
     assert model != "text-davinci-003"
     messages = [
@@ -51,15 +48,13 @@ def get_chat(prompt: str, model: str = "gpt-35-turbo", engine: str = "gpt-35-tur
             "content": prompt
         }
     ]
-    # import pdb;pdb.set_trace()
-    client = OpenAI(api_key=openai.api_key)
-
-    response = client.chat.completions.create(
+    response = openai.ChatCompletion.create(
         model=model,
+        engine=engine,
         messages=messages,
         max_tokens=max_tokens,
        stop=stop_strs,
         temperature=temperature,
         # request_timeout = 1
     )
-    return response.choices[0].message.content
+    return response.choices[0]["message"]["content"]
distillers/guider.py CHANGED
@@ -9,8 +9,6 @@ class Guidance_Generator():
         self.args = args
         with open("./distillers/guidance_summary_few_shot_examples.txt", 'r') as f:
             self.SUMMARY_FEW_SHOT_EXAMPLES = f.read()
-        # with open("./distillers/exploration_few_shot_examples.txt", 'r') as f:
-        #     self.SUGGEST_FEW_SHOT_EXAMPLES = f.read()
         self.insight = ""
         self.suggestion = ""
         if logfile:
@@ -22,11 +20,17 @@ class Guidance_Generator():
         with open(file_path, 'r') as infile:
             data = json.load(infile)
         for traj in data:
-            traj_text = traj[0]['game_description']
-            traj_text += traj[0]['goal_description']
+            traj_text = traj[0]['game_description']+'\n'
+            traj_text += traj[0]['goal_description']+'\n'
             for transition in traj[-max_step_num:]:
-                traj_text += transition['observation']
-                traj_text += f"Action: {transition['action']}"
+                traj_text += transition['observation']+'\n'
+                if type(eval(transition['action'])) == type([]):
+                    action = float(eval(transition['action'])[0])-1
+                else:
+                    action = transition['action']
+                traj_text += f"Action: {action}\n"
+                traj_text += f"Reward: {transition['reward']}\n"
+                traj_text += f"Your performance is: {transition['cum_reward']}\n"
             summary = self.generate_summary(traj_text, mem)
             mem.append(summary)
         return mem
@@ -44,11 +48,6 @@
         """
         segments = []
 
-        # Summarization memory
-        # if post_memory:
-        #     segments.append('Your summarization memory is as below:')
-        #     segments.extend([f'Episode #{i}: {m}' for i, m in enumerate(post_memory)])
-
         # Trajectory
         segments.append(f"Your new collected trajectory is as below:\n {traj}")
         segments.append(f"The suggestion to guide the trajectory is:\n{self.suggestion}")
@@ -65,19 +64,6 @@
         query = '\n'.join(segments)
         return query
 
-    # def _generate_summary_query(self, traj, post_memory):
-    #     """Allows the Agent to generate exploration guidance."""
-    #     query = ""
-    #     if len(post_memory) > 0:
-    #         query += '\Your summarization memory is as below:\n'
-    #         for i, m in enumerate(post_memory):
-    #             query += f'Episode #{i}: {m}\n'
-    #     query += f"""
-    #     {traj}
-    #     Above is the trajectory of the new experience.
-    #     """
-    #     query += '\n Anwser the following questions.\n 1. What is the performance of this policy and does it improve the performance compared to before? 2. Summarize the main reason that makes the policy improve or reduce the performance; 3. What new information of the task can be inferred compared to the memory?'
-    #     return query
 
     def generate_summary(self, traj, post_memory):
         query = self._generate_summary_query(traj, post_memory)
@@ -93,6 +79,7 @@
             query += f'Episode #{i}: {m}\n'
         query += '\n Identify and summarize the key information that can be exploited to improve performance of the player.'
         insight = get_chat(query,model=self.args.gpt_version, engine=self.args.gpt_version)
+        # import pdb;pdb.set_trace()
         logger.info(f'[Reflexion Memory]The insight prompt is: {query}.')
         logger.info(f'[Reflexion Memory]The insight response is: {insight}.')
         return insight
@@ -105,26 +92,8 @@
         query += f"""You have obtained experience as below """
         for i, m in enumerate(post_memory):
             query += f'Episode #{i}: {m}\n'
-        # if max_num_trials - len(post_memory) == 1:
-        #     query = (f"\n The main goal is to aid the human player in winning the game in the next episode. "
-        #             f"This is his {len(post_memory) + 1} try out of {max(max_num_trials, 1)} episodes. "
-        #             "Your suggestions should be simple, executable with heuristic policy, and suitable for an LLM agent. "
-        #             "Reply in an item list format. Specifically, focus on:"
-        #             "\n1. How to achieve optimal performance (exploitation) using the obtained knowledge?"
-        #             "\nNote: Stress the importance of prioritizing performance without exploration.")
-        #     suggestion = get_chat(query) + "\n Remember, in this attempt, aim solely for high performance without exploration."
-        # else:
-        # if max_num_trials-len(post_memory) == 1:
-        #     query += f"\n The main aim for you is to help the human player win the game in the last episode. The next episode is the last episode. You can give suggestions before each episode. Then what is your suggestion for his next episode? Note that this is the last try and he should not explore which may decrease his performance. The suggestions should be simple to follow, executable with heuristic policy, easy to use for an llm agent,and reply in item list format. The answer should instruct him to exploit all the knowlegde to gain the highest performance (exploitation) in the next episode. "
-        # else:
+
         query += f"\n The main aim for you is to help the human player win the game in the last episode. He has only {max(max_num_trials-len(post_memory), 1)} episodes left to try.You can give suggestions before each episode. Then what is your suggestion for his next episode? Please provide simple, concise answers suitable for a six-year-old child, focusing on the following in item list format: 1. What game-relevant knowledge is critical to determine the optimal policy. Notice that the knowledge should be obtainable by interacting with the environment and helpful for the decisions.\n 2. How should the player conduct exploration in the next episode to acquire this information?\n3. How can the player exploit the information obtained to achieve higher performance in subsequent episodes?\n 4. How should exploration and exploitation be balanced to improve performance in the next episode?\n"
-        # query += (f"\n The primary goal is to assist the human player in winning the game in the final episode. "
-        #         f"This is his {len(post_memory) + 1} try out of {max(max_num_trials, 1)} episodes. "
-        #         "Provide suggestions for the next episode that balance both exploration and exploitation. "
-        #         "The suggestions should be in item list format, easy to follow, aligned with heuristic policy, and usable for an LLM agent. Address:"
-        #         "\n1. Which information the player should gather via exploration and the best ways to explore?"
-        #         "\n2. Strategies to refine the policy for enhanced performance (exploitation)?"
-        #         "\n3. How should exploration and exploitation be weighted in the next episode?")
 
         # TODO: consider the inconsistency between past suggestion and past memory.
         suggestion = get_chat(query,model=self.args.gpt_version, engine=self.args.gpt_version)
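The new trajectory serialization in generate_from_file handles actions that were logged as stringified lists (continuous control) as well as plain discrete actions; the same block is mirrored in self_reflection.py and traj_prompt_summarizer.py below. A standalone sketch of the branch, with invented sample inputs, assuming the -1 undoes the +1 shift applied when the few-shot examples were recorded:

    def action_to_text(raw_action: str):
        # Logged actions are strings: either "[2.5]" (a list, continuous
        # control) or "3" (a discrete action shifted by +1 for the LLM).
        parsed = eval(raw_action)
        if isinstance(parsed, list):
            return float(parsed[0]) - 1  # undo the +1 offset on the first dim
        return raw_action

    print(action_to_text("[2.5]"))  # -> 1.5
    print(action_to_text("3"))      # -> "3"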
distillers/self_reflection.py CHANGED
@@ -18,11 +18,17 @@ class RefletionGenerator():
         with open(file_path, 'r') as infile:
             data = json.load(infile)
         for traj in data:
-            traj_text = traj[0]['game_description']
-            traj_text += traj[0]['goal_description']
+            traj_text = traj[0]['game_description']+'\n'
+            traj_text += traj[0]['goal_description']+'\n'
             for transition in traj[-max_step_num:]:
-                traj_text += transition['observation']
-                traj_text += f"Action: {transition['action']}"
+                traj_text += transition['observation']+'\n'
+                if type(eval(transition['action'])) == type([]):
+                    action = float(eval(transition['action'])[0])-1
+                else:
+                    action = transition['action']
+                traj_text += f"Action: {action}\n"
+                traj_text += f"Reward: {transition['reward']}\n"
+                traj_text += f"Your performance is: {transition['cum_reward']}\n"
             reflection = self.generate(traj_text, mem, max_len_mem=5)
             mem.append(reflection)
         return mem
distillers/traj_prompt_summarizer.py CHANGED
@@ -1,23 +1,35 @@
 import random
 from deciders.utils import get_completion
 import json
+from loguru import logger
+
+
 class TrajPromptSummarizer():
-    def __init__(self,args=None):
+    def __init__(self,args=None,logfile=None):
         self.args = args
         with open("./distillers/traj_summary_few_shot_examples.txt", 'r') as f:
             self.FEW_SHOT_EXAMPLES = f.read()
+
+        if logfile:
+            # logger.remove()
+            logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' in x['message'])
 
     def generate_from_file(self, file_path,max_step_num=200):
         mem = []
         with open(file_path, 'r') as infile:
             data = json.load(infile)
         for traj in data:
-            traj_text = traj[0]['game_description']
-            traj_text += traj[0]['goal_description']
+            traj_text = traj[0]['game_description']+'\n'
+            traj_text += traj[0]['goal_description']+'\n'
             for transition in traj[-max_step_num:]:
-                traj_text += transition['observation']
-                traj_text += f"> {transition['action']}"
-                traj_text += f"Your performance is: {transition['cum_reward']}"
+                traj_text += transition['observation']+'\n'
+                if type(eval(transition['action'])) == type([]):
+                    action = float(eval(transition['action'])[0])-1
+                else:
+                    action = transition['action']
+                traj_text += f"Action: {action}\n"
+                traj_text += f"Reward: {transition['reward']}\n"
+                traj_text += f"Your performance is: {transition['cum_reward']}\n"
             reflection = self.generate(traj_text, mem, max_len_mem=5)
             mem.append(reflection)
         return mem
@@ -43,4 +55,6 @@ class TrajPromptSummarizer():
         else:
             reflection_query = self._generate_summary_query(traj, memory)
         reflection = get_completion(reflection_query, engine=self.args.gpt_version)
+        logger.info(f'[Reflexion Memory]The reflexion prompt is: {reflection_query}.')
+        logger.info(f'[Reflexion Memory]The reflexion response is: {reflection}.')
         return reflection
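The summarizer now mirrors the other distillers by tagging its log records and attaching a filtered loguru sink, so only reflection-memory lines reach the distiller's logfile. A minimal sketch of the pattern (the filename is invented):

    from loguru import logger

    # Only records whose message carries the tag end up in this sink.
    logger.add("reflexion_memory.log",
               filter=lambda record: '[Reflexion Memory]' in record['message'])

    logger.info('[Reflexion Memory]The reflexion prompt is: ...')  # written
    logger.info('ordinary progress message')                       # skipped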
draw_overall_performance.py DELETED
@@ -1,59 +0,0 @@
-import pandas as pd
-import matplotlib.pyplot as plt
-
-# Load the CSV data
-data = pd.read_csv("performance_data.csv")
-
-# Group games by type
-game_types = {
-    "Classic Control": ["Acrobot-v1", "CartPole-v0", "MountainCar-v0"],
-    "Box 2D": ["LunarLander-v2"],
-    "Toy Text": ["Taxi-v3", "CliffWalking-v0", "Blackjack-v1"]
-}
-
-for game_type, games in game_types.items():
-    fig, axs = plt.subplots(1, len(games), figsize=(12 * len(games), 6))
-    fig.suptitle(f"Performance Plot: {game_type}", fontsize=28, fontname="Times New Roman")
-
-    if len(games) == 1:
-        axs = [axs]
-
-    handles, labels = [], []
-
-    for idx, game in enumerate(games):
-        # Filter data to get information for the current game (in the loop)
-        game_data = data[data["game"] == game]
-
-        axs[idx].set_title(game, fontsize=20, fontname="Times New Roman")
-        axs[idx].set_xlabel("Levels", fontsize=16, fontname="Times New Roman")
-        if idx == 0:
-            axs[idx].set_ylabel("Scores", fontsize=16, fontname="Times New Roman")
-
-        for index, row in game_data.iterrows():
-            decider_name = row["decider_name"]
-            levels = ["l1", "l2", "l3", "l4", "l5"]
-            scores = row[levels].values.tolist()
-            lines = axs[idx].plot(levels, scores, "-o", label=decider_name)
-            # Grab the handle and label for creating a global legend
-            handles.append(lines[0])
-            labels.append(decider_name)
-
-    # Eliminate duplicate labels and handles
-    unique_labels = []
-    unique_handles = []
-    for handle, label in zip(handles, labels):
-        if label not in unique_labels:
-            unique_labels.append(label)
-            unique_handles.append(handle)
-
-    # Add a legend at the bottom middle of the figure
-    fig.legend(
-        unique_handles,
-        unique_labels,
-        loc="lower center",
-        ncol=4, prop={'size': 18}
-    )
-
-    # Adjust layout to accommodate the legend and prevent cropping
-
-    plt.savefig("./vis/" + game_type + ".png", dpi=300)
environment.yml → environment.yaml RENAMED
@@ -1,26 +1,21 @@
 name: llm-gym
-channels:
-  - conda-forge
-  - defaults
 dependencies:
   - _libgcc_mutex=0.1=main
   - _openmp_mutex=5.1=1_gnu
-  - aiosignal=1.2.0=pyhd3eb1b0_0
   - asttokens=2.0.5=pyhd3eb1b0_0
   - async-timeout=4.0.2=py38h06a4308_0
-  - attrs=22.1.0=py38h06a4308_0
   - backcall=0.2.0=pyhd3eb1b0_0
   - blas=1.0=mkl
   - brotlipy=0.7.0=py38h27cfd23_1003
-  - ca-certificates=2023.08.22=h06a4308_0
+  - ca-certificates=2023.01.10=h06a4308_0
   - cached-property=1.5.2=py_0
-  - certifi=2023.7.22=py38h06a4308_0
   - cffi=1.15.1=py38h5eee18b_3
   - chardet=4.0.0=py38h06a4308_1003
   - comm=0.1.2=py38h06a4308_0
   - cryptography=39.0.1=py38h9ce1e76_2
   - cudatoolkit=11.3.1=h2bc3f7f_2
   - debugpy=1.5.1=py38h295c915_0
+  - decorator=5.1.1=pyhd8ed1ab_0
   - executing=0.8.3=pyhd3eb1b0_0
   - frozenlist=1.3.3=py38h5eee18b_0
   - hdf5=1.10.6=h3ffc7dd_1
@@ -42,7 +37,6 @@ dependencies:
   - libprotobuf=3.20.3=he621ea3_0
   - libsodium=1.0.18=h7b6447c_0
   - libstdcxx-ng=11.2.0=h1234567_1
-  - loguru=0.7.1=py38h578d9bd_0
   - matplotlib-inline=0.1.6=py38h06a4308_0
   - mkl=2023.1.0=h6d00ec8_46342
   - mkl-service=2.4.0=py38h5eee18b_1
@@ -50,17 +44,16 @@ dependencies:
   - mkl_random=1.2.2=py38h417a72b_1
   - ncurses=6.4=h6a678d5_0
   - nest-asyncio=1.5.6=py38h06a4308_0
+  - numpy=1.24.3=py38hf6e8229_1
   - numpy-base=1.24.3=py38h060ed82_1
   - openssl=3.0.10=h7f8727e_2
-  - packaging=23.0=py38h06a4308_0
   - parso=0.8.3=pyhd3eb1b0_0
   - pcre=8.45=h295c915_0
   - pexpect=4.8.0=pyhd3eb1b0_3
   - pickleshare=0.7.5=pyhd3eb1b0_1003
-  - pip=23.2.1=py38h06a4308_0
+  - pip=23.1.2=py38h06a4308_0
   - platformdirs=2.5.2=py38h06a4308_0
   - prompt-toolkit=3.0.36=py38h06a4308_0
-  - psutil=5.9.0=py38h5eee18b_0
   - ptyprocess=0.7.0=pyhd3eb1b0_2
   - pure_eval=0.2.2=pyhd3eb1b0_0
   - pycparser=2.21=pyhd3eb1b0_0
@@ -70,7 +63,6 @@ dependencies:
   - python=3.8.16=h955ad1f_4
   - python-dateutil=2.8.2=pyhd3eb1b0_0
   - python_abi=3.8=2_cp38
-  - pyyaml=6.0=py38h0a891b7_4
   - pyzmq=25.1.0=py38h6a678d5_0
   - readline=8.2=h5eee18b_0
   - setuptools=67.8.0=py38h06a4308_0
@@ -81,7 +73,6 @@ dependencies:
   - tk=8.6.12=h1ccaba5_0
   - tornado=6.2=py38h5eee18b_0
   - traitlets=5.7.1=py38h06a4308_0
-  - typing_extensions=4.6.3=py38h06a4308_0
   - wcwidth=0.2.5=pyhd3eb1b0_0
   - wheel=0.38.4=py38h06a4308_0
   - xz=5.4.2=h5eee18b_0
@@ -89,105 +80,95 @@ dependencies:
   - zeromq=4.3.4=h2531618_0
   - zlib=1.2.13=h5eee18b_0
   - pip:
-    - absl-py==1.4.0
-    - aiohttp==3.8.4
-    - ale-py==0.8.1
-    - annotated-types==0.5.0
-    - appdirs==1.4.4
-    - beautifulsoup4==4.12.2
-    - box2d-py==2.3.5
-    - cachetools==5.3.1
-    - cchardet==2.1.7
-    - charset-normalizer==3.1.0
-    - click==8.1.3
-    - cloudpickle==2.2.1
-    - contourpy==1.1.0
-    - cycler==0.11.0
-    - cython==3.0.1
-    - dataclasses-json==0.5.14
-    - decorator==4.4.2
-    - docker-pycreds==0.4.0
-    - fasteners==0.18
-    - filelock==3.12.2
-    - fonttools==4.40.0
-    - fsspec==2023.6.0
-    - gitdb==4.0.10
-    - gitpython==3.1.31
-    - glfw==2.6.2
-    - google-auth==2.21.0
-    - google-auth-oauthlib==1.0.0
-    - greenlet==2.0.2
-    - grpcio==1.56.0
-    - gym==0.26.2
-    - gym-notices==0.0.8
-    - h5py==3.9.0
-    - huggingface-hub==0.15.1
-    - imageio==2.31.2
-    - imageio-ffmpeg==0.4.8
-    - importlib-metadata==6.6.0
-    - importlib-resources==5.12.0
-    - iniconfig==2.0.0
-    - kiwisolver==1.4.4
-    - langchain==0.0.284
-    - langsmith==0.0.33
-    - llvmlite==0.40.1
-    - lz4==4.3.2
-    - markdown==3.4.3
-    - markupsafe==2.1.1
-    - marshmallow==3.20.1
-    - matplotlib==3.7.1
-    - moviepy==1.0.3
-    - mujoco==2.2.0
-    - mujoco-py==2.1.2.14
-    - multidict==6.0.4
-    - numba==0.57.1
-    - numexpr==2.8.5
-    - numpy==1.24.4
-    - oauthlib==3.2.2
-    - openai==0.27.8
-    - opencv-python==4.8.0.76
-    - pathtools==0.1.2
-    - pillow==9.5.0
-    - pluggy==1.2.0
-    - proglog==0.1.10
-    - protobuf==3.19.6
-    - py==1.11.0
-    - pyasn1==0.5.0
-    - pyasn1-modules==0.3.0
-    - pydantic==2.3.0
-    - pydantic-core==2.6.3
-    - pygame==2.1.0
-    - pyopengl==3.1.7
-    - pyparsing==3.0.9
-    - pytest==7.0.1
-    - regex==2023.6.3
-    - requests==2.31.0
-    - requests-oauthlib==1.3.1
-    - rsa==4.9
-    - safetensors==0.3.1
-    - sentry-sdk==1.26.0
-    - setproctitle==1.3.2
-    - smmap==5.0.0
-    - soupsieve==2.4.1
-    - sqlalchemy==2.0.20
-    - swig==4.1.1
-    - tenacity==8.2.3
-    - tensorboard==2.14.0
-    - tensorboard-data-server==0.7.1
-    - tianshou==0.4.10
-    - tokenizers==0.13.3
-    # - torch==1.12.0+cu113
-    # - torchaudio==0.12.0+cu113
-    # - torchvision==0.13.0+cu113
-    - tqdm==4.65.0
-    - transformers==4.30.2
-    - typing==3.7.4.3
-    - typing-extensions==4.7.1
-    - typing-inspect==0.9.0
-    - urllib3
-    - v==1
-    - wandb==0.15.4
-    - werkzeug==2.3.6
-    - yarl==1.9.2
-    - zipp==3.15.0
-    - aquarel==0.0.5
+    - absl-py==1.4.0
+    - aiohttp==3.8.4
+    - aiosignal==1.3.1
+    - annotated-types==0.5.0
+    - appdirs==1.4.4
+    - aquarel==0.0.5
+    - attrs==23.1.0
+    - box2d-py==2.3.5
+    - cachetools==5.3.1
+    - certifi==2023.5.7
+    - charset-normalizer==3.1.0
+    - click==8.1.6
+    - cloudpickle==2.2.1
+    - colorama==0.4.6
+    - contourpy==1.1.0
+    - cycler==0.11.0
+    - dataclasses-json==0.5.14
+    - docker-pycreds==0.4.0
+    - filelock==3.12.3
+    - fonttools==4.40.0
+    - fsspec==2023.6.0
+    - gitdb==4.0.10
+    - gitpython==3.1.32
+    - google-auth==2.22.0
+    - google-auth-oauthlib==1.0.0
+    - greenlet==2.0.2
+    - grpcio==1.57.0
+    - gym==0.26.2
+    - gym-notices==0.0.8
+    - h5py==3.9.0
+    - huggingface-hub==0.16.4
+    - importlib-metadata==6.6.0
+    - importlib-resources==5.12.0
+    - kiwisolver==1.4.4
+    - langchain==0.0.270
+    - langsmith==0.0.25
+    - llvmlite==0.40.1
+    - logger==1.4
+    - loguru==0.7.0
+    - markdown==3.4.4
+    - markupsafe==2.1.3
+    - marshmallow==3.20.1
+    - matplotlib==3.7.1
+    - multidict==6.0.4
+    - mypy-extensions==1.0.0
+    - numba==0.57.1
+    - numexpr==2.8.5
+    - oauthlib==3.2.2
+    - openai==0.27.8
+    - packaging==23.1
+    - pandas==2.0.3
+    - pathtools==0.1.2
+    - pillow==9.5.0
+    - protobuf==3.19.6
+    - psutil==5.9.5
+    - pyasn1==0.5.0
+    - pyasn1-modules==0.3.0
+    - pydantic==1.10.11
+    - pydantic-core==2.6.1
+    - pygame==2.1.0
+    - pyparsing==3.0.9
+    - pytz==2023.3.post1
+    - pyyaml==6.0.1
+    - regex==2023.8.8
+    - requests==2.31.0
+    - requests-oauthlib==1.3.1
+    - rsa==4.9
+    - safetensors==0.3.3
+    - seaborn==0.13.0
+    - sentry-sdk==1.28.1
+    - setproctitle==1.3.2
+    - smmap==5.0.0
+    - sqlalchemy==2.0.20
+    - swig==4.1.1
+    - tenacity==8.2.3
+    - tensorboard==2.14.0
+    - tensorboard-data-server==0.7.1
+    - tiktoken==0.4.0
+    - timeout-decorator==0.5.0
+    - tokenizers==0.13.3
+    - tqdm==4.65.0
+    - transformers==4.30.2
+    - typing-extensions==4.7.1
+    - typing-inspect==0.9.0
+    - tzdata==2023.3
+    - urllib3==1.26.16
+    - v==1
+    - wandb==0.15.5
+    - werkzeug==2.3.7
+    - win32-setctime==1.1.0
+    - yarl==1.9.2
+    - zipp==3.15.0
envs/classic_control/acrobot_policies.py CHANGED
@@ -6,7 +6,7 @@ import numpy as np
 def dedicated_1_policy(state, pre_action=1):
     def get_description():
         return "Always select action 1"
-    dedicated_0_policy.description = get_description()
+    dedicated_1_policy.description = get_description()
     return 1
 
 def dedicated_2_policy(state, pre_action=1):
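This corrects a copy-paste bug: dedicated_1_policy tried to set its description on dedicated_0_policy, the wrong function object, so the intended metadata never reached the policy being called. A minimal sketch of the repo's pattern of attaching metadata to the function object itself:

    def dedicated_1_policy(state, pre_action=1):
        # Attach the human-readable description to this function object;
        # referencing another policy's name here was the original bug.
        dedicated_1_policy.description = "Always select action 1"
        return 1

    dedicated_1_policy(None)
    print(dedicated_1_policy.description)  # -> "Always select action 1"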
envs/toy_text/few_shot_examples/cliffwalking_l2.json CHANGED
The diff for this file is too large to render. See raw diff
 
envs/toy_text/few_shot_examples/cliffwalking_l4.json CHANGED
The diff for this file is too large to render. See raw diff
 
gen_examples.sh DELETED
@@ -1,55 +0,0 @@
-# # (Wenhao Li, 2023-09-06, 09:20)
-# # Important !!!
-# # For environment that truncate at 200 steps automatically, you could set the max_episode_len to greater than 200.
-# # Otherwise, you need to set the max_episode_len to 200 manually (for fair comparison).
-
-# # L2
-# ## Cartpole env
-# python gen_few_shots_examples.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider random_actor --max_episode_len 1000 --n_episodes 5
-
-# ## Acrobot-v1 env
-# # Note that we want to use the Acrobot-v0 but it is deprecated in gym 0.26.2.
-# # So we use Acrobot-v1 instead and set the max_episode_len to 200.
-# python gen_few_shots_examples.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider random_actor --max_episode_len 200 --n_episodes 5
-
-# ## MountainCar-v0 env
-# python gen_few_shots_examples.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider random_actor --max_episode_len 1000 --n_episodes 5
-
-# ## LunarLander-v2 env
-# python gen_few_shots_examples.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider random_actor --max_episode_len 1000 --n_episodes 5
-
-# # Blacjack-v1 env
-# # (Wenhao Li, 2023-09-06, 10:00)
-# # random_actor is too weak, so we need to set the n_episodes to a larger number (100).
-# # the n_episodes should be set to a smaller number for other more powerful deciders.
-
-# # (Wenhao Li, 2023-09-07, 20:25)
-# # reset n_episodes to 2 (default value) for fair comparison.
-# python gen_few_shots_examples.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider random_actor --max_episode_len 200 --n_episodes 5
-
-# # Taxi-v3 env
-# python gen_few_shots_examples.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider random_actor --max_episode_len 1000 --n_episodes 5
-
-# # CliffWalking-v0 env
-# python gen_few_shots_examples.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider random_actor --max_episode_len 200 --n_episodes 5
-
-# # FrozenLake-v1 env
-# python gen_few_shots_examples.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider random_actor --max_episode_len 1000 --n_episodes 5
-
-# L4
-## Cartpole env
-python gen_few_shots_examples.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider expert --policy_path RL_based/checkpoints/CartPole-v0/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider expert --policy_path RL_based/checkpoints/LunarLander-v2/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider expert --policy_path RL_based/checkpoints/Acrobot-v1/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider expert --policy_path RL_based/checkpoints/MountainCar-v0/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider expert --policy_path RL_based/checkpoints/Blackjack-v1/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider expert --policy_path RL_based/checkpoints/Taxi-v3/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider expert --policy_path RL_based/checkpoints/CliffWalking-v0/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider expert --policy_path RL_based/checkpoints/FrozenLake-v1/expert/policy.pth --max_episode_len 200 --n_episodes 5
gen_few_shots_examples.py DELETED
@@ -1,269 +0,0 @@
-import argparse
-import envs
-import deciders
-from matplotlib import animation
-import matplotlib.pyplot as plt
-import os
-import numpy as np
-import torch as th
-from envs.translator import InitSummarizer, CurrSummarizer, FutureSummarizer, Translator
-from tianshou.data import Collector, VectorReplayBuffer, ReplayBuffer
-from tianshou.policy import PPOPolicy
-from RL_based.utils import (
-    Net_GRU_Bert_tianshou,
-    Net_Bert_CLS_tianshou,
-    Net_Bert_CNN_tianshou,
-    Net_GRU_nn_emb_tianshou,
-)
-from tianshou.utils.net.common import ActorCritic
-from tianshou.utils.net.discrete import Actor, Critic
-import gym
-import json
-
-ENV_CLASS = {'classic_control': ['CartPole', 'Acrobot', 'MountainCar'],
-             'box2d': ['LunarLander'],
-             'toy_text': ['Blackjack', 'Taxi', 'CliffWalking', 'FrozenLake']}
-
-def get_env_class(env_name):
-    for key, value in ENV_CLASS.items():
-        if env_name in value:
-            return key
-    return None
-
-def get_fewshot_example_path(env, decider):
-    assert decider in ['random_actor', 'expert'], "decider must be random_actor or expert"
-    prompt_level = 2 if decider == 'random_actor' else 4
-    fewshot_example_path = os.path.join(
-        'envs', get_env_class(env.spec.name), 'few_shot_examples',
-        ''.join([env.spec.name.lower(), '_l', str(prompt_level), '.json']))
-    return fewshot_example_path
-
-# https://colab.research.google.com/drive/1DdWsGi10232orUv-reY4wsTmT0VMoHaX?usp=sharing#scrollTo=4OfVmDKk7XvG
-# LLMs bias on 0 so make the actions greater than 1 instead.
-
-def gen_expert_examples(environment, policy, file_path, max_episode_len=120, n_episodes=1):
-    replaybuffer = ReplayBuffer(size=1000)
-    test_collector_1 = Collector(policy, environment, replaybuffer)
-    test_collector_1.reset_env()
-    game_description = environment.get_game_description()
-    goal_description = environment.get_goal_description()
-    action_description = environment.get_action_description()
-    policy.eval()
-    data_lst = []
-
-    for _ in range(n_episodes):
-        test_collector_1.reset_buffer()
-        result = test_collector_1.collect(n_episode=1)
-        sample_result = replaybuffer.sample(0)[0]
-        round = 0
-        utility = 0
-        data = []
-        for transition in sample_result:
-            round += 1
-            if round > max_episode_len:
-                break
-            question = f"{transition.obs} \n {goal_description} \n {action_description} "
-            reward = transition.rew
-            utility += reward
-
-            answer = f"The final answer is: {transition.act + 1}"
-
-            data.append(
-                {
-                    "observation": transition.obs,
-                    "goal_description": goal_description,
-                    "action_description": action_description,
-                    "game_description": game_description,
-                    "action": str(transition.act + 1),
-                    "question": question,
-                    "answer": answer,
-                    "reward": reward,
-                    "cum_reward": utility,
-                }
-            )
-            print(f"Now it is round {round}")
-        data_lst.append(data)
-    # Return the final reward
-    with open(file_path, "w") as outfile:
-        json.dump(data_lst, outfile)
-    return utility
-
-
-def gen_examples(environment, decider, file_path, max_episode_len=200, n_episodes=1):
-    game_description = environment.get_game_description()
-    goal_description = environment.get_goal_description()
-    action_description = environment.get_action_description()
-    frames = []
-    utilities = []
-    data_lst = []
-
-    for _ in range(n_episodes):
-        # Reset the environment
-        round = 0
-        state_description, env_info = environment.reset()
-        utility = 0
-        data = []
-        for _ in range(max_episode_len):
-            # Keep asking ChatGPT for an action until it provides a valid one
-            asking_round = 0
-            action, prompt, answer, _, _, _ = decider.act(
-                state_description,
-                action_description,
-                env_info,
-                game_description,
-                goal_description,
-            )
-            # Perform the action in the environment
-            state_description, reward, terminated, truncated, env_info = environment.step_llm(
-                action
-            )
-            question = f"{state_description} \n {goal_description} \n {action_description} "
-            utility += reward
-            answer += f"The final answer is: {action}"
-
-            data.append(
-                {
-                    "observation": state_description,
-                    "goal_description": goal_description,
-                    "action_description": action_description,
-                    "game_description": game_description,
-                    "action": action,
-                    "question": question,
-                    "answer": answer,
-                    "reward": reward,
-                    "cum_reward": utility,
-                }
-            )
-            print(f"Now it is round {round}")
-            round += 1
-            # If the game is over, break the loop
-            if terminated or truncated:
-                print(f"Terminated!")
-                break
-        utilities.append(utility)
-        data_lst.append(data)
-    # Return the final reward
-    with open(file_path, "w") as outfile:
-        json.dump(data_lst, outfile)
-    return utility
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Generate few shots examples of a gym environment."
-    )
-    parser.add_argument(
-        "--init_summarizer",
-        type=str,
-        required=True,
-        help="The name of the init summarizer to use.",
-    )
-    parser.add_argument(
-        "--curr_summarizer",
-        type=str,
-        required=True,
-        help="The name of the curr summarizer to use.",
-    )
-    parser.add_argument(
-        "--env",
-        type=str,
-        default="base_env",
-        help="The name of the gym environment to use.",
-    )
-    parser.add_argument(
-        "--decider",
-        type=str,
-        default="naive_actor",
-        help="The actor used to select action",
-    )
-    parser.add_argument(
-        "--env_name",
-        type=str,
-        default="CartPole-v0",
-        help="The name of the gym environment to use.",
-    )
-    parser.add_argument(
-        "--max_episode_len",
-        type=int,
-        default=200,
-        help="The maximum number of steps in an episode.",
-    )
-    parser.add_argument(
-        "--num_episodes",
-        type=int,
-        default=1,
-        help="The number of episodes to collect data.",
-    )
-    parser.add_argument(
-        "--max_length",
-        type=int,
-        default=128,
-        help="The token length of the observation",
-    )
-    parser.add_argument(
-        "--trans_model_name",
-        type=str,
-        default="/home/ubuntu/LLM-Decider-Bench/RL_based/transformer_offline_distilbert",
-        help="The name of the pretrained transformer to use.",
-    )
-    parser.add_argument(
-        "--policy_path",
-        type=str,
-        default=None,
-        help="The path to the policy to be evaluated",
-    )
-    parser.add_argument(
-        "--n_episodes",
-        type=int,
-        default=2,
-        help="The number of episodes to collect data (for env where episode is too short).",
-    )
-
-    args = parser.parse_args()
-    # Get the specified translator, environment, and ChatGPT model
-    device = "cuda" if th.cuda.is_available() else "cpu"
-    env_class = envs.REGISTRY[args.env]
-    init_summarizer = InitSummarizer(envs.REGISTRY[args.init_summarizer])
-    curr_summarizer = CurrSummarizer(envs.REGISTRY[args.curr_summarizer])
-    translator = Translator(init_summarizer, curr_summarizer, None, env=None)
-    environment = env_class(gym.make(args.env_name, render_mode=None), translator)
-
-    fewshot_example_path = get_fewshot_example_path(environment, args.decider)
-
-    if args.decider == "expert":
-        net = Net_GRU_nn_emb_tianshou(
-            hidden_sizes=[256, 128],
-            device=device,
-            max_length=args.max_length,
-            trans_model_name=args.trans_model_name,
-        )
-        actor = Actor(net, environment.action_space.n, device=device).to(device)
-        critic = Critic(net, device=device).to(device)
-        actor_critic = ActorCritic(actor, critic)
-        optim = th.optim.Adam(actor_critic.parameters(), lr=0.0003)
-
-        # PPO policy
-        dist = th.distributions.Categorical
-        policy = PPOPolicy(
-            actor,
-            critic,
-            optim,
-            dist,
-            action_space=environment.action_space,
-            deterministic_eval=True,
-        )
-        policy.load_state_dict(th.load(args.policy_path))
-        utility = gen_expert_examples(
-            environment, policy, fewshot_example_path,
-            max_episode_len=args.max_episode_len, n_episodes=args.n_episodes
-        )
-    else:
-        decider_class = deciders.REGISTRY[args.decider]
-        decider = decider_class(environment.env.action_space)
-        # Evaluate the translator
-        utility = gen_examples(
-            environment, decider, fewshot_example_path,
-            max_episode_len=args.max_episode_len,
-            n_episodes=args.n_episodes
-        )
-    print(f"(Avg.) Cummulative reward: {utility}")
main_merge.py DELETED
@@ -1,365 +0,0 @@
1
- import argparse
2
- import envs
3
- import deciders
4
- import distillers
5
- from matplotlib import animation
6
- import matplotlib.pyplot as plt
7
- import prompts as task_prompts
8
- import os
9
- import datetime
10
- import time
11
- from collections import deque
12
- from envs.translator import InitSummarizer, CurrSummarizer, FutureSummarizer, Translator
13
- import gym
14
- import json
15
- import pandas as pd
16
- import random
17
- import numpy as np
18
- import datetime
19
- from loguru import logger
20
-
21
-
22
- def set_seed(seed):
23
- random.seed(seed)
24
-
25
- def save_frames_as_gif(frames, path="./", filename="gym_animation.gif"):
26
- # Mess with this to change frame size
27
- plt.figure(figsize=(frames[0].shape[1] / 72.0, frames[0].shape[0] / 72.0), dpi=72)
28
-
29
- patch = plt.imshow(frames[0])
30
- plt.axis("off")
31
-
32
- def animate(i):
33
- patch.set_data(frames[i])
34
-
35
- anim = animation.FuncAnimation(plt.gcf(), animate, frames=len(frames), interval=50)
36
-
37
- # Ensure the folder exists, if it does not exist, create it
38
- os.makedirs(path, exist_ok=True)
39
- print(f"file name: {filename}")
40
- print(f"path name: {path}")
41
- anim.save(path + filename, writer="imagemagick", fps=60)
42
-
43
-
44
- def evaluate_translator(translator, environment, decider, max_episode_len, logfile, args):
45
- utilities = []
46
- df = pd.read_csv('record_reflexion.csv', sep=',')
47
- filtered_df = df[(df['env'] == args.env_name) & (df['decider'] == 'expert') & (df['level'] == 1)]
48
- expert_score = filtered_df['avg_score'].item()
49
- seeds = [i*100 for i in range(100)][-args.num_trails:]
50
- seeds_index = -1
51
- # prompt_file = "prompt.txt"
52
- # f = open(prompt_file,"w+")
53
- if not "Blackjack" in args.env_name:
54
- curriculums = 1
55
- num_trails = args.num_trails
56
- else:
57
- curriculums = 20
58
- num_trails = args.num_trails // 20
59
- for trail in range(num_trails):
60
- for curriculum in range(curriculums):
61
- seeds_index += 1
62
- if "Blackjack" in args.env_name:
63
- seed = seeds[trail*curriculums+curriculum]
64
- else:
65
- seed = args.seed
66
- utility = _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed)
67
- utilities.append(utility)
68
- # TODO: set env sucess utility threshold
69
- if args.decider in ['reflexion']:
70
- if utility < expert_score:
71
- decider.update_mem()
72
- else:
73
- decider.update_mem()
74
- # wandb.log({'memory': decider.memory})
75
- # with open('./mem.json', 'w') as f:
76
- # json.dump(decider.memory, f) #, cls=NumpyArrayEncoder)
77
- # f.close()
78
- return utilities
79
-
80
- def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
81
- # Reset the environment
82
- if not "Blackjack" in args.env_name:
83
- set_seed(args.seed)
84
- # Reset the environment
85
- state_description, env_info = environment.reset(seed=args.seed)
86
- else:
87
- set_seed(seed)
88
- # Reset the environment
89
- state_description, env_info = environment.reset(seed=seed)
90
- game_description = environment.get_game_description()
91
- goal_description = environment.get_goal_description()
92
- action_description = environment.get_action_description()
93
-
94
- # Initialize the history
95
- if args.past_horizon:
96
- raise NotImplementedError
97
- history = deque(maxlen=args.past_horizon)
98
- env_info['history'] = history
99
-
100
- # Initialize the statistics
101
- frames = []
102
- utility = 0
103
- current_total_tokens = 0
104
- current_total_cost = 0
105
- columns = ["Prompt", "Response", "Action", "Return", "#All Tokens", "All Cost"]
106
- start_time = datetime.datetime.now()
107
- # Run the game for a maximum number of steps
108
- for round in range(max_episode_len):
109
- # If the past horizon is specified, keep track of the past states, actions, and rewards
110
- if args.past_horizon:
111
- previous_tuples = {'state': None, 'action': None, 'reward': None}
112
-
113
- # Keep asking ChatGPT for an action until it provides a valid one
114
- asking_round = 0
115
- error_flag = True
116
- retry_num = 2
117
- for error_i in range(retry_num):
118
- try:
119
- action, prompt, response, tokens, cost = decider.act(
120
- state_description,
121
- action_description,
122
- env_info,
123
- game_description,
124
- goal_description,
125
- logfile
126
- )
127
-
128
- if args.past_horizon:
129
- raise NotImplementedError
130
- previous_tuples['state'] = state_description
131
-
132
- # Perform the action in the environment
133
- if "Continuous" in args.env_name:
134
- action = [action]
135
-
136
-
137
- state_description, reward, termination, truncation, env_info = environment.step_llm(
138
- action
139
- )
140
- utility += reward
141
-
142
- if args.past_horizon:
143
- raise NotImplementedError
144
- previous_tuples['action'] = action
145
- previous_tuples['reward'] = reward
146
- history.append(previous_tuples)
147
- env_info['history'] = history
148
-
149
- # Update the statistics
150
- current_total_tokens += tokens
151
- current_total_cost += cost
152
- error_flag = False
153
- break
154
- except Exception as e:
155
- print(e)
156
- if error_i < retry_num-1:
157
- decider.env_history.remove_invalid_state()
158
- if logger:
159
- logger.debug(f"Error: {e}, Retry! ({error_i+1}/{retry_num})")
160
- continue
161
- # If the action is still invalid after 5 tries, use the default action
162
- # file.write(prompt+"\n"+"======================================\n")
163
- if error_flag:
164
- if "Continuous" in args.env_name:
165
- action = [decider.default_action]
166
- else:
167
- action = decider.default_action
168
- state_description, reward, termination, truncation, env_info = environment.step_llm(
169
- action
170
- )
171
- utility += reward
172
-
173
- if args.past_horizon:
174
- raise NotImplementedError
175
- previous_tuples['action'] = action
176
- previous_tuples['reward'] = reward
177
- history.append(previous_tuples)
178
- env_info['history'] = history
179
-
180
- # Update the statistics
181
- decider.env_history.add('action', decider.default_action)
182
- logger.info(f'The optimal action is: {decider.default_action}.')
183
- logger.info(f"Now it is round {round}.")
184
- else:
185
- current_total_tokens += tokens
186
- current_total_cost += cost
187
- # print(prompt)
188
- logger.info(f"current_total_tokens: {current_total_tokens}")
189
- logger.info(f"current_total_cost: {current_total_cost}")
190
- logger.info(f"Now it is round {round}.")
191
-
192
- frames.append(environment.render())
193
-
194
- # If the game is over, break the loop
195
- if termination or truncation:
196
- if logger:
197
- logger.info(f"Terminated!")
198
- # save_frames_as_gif(
199
- # frames,
200
- # path=f"./images/{environment.env_name}/",
201
- # filename=f"{translator.__class__.__name__}.gif",
202
- # )
203
- break
204
- time.sleep(1)
205
- decider.env_history.add("cummulative_reward", str(utility))
206
- # Record the final reward
207
- if logger:
208
- logger.info(f"Cummulative reward: {utility}.")
209
- end_time = datetime.datetime.now()
210
- time_diff = end_time - start_time
211
- logger.info(f"Time consumer: {time_diff.total_seconds()} s")
212
- return utility
213
-
214
-
215
- if __name__ == "__main__":
216
- parser = argparse.ArgumentParser(
217
- description="Evaluate a translator in a gym environment with a ChatGPT model."
218
- )
219
- parser.add_argument(
220
- "--init_summarizer",
221
- type=str,
222
- required=True,
223
- help="The name of the init summarizer to use.",
224
- )
225
- parser.add_argument(
226
- "--curr_summarizer",
227
- type=str,
228
- required=True,
229
- help="The name of the curr summarizer to use.",
230
- )
231
- parser.add_argument(
232
- "--future_summarizer",
233
- type=str,
234
- help="The name of the future summarizer to use.",
235
- )
236
- parser.add_argument(
237
- "--env",
238
- type=str,
239
- default="base_env",
240
- help="The name of the gym environment to use.",
241
- )
242
- parser.add_argument(
243
- "--env_name",
244
- type=str,
245
- default="CartPole-v0",
246
- help="The name of the gym environment to use.",
247
- )
248
- parser.add_argument(
249
- "--decider",
250
- type=str,
251
- default="spp_actor",
252
- help="The actor used to select action",
253
- )
254
- parser.add_argument(
255
- "--gpt_version", type=str, default="gpt-35-turbo", help="The version of GPT to use"
256
- )
257
- parser.add_argument(
258
- "--render", type=str, default="rgb_array", help="The render mode"
259
- )
260
- parser.add_argument(
261
- "--max_episode_len",
262
- type=int,
263
- default=200,
264
- help="The maximum number of steps in an episode",
265
- )
266
- parser.add_argument(
267
- "--past_horizon", type=int, help="The horizon of looking back"
268
- )
269
- parser.add_argument(
270
- "--future_horizon", type=int, help="The horizon of looking to the future"
271
- )
272
- parser.add_argument(
273
- "--distiller",
274
- type=str,
275
- default="traj_distiller",
276
- help="The distiller used to generate a few shot examples from traj",
277
- )
278
- parser.add_argument(
279
- "--prompt_path",
280
- type=str,
281
- default="envs/classic_control/few_shot_examples/cartpole",
282
- help="The path of prompts",
283
- )
284
- parser.add_argument(
285
- "--prompt_level",
286
- type=int,
287
- default=1,
288
- help="The level of prompts",
289
- )
290
- parser.add_argument(
291
- "--num_trails",
292
- type=int,
293
- default=5,
294
- help="The number of trials",
295
- )
296
- parser.add_argument(
297
- "--use_short_mem",
298
- type=int,
299
- default=1,
300
- help="Whether use short mem",
301
- )
302
- parser.add_argument(
303
- "--seed",
304
- type=int,
305
- default=100,
306
- help="set seed",
307
- )
308
- parser.add_argument(
309
- "--short_mem_num",
310
- type=int,
311
- default=10,
312
- help="Set numbers of short memories used in actor, if use_short_mem = 1"
313
- )
314
- args = parser.parse_args()
315
-
316
- # Get the specified translator, environment, and ChatGPT model
317
- env_class = envs.REGISTRY[args.env]
318
- init_summarizer = InitSummarizer(envs.REGISTRY[args.init_summarizer])
319
- curr_summarizer = CurrSummarizer(envs.REGISTRY[args.curr_summarizer])
320
-
321
- if args.future_summarizer:
322
- future_summarizer = FutureSummarizer(
323
- envs.REGISTRY[args.future_summarizer],
324
- envs.REGISTRY["cart_policies"],
325
- future_horizon=args.future_horizon,
326
- )
327
- else:
328
- future_summarizer = None
329
-
330
- decider_class = deciders.REGISTRY[args.decider]
331
- distiller_class = distillers.REGISTRY[args.distiller](args=args)
332
- sampling_env = envs.REGISTRY["sampling_wrapper"](gym.make(args.env_name))
333
- if args.prompt_level == 5:
334
- prompts_class = task_prompts.REGISTRY[(args.env_name,args.decider)]()
335
- else:
336
- prompts_class = task_prompts.REGISTRY[(args.decider)]()
337
- translator = Translator(
338
- init_summarizer, curr_summarizer, future_summarizer, env=sampling_env
339
- )
340
- environment = env_class(
341
- gym.make(args.env_name, render_mode=args.render), translator
342
- )
343
-
344
- logfile = (
345
- f"llm.log/output-{args.env_name}-{args.decider}-{args.gpt_version}-l{args.prompt_level}"
346
- f"-{datetime.datetime.now().timestamp()}.log"
347
- )
348
- if "reflexion" in args.decider or "jarvis" in args.decider:
349
- logfile_reflexion = (
350
- f"llm.log/memory-{args.env_name}-{args.decider}-{args.gpt_version}-l{args.prompt_level}"
351
- f"-{datetime.datetime.now().timestamp()}.log"
352
- )
353
- my_distiller = distiller_class(logfile_reflexion)
354
- else:
355
- my_distiller = distiller_class()
356
- args.game_description = environment.game_description
357
- args.goal_description = environment.goal_description
358
- args.action_description = environment.action_description
359
-
360
- logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' not in x['message'])
361
-
362
- decider = decider_class(environment.env.action_space, args, prompts_class, my_distiller, temperature=0.0, logger=logger)
363
-
364
- # Evaluate the translator
365
- evaluate_translator(translator, environment, decider, args.max_episode_len, logfile, args)
main_merge.sh DELETED
@@ -1,123 +0,0 @@
1
- # L1: --prompt_level 1; L2: --prompt_level 2 --distiller traj_distiller; L4: --prompt_level 4 --distiller traj_distiller; L5: --prompt_level 5
2
- # Use History: --use_short_mem 1 or --use_short_mem 0 (default)
3
- # prompt_level default: 1
4
-
5
- # CartPole-v0
6
- # L1
7
- # Naive Actor
8
- python main_merge.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --seed 0
9
- # PAL
10
- python main_merge.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider pal_actor --seed 0
11
- # COT
12
- python main_merge.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --seed 0
13
- # self consistency
14
- python main_merge.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --seed 0
15
- # self-ask
16
- python main_merge.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --seed 0
17
- # SPP
18
- python main_merge.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --seed 0
19
-
20
- # LunarLander-v2
21
- # L1
22
- # Naive Actor
23
- python main_merge.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider naive_actor --seed 0
24
- # PAL
25
- python main_merge.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider pal_actor --seed 0
26
- # COT
27
- python main_merge.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider cot_actor --seed 0
28
- # self consistency
29
- python main_merge.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider self_consistency_actor --seed 0
30
- # self-ask
31
- python main_merge.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider selfask_actor --seed 0
32
- # SPP
33
- python main_merge.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider spp_actor --prompt_level 1 --seed 0
34
-
35
- # Acrobot-v1
36
- # L1
37
- # Naive Actor
38
- # python main_merge.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 1
39
- # # PAL
40
- # python main_merge.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 1
41
- # # COT
42
- # python main_merge.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 1
43
- # # self consistency
44
- # python main_merge.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 1
45
- # # self-ask
46
- # python main_merge.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 1
47
- # # SPP
48
- # python main_merge.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 1
49
-
50
- # MountainCar-v0
51
- # L1
52
- # Naive Actor
53
- # python main_merge.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider naive_actor --prompt_level 1
54
- # # PAL
55
- # python main_merge.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider pal_actor --prompt_level 1
56
- # # COT
57
- # python main_merge.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider cot_actor --prompt_level 1
58
- # # self consistency
59
- # python main_merge.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider self_consistency_actor --prompt_level 1
60
- # # self-ask
61
- # python main_merge.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider selfask_actor --prompt_level 1
62
- # # SPP
63
- # python main_merge.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider spp_actor --prompt_level 1
64
-
65
- # Blackjack-v1
66
- # L1
67
- # Naive Actor
68
- python main_merge.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 1 --seed 0
69
- # PAL
70
- python main_merge.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider pal_actor --prompt_level 1 --seed 0
71
- # COT
72
- python main_merge.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 1 --seed 0
73
- # self consistency
74
- python main_merge.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 1 --seed 0
75
- # self-ask
76
- python main_merge.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 1 --seed 0
77
- # SPP
78
- python main_merge.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 1 --seed 0
79
-
80
- # Taxi-v3
81
- # L1
82
- # Naive Actor
83
- # python main_merge.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider naive_actor --prompt_level 1
84
- # # PAL
85
- # python main_merge.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider pal_actor --prompt_level 1
86
- # # COT
87
- # python main_merge.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider cot_actor --prompt_level 1
88
- # # self consistency
89
- # python main_merge.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider self_consistency_actor --prompt_level 1
90
- # # self-ask
91
- # python main_merge.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider selfask_actor --prompt_level 1
92
- # # SPP
93
- # python main_merge.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider spp_actor --prompt_level 1
94
-
95
- # CliffWalking-v0
96
- # L1
97
- # Naive Actor
98
- # python main_merge.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 1
99
- # # PAL
100
- # python main_merge.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 1
101
- # # COT
102
- # python main_merge.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 1
103
- # # self consistency
104
- # python main_merge.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 1
105
- # # self-ask
106
- # python main_merge.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 1
107
- # # SPP
108
- # python main_merge.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 1
109
-
110
- # FrozenLake-v1
111
- # L1
112
- # Naive Actor
113
- python main_merge.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 1 --seed 0
114
- # PAL
115
- python main_merge.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 1 --seed 0
116
- # COT
117
- python main_merge.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 1 --seed 0
118
- # self consistency
119
- python main_merge.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 1 --seed 0
120
- # self-ask
121
- python main_merge.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 1 --seed 0
122
- # SPP
123
- python main_merge.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 1 --seed 0
main_reflexion.py CHANGED
@@ -70,10 +70,6 @@ def evaluate_translator(translator, environment, decider, max_episode_len, logfile, args):
         else:
             decider.update_mem()
         decider.clear_mem()
-    # wandb.log({'memory': decider.memory})
-    # with open('./mem.json', 'w') as f:
-    #     json.dump(decider.memory, f) #, cls=NumpyArrayEncoder)
-    # f.close()
     return utilities
 
 def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
@@ -91,27 +87,15 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
     goal_description = environment.get_goal_description()
     action_description = environment.get_action_description()
 
-    # Initialize the history
-    if args.past_horizon:
-        raise NotImplementedError
-        history = deque(maxlen=args.past_horizon)
-        env_info['history'] = history
-
     # Initialize the statistics
     frames = []
     utility = 0
     current_total_tokens = 0
     current_total_cost = 0
-    columns = ["Prompt", "Response", "Action", "Return", "#All Tokens", "All Cost"]
     start_time = datetime.datetime.now()
     # Run the game for a maximum number of steps
     for round in range(max_episode_len):
-        # If the past horizon is specified, keep track of the past states, actions, and rewards
-        if args.past_horizon:
-            previous_tuples = {'state': None, 'action': None, 'reward': None}
-
         # Keep asking ChatGPT for an action until it provides a valid one
-        asking_round = 0
         error_flag = True
         retry_num = 1
         for error_i in range(retry_num):
@@ -125,29 +109,19 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
                     logfile
                 )
 
-                if args.past_horizon:
-                    raise NotImplementedError
-                    previous_tuples['state'] = state_description
-
-                # Perform the action in the environment
                 if "Continuous" in args.env_name:
                     action = [action]
 
-
                 state_description, reward, termination, truncation, env_info = environment.step_llm(
                     action
                 )
                 if "Cliff" in args.env_name or "Frozen" in args.env_name:
                     decider.env_history.add('reward', env_info['potential_state'] + environment.reward_desc_dict[reward])
+                else:
+                    decider.env_history.add('reward', f"The player get rewards {reward}.")
+
                 utility += reward
 
-                if args.past_horizon:
-                    raise NotImplementedError
-                    previous_tuples['action'] = action
-                    previous_tuples['reward'] = reward
-                    history.append(previous_tuples)
-                    env_info['history'] = history
-
                 # Update the statistics
                 current_total_tokens += tokens
                 current_total_cost += cost
@@ -162,8 +136,6 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
                 if logger:
                     logger.debug(f"Error: {e}, Retry! ({error_i+1}/{retry_num})")
                 continue
-        # If the action is still invalid after 5 tries, use the default action
-        # file.write(prompt+"\n"+"======================================\n")
        if error_flag:
            if "Continuous" in args.env_name:
                action = [decider.default_action]
@@ -180,14 +152,6 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
                decider.env_history.add('reward', env_info['potential_state'] + environment.reward_desc_dict[reward])
            utility += reward
 
-            if args.past_horizon:
-                raise NotImplementedError
-                previous_tuples['action'] = action
-                previous_tuples['reward'] = reward
-                history.append(previous_tuples)
-                env_info['history'] = history
-
-            # Update the statistics
 
            logger.info(f"Seed: {seed}")
            logger.info(f'The optimal action is: {decider.default_action}.')
@@ -195,23 +159,15 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
        else:
            current_total_tokens += tokens
            current_total_cost += cost
-            # print(prompt)
            logger.info(f"Seed: {seed}")
            logger.info(f"current_total_tokens: {current_total_tokens}")
            logger.info(f"current_total_cost: {current_total_cost}")
            logger.info(f"Now it is round {round}.")
 
        frames.append(environment.render())
-
-        # If the game is over, break the loop
        if termination or truncation:
            if logger:
                logger.info(f"Terminated!")
-            # save_frames_as_gif(
-            #     frames,
-            #     path=f"./images/{environment.env_name}/",
-            #     filename=f"{translator.__class__.__name__}.gif",
-            # )
            break
        time.sleep(1)
    decider.env_history.add('terminate_state', environment.get_terminate_state(round+1, max_episode_len))
@@ -277,10 +233,16 @@ if __name__ == "__main__":
         help="The maximum number of steps in an episode",
     )
     parser.add_argument(
-        "--past_horizon", type=int, help="The horizon of looking back"
+        "--max_query_tokens",
+        type=int,
+        default=5000,
+        help="The maximum number of tokens when querying",
     )
     parser.add_argument(
-        "--future_horizon", type=int, help="The horizon of looking to the future"
+        "--max_tokens",
+        type=int,
+        default=2000,
+        help="The maximum number of tokens when responding",
     )
     parser.add_argument(
         "--distiller",
@@ -306,12 +268,6 @@ if __name__ == "__main__":
         default=5,
         help="The number of trials",
     )
-    parser.add_argument(
-        "--trajectories_num",
-        type=int,
-        default=20,
-        help="The number of trials",
-    )
     parser.add_argument(
         "--use_short_mem",
         type=int,
@@ -327,7 +283,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--short_mem_num",
         type=int,
-        default=20,
+        default=10,
         help="Set numbers of short memories used in actor, if use_short_mem = 1"
     )
     parser.add_argument(
@@ -370,14 +326,13 @@ if __name__ == "__main__":
         f"llm.log/output-{args.env_name}-{args.decider}-{args.gpt_version}-l{args.prompt_level}"
         f"-{datetime.datetime.now().timestamp()}.log"
     )
-    if "reflexion" in args.decider or "jarvis" in args.decider:
-        logfile_reflexion = (
+
+    logfile_reflexion = (
         f"llm.log/memory-{args.env_name}-{args.decider}-{args.gpt_version}-l{args.prompt_level}"
         f"-{datetime.datetime.now().timestamp()}.log"
     )
-        my_distiller = distiller_class(logfile_reflexion,args=args)
-    else:
-        my_distiller = distiller_class(args=args)
+    my_distiller = distiller_class(logfile=logfile_reflexion,args=args)
+
     args.game_description = environment.game_description
     args.goal_description = environment.goal_description
     args.action_description = environment.action_description
@@ -386,11 +341,6 @@ if __name__ == "__main__":
 
     logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' not in x['message'])
 
-    fixed_suggestion = None
-    fixed_insight = None
-    if "jarvis" in args.decider:
-        decider = decider_class(environment.env.action_space, args, prompts_class, my_distiller, temperature=0.0, logger=logger, fixed_suggestion=fixed_suggestion, fixed_insight=fixed_insight)
-    else:
-        decider = decider_class(environment.env.action_space, args, prompts_class, my_distiller, temperature=0.0, logger=logger)
+    decider = decider_class(environment.env.action_space, args, prompts_class, my_distiller, temperature=0.0, logger=logger, max_tokens=args.max_tokens)
     # Evaluate the translator
     evaluate_translator(translator, environment, decider, args.max_episode_len, logfile, args)
memory/env_history.py CHANGED
@@ -17,7 +17,7 @@ class EnvironmentHistory:
 
     def __str__(self) -> str:
         s = ''
-        for i, item in enumerate(self._history[-150:]):
+        for i, item in enumerate(self._history):
            if item['label'] == 'action':
                s += f'He takes action: {item["value"]}'
            elif item['label'] == 'observation':
@@ -137,4 +137,5 @@ class EnvironmentHistory:
        self._history = self._history[:-1]
 
    def __len__(self) -> int:
-        return len(self._history)
+        action = [item for item in self._history if item['label'] == 'action' ]
+        return len(action)
prompts/__init__.py CHANGED
@@ -18,10 +18,7 @@ REGISTRY[('self_consistency_actor')] = prompts.CONSISTENCY
 REGISTRY[('selfask_actor')] = prompts.SELFASK
 REGISTRY[('spp_actor')] = prompts.SPP
 REGISTRY[('reflexion_actor')] = prompts.REFLEXION
-REGISTRY[('jarvis_actor')] = prompts.JARVIS
-REGISTRY[('jarvis_actor_woi')] = prompts.JARVIS
-REGISTRY[('jarvis_actor_wosug')] = prompts.JARVIS
-REGISTRY[('jarvis_actor_wosh')] = prompts.JARVIS
+REGISTRY[('exe_actor')] = prompts.EXE
 
 # CartPole-v0
 REGISTRY[("CartPole-v0","naive_actor")] = cartpole.ACT
@@ -31,10 +28,7 @@ REGISTRY[("CartPole-v0",'self_consistency_actor')] = cartpole.CONSISTENCY
 REGISTRY[("CartPole-v0",'selfask_actor')] = cartpole.SELFASK
 REGISTRY[("CartPole-v0",'spp_actor')] = cartpole.SPP
 REGISTRY[("CartPole-v0",'reflexion_actor')] = cartpole.REFLEXION
-REGISTRY[("CartPole-v0",'jarvis_actor')] = cartpole.EGG
-REGISTRY[("CartPole-v0",'jarvis_actor_woi')] = cartpole.EGGWithoutInsights
-REGISTRY[("CartPole-v0",'jarvis_actor_wosug')] = cartpole.EGGWithoutSuggestions
-REGISTRY[("CartPole-v0",'jarvis_actor_wosh')] = cartpole.EGG
+REGISTRY[("CartPole-v0",'exe_actor')] = cartpole.EXE
 
 # LunarLander-v2
 REGISTRY[("LunarLander-v2","naive_actor")] = LunarLander.ACT
@@ -44,10 +38,7 @@ REGISTRY[("LunarLander-v2",'self_consistency_actor')] = LunarLander.CONSISTENCY
 REGISTRY[("LunarLander-v2",'selfask_actor')] = LunarLander.SELFASK
 REGISTRY[("LunarLander-v2",'spp_actor')] = LunarLander.SPP
 REGISTRY[("LunarLander-v2",'reflexion_actor')] = LunarLander.REFLEXION
-REGISTRY[("LunarLander-v2",'jarvis_actor')] = LunarLander.EGG
-REGISTRY[("LunarLander-v2",'jarvis_actor_woi')] = LunarLander.EGGWithoutInsights
-REGISTRY[("LunarLander-v2",'jarvis_actor_wosug')] = LunarLander.EGGWithoutSuggestions
-REGISTRY[("LunarLander-v2",'jarvis_actor_wosh')] = LunarLander.EGG
+REGISTRY[("LunarLander-v2",'exe_actor')] = LunarLander.EXE
 
 
 # Acrobot-v1
@@ -58,10 +49,7 @@ REGISTRY[("Acrobot-v1",'self_consistency_actor')] = acrobot.CONSISTENCY
 REGISTRY[("Acrobot-v1",'selfask_actor')] = acrobot.SELFASK
 REGISTRY[("Acrobot-v1",'spp_actor')] = acrobot.SPP
 REGISTRY[("Acrobot-v1",'reflexion_actor')] = acrobot.REFLEXION
-REGISTRY[("Acrobot-v1",'jarvis_actor')] = acrobot.EGG
-REGISTRY[("Acrobot-v1",'jarvis_actor_woi')] = acrobot.EGGWithoutInsights
-REGISTRY[("Acrobot-v1",'jarvis_actor_wosug')] = acrobot.EGGWithoutSuggestions
-REGISTRY[("Acrobot-v1",'jarvis_actor_wosh')] = acrobot.EGG
+REGISTRY[("Acrobot-v1",'exe_actor')] = acrobot.EXE
 
 # MountainCar-v0
 REGISTRY[("MountainCar-v0","naive_actor")] = mountaincar.ACT
@@ -71,10 +59,7 @@ REGISTRY[("MountainCar-v0",'self_consistency_actor')] = mountaincar.CONSISTENCY
 REGISTRY[("MountainCar-v0",'selfask_actor')] = mountaincar.SELFASK
 REGISTRY[("MountainCar-v0",'spp_actor')] = mountaincar.SPP
 REGISTRY[("MountainCar-v0",'reflexion_actor')] = mountaincar.REFLEXION
-REGISTRY[("MountainCar-v0",'jarvis_actor')] = mountaincar.EGG
-REGISTRY[("MountainCar-v0",'jarvis_actor_woi')] = mountaincar.EGGWithoutInsights
-REGISTRY[("MountainCar-v0",'jarvis_actor_wosug')] = mountaincar.EGGWithoutSuggestions
-REGISTRY[("MountainCar-v0",'jarvis_actor_wosh')] = mountaincar.EGG
+REGISTRY[("MountainCar-v0",'exe_actor')] = mountaincar.EXE
 
 # Blackjack-v1
 REGISTRY[("Blackjack-v1","naive_actor")] = blackjack.ACT
@@ -84,10 +69,7 @@ REGISTRY[("Blackjack-v1",'self_consistency_actor')] = blackjack.CONSISTENCY
 REGISTRY[("Blackjack-v1",'selfask_actor')] = blackjack.SELFASK
 REGISTRY[("Blackjack-v1",'spp_actor')] = blackjack.SPP
 REGISTRY[("Blackjack-v1",'reflexion_actor')] = blackjack.REFLEXION
-REGISTRY[("Blackjack-v1",'jarvis_actor')] = blackjack.EGG
-REGISTRY[("Blackjack-v1",'jarvis_actor_woi')] = blackjack.EGGWithoutInsights
-REGISTRY[("Blackjack-v1",'jarvis_actor_wosug')] = blackjack.EGGWithoutSuggestions
-REGISTRY[("Blackjack-v1",'jarvis_actor_wosh')] = blackjack.EGG
+REGISTRY[("Blackjack-v1",'exe_actor')] = blackjack.EXE
 
 # Taxi-v3
 REGISTRY[("Taxi-v3","naive_actor")] = taxi.ACT
@@ -97,11 +79,7 @@ REGISTRY[("Taxi-v3",'self_consistency_actor')] = taxi.CONSISTENCY
 REGISTRY[("Taxi-v3",'selfask_actor')] = taxi.SELFASK
 REGISTRY[("Taxi-v3",'spp_actor')] = taxi.SPP
 REGISTRY[("Taxi-v3",'reflexion_actor')] = taxi.REFLEXION
-REGISTRY[("Taxi-v3",'jarvis_actor')] = taxi.EGG
-REGISTRY[("Taxi-v3",'jarvis_actor_woi')] = taxi.EGGWithoutInsights
-REGISTRY[("Taxi-v3",'jarvis_actor_wosug')] = taxi.EGGWithoutSuggestions
-REGISTRY[("Taxi-v3",'jarvis_actor_wosh')] = taxi.EGG
-
+REGISTRY[("Taxi-v3",'exe_actor')] = taxi.EXE
 # CliffWalking-v0
 REGISTRY[("CliffWalking-v0","naive_actor")] = cliffwalking.ACT
 REGISTRY[("CliffWalking-v0","cot_actor")] = cliffwalking.COT
@@ -110,10 +88,7 @@ REGISTRY[("CliffWalking-v0",'self_consistency_actor')] = cliffwalking.CONSISTENCY
 REGISTRY[("CliffWalking-v0",'selfask_actor')] = cliffwalking.SELFASK
 REGISTRY[("CliffWalking-v0",'spp_actor')] = cliffwalking.SPP
 REGISTRY[("CliffWalking-v0",'reflexion_actor')] = cliffwalking.REFLEXION
-REGISTRY[("CliffWalking-v0",'jarvis_actor')] = cliffwalking.EGG
-REGISTRY[("CliffWalking-v0",'jarvis_actor_woi')] = cliffwalking.EGGWithoutInsights
-REGISTRY[("CliffWalking-v0",'jarvis_actor_wosug')] = cliffwalking.EGGWithoutSuggestions
-REGISTRY[("CliffWalking-v0",'jarvis_actor_wosh')] = cliffwalking.EGG
+REGISTRY[("CliffWalking-v0",'exe_actor')] = cliffwalking.EXE
 
 # FrozenLake-v1
 REGISTRY[("FrozenLake-v1","naive_actor")] = frozenlake.ACT
@@ -123,10 +98,7 @@ REGISTRY[("FrozenLake-v1",'self_consistency_actor')] = frozenlake.CONSISTENCY
 REGISTRY[("FrozenLake-v1",'selfask_actor')] = frozenlake.SELFASK
 REGISTRY[("FrozenLake-v1",'spp_actor')] = frozenlake.SPP
 REGISTRY[("FrozenLake-v1",'reflexion_actor')] = frozenlake.REFLEXION
-REGISTRY[("FrozenLake-v1",'jarvis_actor')] = frozenlake.EGG
-REGISTRY[("FrozenLake-v1",'jarvis_actor_woi')] = frozenlake.EGGWithoutInsights
-REGISTRY[("FrozenLake-v1",'jarvis_actor_wosug')] = frozenlake.EGGWithoutSuggestions
-REGISTRY[("FrozenLake-v1",'jarvis_actor_wosh')] = frozenlake.EGG
+REGISTRY[("FrozenLake-v1",'exe_actor')] = frozenlake.EXE
 
 # MountainCarContinuous-v0
 REGISTRY[("MountainCarContinuous-v0","naive_actor")] = mountaincarContinuous.ACT
@@ -136,7 +108,4 @@ REGISTRY[("MountainCarContinuous-v0",'self_consistency_actor')] = mountaincarContinuous.CONSISTENCY
 REGISTRY[("MountainCarContinuous-v0",'selfask_actor')] = mountaincarContinuous.SELFASK
 REGISTRY[("MountainCarContinuous-v0",'spp_actor')] = mountaincarContinuous.SPP
 REGISTRY[("MountainCarContinuous-v0",'reflexion_actor')] = mountaincarContinuous.REFLEXION
-REGISTRY[("MountainCarContinuous-v0",'jarvis_actor')] = mountaincarContinuous.EGG
-REGISTRY[("MountainCarContinuous-v0",'jarvis_actor_woi')] = mountaincarContinuous.EGGWithoutInsights
-REGISTRY[("MountainCarContinuous-v0",'jarvis_actor_wosug')] = mountaincarContinuous.EGGWithoutSuggestions
-REGISTRY[("MountainCarContinuous-v0",'jarvis_actor_wosh')] = mountaincarContinuous.EGG
+REGISTRY[("MountainCarContinuous-v0",'exe_actor')] = mountaincarContinuous.EXE
prompts/task_irrelevant/prompts.py CHANGED
@@ -2,7 +2,7 @@ class ACT:
     def __init__(self):
         self.TASK_IRRELEVANT_PROMPTS = []
 
-class JARVIS:
+class EXE:
     def __init__(self):
         self.TASK_IRRELEVANT_PROMPTS = []
 
prompts/task_relevant/box2d/LunarLander.py CHANGED
@@ -464,7 +464,7 @@ class REFLEXION:
             },
         ]
 
-class EGG:
+class EXE:
     def __init__(self):
         self.PERCEPTRON_BASIC_FS_EXAMPLES = [
             {
@@ -510,79 +510,3 @@ class EXE:
                 """
             },
         ]
-
-class EGGWithoutInsights:
-    def __init__(self):
-        self.PERCEPTRON_BASIC_FS_EXAMPLES = [
-            {
-                "question":
-                """
-                State description: The lander is at position (-0.01, 1.39), the horizontal speed of movement is -0.65, the vertical velocity speed of movement is -0.41. The angle is 0.01 radians, and it's rotating at 0.13 radians per second. The left leg is not in contact with ground. The right leg is not in contact with ground.
-                Goal description: The goal is to successfully land the lander on the landing pad which is at position (0, 0) with a vertical velocity close to 0, and make sure all two legs are up and the lander is balanced.
-                Action description: Please choose an action. Type '1' to do noting, '2' to fire left engine and make lander move to right, '3' to fire main engine and make lander move to up, or '4' to fire right engine and make lander move to left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
-                The suggestions are listed below:
-                1. For exploration in the next episode, the player should focus on gathering more information about the lander's movement and behavior during descent. This can be done by varying the thrust and angle adjustments to see how the lander responds, and observing any patterns or trends in its movement. The player can also try different starting positions and initial forces to see how they affect the lander's trajectory.
-                2. To improve the policy for higher performance in the next episode, the player should focus on fine-tuning the thrust and angle adjustments to optimize the lander's descent and landing. This can be done by analyzing the data gathered from exploration and adjusting the policy accordingly. The player should also pay attention to the lander's rotation and angle to ensure a gentle landing on the pad.
-                3. The player should weigh exploration and exploitation equally in the next episode, as both are important for improving the policy and achieving a successful landing. The player should continue to gather information through exploration while also using that information to make informed decisions during exploitation.
-                """,
-                "answer":
-                """
-                Based on the current game state and the suggestions, the optimal action for the player to take would be to fire main engine (action 3) to reduce the descent velocity of the spacecraft. Therefore, the optimal action to take now is to fire main engine (action 3).
-                """
-            },
-            {
-                "question":
-                """
-                State description: The lander is at position (0.31, 0.04), the horizontal speed of movement is -0.21, the vertical velocity speed of movement is -0.09. The angle is 0.24 radians, and it's rotating at 0.17 radians per second. The left leg is not in contact with ground. The right leg is in contact with ground.
-                Goal description: The goal is to successfully land the lander on the landing pad which is at position (0, 0) with a vertical velocity close to 0, and make sure all two legs are up and the lander is balanced.
-                Action description: Please choose an action. Type '1' to do noting, '2' to fire left engine and make lander move to right, '3' to fire main engine and make lander move to up, or '4' to fire right engine and make lander move to left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
-                The suggestions are listed below:
-                1. Exploration:
-                - The player should explore the effect of adjusting the thrust and angle of the lander during descent to ensure a gentle landing on the pad.
-                - To make the exploration, the player can try different combinations of thrust and angle adjustments during descent and observe the effect on the lander's movement and performance score.
-                2. Exploitation:
-                - The player should improve the policy by taking into account the contact with the ground and adjusting the thrust accordingly to ensure a gentle landing.
-                - Additionally, the policy should also take into account the angle and rotation of the lander to ensure that it lands upright on the pad.
-                - To improve the policy, the player can use the information obtained from the previous episodes and adjust the thrust and angle accordingly during descent.
-                3. Trade-off:
-                - The player should focus more on exploitation in the next episode as they have already explored different thrust and angle adjustments in the previous episodes.
-                - However, the player should still allocate some time for exploration to fine-tune the policy and ensure a successful landing on the pad.
-                - A good trade-off would be to allocate seventy percent of the time for exploitation and thirty percent of the time for exploration.
-                """,
-                "answer":
-                """
-                Based on the current game state and the suggestions, the optimal action for the player to take would be to fire main engine (action 3) to reduce the descent velocity of the spacecraft. Therefore, the optimal action to take now is to fire main engine (action 3).
-                """
-            },
-        ]
-
-class EGGWithoutSuggestions:
-    def __init__(self):
-        self.PERCEPTRON_BASIC_FS_EXAMPLES = [
-            {
-                "question":
-                """
-                State description: The lander is at position (-0.01, 1.39), the horizontal speed of movement is -0.65, the vertical velocity speed of movement is -0.41. The angle is 0.01 radians, and it's rotating at 0.13 radians per second. The left leg is not in contact with ground. The right leg is not in contact with ground.
-                Goal description: The goal is to successfully land the lander on the landing pad which is at position (0, 0) with a vertical velocity close to 0, and make sure all two legs are up and the lander is balanced.
-                Action description: Please choose an action. Type '1' to do noting, '2' to fire left engine and make lander move to right, '3' to fire main engine and make lander move to up, or '4' to fire right engine and make lander move to left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
-                The insights of the game are listed below: The key information that can be exploited to improve the player's performance includes taking into account the lander's movement and adjusting the thrust accordingly to stabilize its descent, considering the angle and rotation of the lander to ensure a gentle landing, and fine-tuning the policy to optimize the thrust and angle adjustments for a smoother landing. Additionally, the player should avoid constantly applying thrust to the lander as it may not be effective in successfully landing the lander on the pad.
-                """,
-                "answer":
-                """
-                Based on the current game state and the insights, the optimal action for the player to take would be to fire main engine (action 3) to reduce the descent velocity of the spacecraft. Therefore, the optimal action to take now is to fire main engine (action 3).
-                """
-            },
-            {
-                "question":
-                """
-                State description: The lander is at position (0.31, 0.04), the horizontal speed of movement is -0.21, the vertical velocity speed of movement is -0.09. The angle is 0.24 radians, and it's rotating at 0.17 radians per second. The left leg is not in contact with ground. The right leg is in contact with ground.
-                Goal description: The goal is to successfully land the lander on the landing pad which is at position (0, 0) with a vertical velocity close to 0, and make sure all two legs are up and the lander is balanced.
-                Action description: Please choose an action. Type '1' to do noting, '2' to fire left engine and make lander move to right, '3' to fire main engine and make lander move to up, or '4' to fire right engine and make lander move to left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
-                The insights of the game are listed below: The key information that can be exploited to improve the player's performance includes taking into account the lander's movement and adjusting the thrust accordingly to stabilize its descent, considering the angle and rotation of the lander to ensure a gentle landing, and adjusting the thrust to ensure a gentle landing when the lander makes contact with the ground. Additionally, fine-tuning the policy to optimize the thrust and angle adjustments for a smoother landing can further improve performance.
-                """,
-                "answer":
-                """
-                Based on the current game state and the insights, the optimal action for the player to take would be to fire main engine (action 3) to reduce the descent velocity of the spacecraft. Therefore, the optimal action to take now is to fire main engine (action 3).
-                """
-            },
-        ]
prompts/task_relevant/classic_control/acrobot.py CHANGED
@@ -362,7 +362,7 @@ class REFLEXION:
             },
         ]
 
-class EGG:
+class EXE:
     def __init__(self):
         self.PERCEPTRON_BASIC_FS_EXAMPLES = [
             {
@@ -420,92 +420,4 @@ class EXE:
                 Based on the current game state, the insights and the suggestions, the optimal action for the player to take would be to apply 1 torque (Action 3). Therefore, the optimal action to take now is to apply 1 torque (Action 3).
                 """
             },
-        ]
-
-class EGGWithoutInsights:
-    def __init__(self):
-        self.PERCEPTRON_BASIC_FS_EXAMPLES = [
-            {
-                "question":
-                """
-                State description: Current Game State: Link1: angle theta1 -0.04 radians, rotating 0.02 radians per second clockwise. Link2: angle theta2 0.02 radians relative to Link1, rotating 0.07 radians per second counterclockwise.
-                Goal description: The goal is to apply torque on the actuator to swing the free end of the linear chain above the target height, which is constructed as: -cos(theta1) - cos(theta2 + theta1) > 1.0.
-                Action description: Your Next Move: \n Please choose an action. Type '1' to apply -1 torque, '2' to apply 0 torque, or '3' to apply 1 torque. Ensure you provide the action number from the valid action list, i.e., [1, 2, 3].
-                The suggestions are listed below:
-                1. Exploration:
-                - The player should explore different torque values and directions to find the optimal policy.
-                - To make the exploration, the player can randomly choose torque values and directions for a certain number of steps, then evaluate the performance and adjust the policy accordingly.
-
-                2. Exploitation:
-                - The player should adjust the torque based on the current state of the system, such as the angles and angular velocities of the links.
-                - The player should also try to apply torque in a way that helps swing the free end of the linear chain above the target height.
-                - To improve the policy, the player can use techniques such as reinforcement learning or model-based control.
-
-                3. Weighting:
-                - The player should focus more on exploration in the beginning of the episode to find the optimal policy.
-                - As the episode progresses, the player should shift the focus towards exploitation to gain a higher performance.
-                - The player should also adjust the weighting based on the performance and adjust the policy accordingly.
-                """,
-                "answer":
-                """
-                Based on the current game state and the suggestions, the optimal action for the player to take would be to apply -1 torque (Action 1). Therefore, the optimal action to take now is to apply -1 torque (Action 1).
-                """
-            },{
-                "question":
-                """
-                State description: Current Game State: Link1: angle theta1 0.31 radians, rotating 0.76 radians per second clockwise. Link2: angle theta2 -0.73 radians relative to Link1, rotating 0.98 radians per second counterclockwise.
-                Goal description: The goal is to apply torque on the actuator to swing the free end of the linear chain above the target height, which is constructed as: -cos(theta1) - cos(theta2 + theta1) > 1.0.
-                Action description: Your Next Move: \n Please choose an action. Type '1' to apply -1 torque, '2' to apply 0 torque, or '3' to apply 1 torque. Ensure you provide the action number from the valid action list, i.e., [1, 2, 3].
-                The suggestions are listed below:
-                1. Exploration:
-                - The player should explore different torque values and directions, rather than sticking to a consistent pattern.
-                - The player should also adjust the torque based on the current state of the system, such as the angles and angular velocities of the links.
-                - To make the exploration, the player can randomly choose torque values and directions, or try to systematically vary the torque values and directions to cover a wider range of possibilities.
-
-                2. Exploitation:
-                - The player should use the information obtained from exploration to inform a more adaptive and effective policy.
-                - The player can use techniques such as reinforcement learning or model-based control to improve the policy.
-                - The policy should take into account the current state of the system, such as the angles and angular velocities of the links, to adjust the torque values and directions accordingly.
-
-                3. Weighting for exploration and exploitation:
-                - The player should balance exploration and exploitation to find the optimal policy.
-                - In the beginning of the episode, the player should focus more on exploration to gather information about the system and find a wider range of possible solutions.
-                - As the episode progresses, the player should shift the focus towards exploitation to improve the policy and achieve a higher performance.
-                - The weighting can be adjusted
-                """,
-                "answer":
-                """
-                Based on the current game state and the suggestions, the optimal action for the player to take would be to apply 1 torque (Action 3). Therefore, the optimal action to take now is to apply 1 torque (Action 3).
479
- """
480
- },
481
- ]
482
-
483
- class EGGWithoutSuggestions:
484
- def __init__(self):
485
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
486
- {
487
- "question":
488
- """
489
- State description: Current Game State: Link1: angle theta1 -0.04 radians, rotating 0.02 radians per second clockwise. Link2: angle theta2 0.02 radians relative to Link1, rotating 0.07 radians per second counterclockwise.
490
- Goal description: The goal is to apply torque on the actuator to swing the free end of the linear chain above the target height, which is constructed as: -cos(theta1) - cos(theta2 + theta1) > 1.0.
491
- Action description: Your Next Move: \n Please choose an action. Type '1' to apply -1 torque, '2' to apply 0 torque, or '3' to apply 1 torque. Ensure you provide the action number from the valid action list, i.e., [1, 2, 3].
492
- The insights of the game are listed below: The key information that can be exploited to improve the player's performance is that the current policy of consistently applying torque in one direction is not effective in achieving the goal of swinging the free end of the linear chain above the target height. The player may benefit from exploring different torque values and directions and adjusting its torque based on the current state of the system, such as the angles and angular velocities of the links. This information can be used to inform a more adaptive and effective policy, potentially using techniques such as reinforcement learning or model-based control.
493
- """,
494
- "answer":
495
- """
496
- Based on the current game state and the insights, the optimal action for the player to take would be to apply -1 torque (Action 1). Therefore, the optimal action to take now is to apply -1 torque (Action 1).
497
- """
498
- },{
499
- "question":
500
- """
501
- State description: Current Game State: Link1: angle theta1 0.31 radians, rotating 0.76 radians per second clockwise. Link2: angle theta2 -0.73 radians relative to Link1, rotating 0.98 radians per second counterclockwise.
502
- Goal description: The goal is to apply torque on the actuator to swing the free end of the linear chain above the target height, which is constructed as: -cos(theta1) - cos(theta2 + theta1) > 1.0.
503
- Action description: Your Next Move: \n Please choose an action. Type '1' to apply -1 torque, '2' to apply 0 torque, or '3' to apply 1 torque. Ensure you provide the action number from the valid action list, i.e., [1, 2, 3].
504
- The insights of the game are listed below: The key information that can be exploited to improve the performance of the player is that the current policy of consistently applying torque in one direction is not effective in achieving the goal of swinging the free end of the linear chain above the target height. The player may benefit from exploring different torque values and directions and adjusting its torque based on the current state of the system, such as the angles and angular velocities of the links. This information can be used to inform a more adaptive and effective policy, potentially using techniques such as reinforcement learning or model-based control.
505
- """,
506
- "answer":
507
- """
508
- Based on the current game state and the insights, the optimal action for the player to take would be to apply 1 torque (Action 3). Therefore, the optimal action to take now is to apply 1 torque (Action 3).
509
- """
510
- },
511
  ]
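
The deleted acrobot suggestions above advise adjusting the torque from the links' angles and angular velocities. One common concrete rule of that kind is energy pumping: push in the direction the second link is already rotating. A small illustrative sketch (not code from this commit):

    def acrobot_heuristic(theta1, theta1_dot, theta2, theta2_dot):
        # Prompt numbering: 1 = apply -1 torque, 2 = apply 0 torque,
        # 3 = apply +1 torque. Pushing with the second link's current
        # rotation feeds energy into the swing.
        if theta2_dot > 0:
            return 3
        if theta2_dot < 0:
            return 1
        return 2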
prompts/task_relevant/classic_control/cartpole.py CHANGED
@@ -364,7 +364,7 @@ class REFLEXION:
364
  },
365
  ]
366
 
367
- class EGG:
+ class EXE:
368
  def __init__(self):
369
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
370
  {
@@ -401,69 +401,3 @@ class EGG:
401
  """
402
  },
403
  ]
404
-
405
- class EGGWithoutInsights:
406
- def __init__(self):
407
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
408
- {
409
- "question":
410
- """
411
- State description: The current state is: The cart is positioned at -0.053, with a velocity of 0.01 towards the right. The pole is tilted at 0.05 radians, rotating at 0.11 radians per second towards the right.
412
- Goal description: The goal is to keep the pole balanced upright for as long as possible.
413
- Action description: Valid action list: [1, 2]. Action 1 means to push the cart to the left, and Action 2 means push the cart to the right.
414
- The suggestions are listed below:Suggestion for the next episode:
415
- 1. Exploration: The player should explore different strategies for recovering from large tilts of the pole. This can be done by trying out different actions when the pole is tilted beyond a certain angle, such as moving the cart in the opposite direction or applying a stronger force. The player can also try to observe the behavior of the pole and cart in different situations to gain a better understanding of the dynamics of the game.
416
- 2. Exploitation: To improve performance, the player can adjust the policy to take into account the increasing velocity of the cart towards the end of the game. This can be done by increasing the sensitivity of the policy to changes in the velocity of the cart, or by using a different algorithm that is better suited to handling non-stationary environments. Additionally, the player can incorporate a mechanism for learning from past mistakes, such as experience replay, to help the policy recover from large tilts of the pole.
417
- 3. Weighting: The player should focus more on exploitation than exploration in the next episode, as they have already explored different strategies in the previous episodes. However, they should still allocate some time for exploration to gain a better understanding of the dynamics of the game and to try out new strategies for recovering from large til
418
- """,
419
- "answer":
420
- """
421
- Based on the current game state and the suggestions, the optimal action for the player to take would be to push the cart to the right (action 2) to maintain balance of the pole. Therefore, the optimal action to take now is to push the cart to the right (action 2).
422
- """
423
- },{
424
- "question":
425
- """
426
- State description: The current state is: The cart is positioned at 0.033, with a velocity of 0.05 towards the left. The pole is tilted at 0.02 radians, rotating at 0.03 radians per second towards the right.
427
- Goal description: The goal is to keep the pole balanced upright for as long as possible.
428
- Action description: Valid action list: [1, 2]. Action 1 means to push the cart to the left, and Action 2 means push the cart to the right.
429
- The suggestions are listed below:
430
- 1. For exploration, the player should try to experiment with different action sequences to see if there are any patterns that lead to longer pole balancing times. One way to do this is to randomly select actions for a certain number of steps before returning to the current policy.
431
- 2. To improve policy performance, the player can try incorporating a mechanism for learning from past mistakes, such as experience replay. Additionally, the policy can be adjusted to take into account the increasing velocity of the cart by adding a penalty for large changes in cart velocity.
432
- 3. The player should focus more on exploitation than exploration in the next episode, as they have already tried multiple exploration strategies in previous episodes. However, they should still allocate a small portion of their actions to exploration to avoid getting stuck in a suboptimal policy. A good ratio to start with could be 80% exploitation and 20% exploration.
433
- """,
434
- "answer":
435
- """
436
- Based on the current game state and the suggestions, the optimal action for the player to take would be to push the cart to the right (action 2) to maintain balance of the pole. Therefore, the optimal action to take now is to push the cart to the right (action 2).
437
- """
438
- },
439
- ]
440
-
441
- class EGGWithoutSuggestions:
442
- def __init__(self):
443
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
444
- {
445
- "question":
446
- """
447
- State description: The current state is: The cart is positioned at -0.053, with a velocity of 0.01 towards the right. The pole is tilted at 0.05 radians, rotating at 0.11 radians per second towards the right.
448
- Goal description: The goal is to keep the pole balanced upright for as long as possible.
449
- Action description: Valid action list: [1, 2]. Action 1 means to push the cart to the left, and Action 2 means push the cart to the right.
450
- The insights of the game are listed below: The key information that can be exploited to improve performance of the player includes the fact that the policy alternates between moving the cart left and right based on the pole's tilt, but struggles to recover from large tilts of the pole. Additionally, the velocity of the cart increases rapidly towards the end of the game, indicating that the policy may not be able to keep up with the increasing difficulty of the game. To improve performance, the policy can be adjusted to take into account the increasing velocity of the cart or a different algorithm can be used. Incorporating a mechanism for learning from past mistakes, such as experience replay, may also be beneficial.
451
- """,
452
- "answer":
453
- """
454
- Based on the current game state and the insights, the optimal action for the player to take would be to push the cart to the right (action 2) to maintain balance of the pole. Therefore, the optimal action to take now is to push the cart to the right (action 2).
455
- """
456
- },{
457
- "question":
458
- """
459
- State description: The current state is: The cart is positioned at 0.033, with a velocity of 0.05 towards the left. The pole is tilted at 0.02 radians, rotating at 0.03 radians per second towards the right.
460
- Goal description: The goal is to keep the pole balanced upright for as long as possible.
461
- Action description: Valid action list: [1, 2]. Action 1 means to push the cart to the left, and Action 2 means push the cart to the right.
462
- The insights of the game are listed below: The key information that can be exploited to improve performance of the player includes the fact that the policy alternates between moving the cart left and right based on the pole's tilt, but struggles to recover from large tilts of the pole. Additionally, the velocity of the cart increases rapidly towards the end of the game, which may indicate that the policy is not able to keep up with the increasing difficulty of the game. To improve performance, the policy can be adjusted to take into account the increasing velocity of the cart or a different algorithm can be used. Incorporating a mechanism for learning from past mistakes, such as experience replay, may also be beneficial.
463
- """,
464
- "answer":
465
- """
466
- Based on the current game state and the insights, the optimal action for the player to take would be to push the cart to the right (action 2) to maintain balance of the pole. Therefore, the optimal action to take now is to push the cart to the right (action 2).
467
- """
468
- },
469
- ]
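
Several of the deleted cartpole suggestions recommend "a mechanism for learning from past mistakes, such as experience replay". For reference, a minimal replay buffer of the kind they allude to (an illustrative sketch, not code from this commit):

    import random
    from collections import deque

    class ReplayBuffer:
        def __init__(self, capacity=10_000):
            self.buffer = deque(maxlen=capacity)  # oldest transitions evicted

        def push(self, state, action, reward, next_state, done):
            self.buffer.append((state, action, reward, next_state, done))

        def sample(self, batch_size):
            # Uniform sampling decorrelates consecutive updates.
            return random.sample(self.buffer, min(batch_size, len(self.buffer)))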
prompts/task_relevant/classic_control/mountaincar.py CHANGED
@@ -336,7 +336,7 @@ class REFLEXION:
336
  },
337
  ]
338
 
339
- class EGG:
+ class EXE:
340
  def __init__(self):
341
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
342
  {
@@ -378,74 +378,3 @@ class EGG:
378
  """
379
  },
380
  ]
381
-
382
- class EGGWithoutInsights:
383
- def __init__(self):
384
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
385
- {
386
- "question":
387
- """
388
- State description: Current Game State: The car is positioned at 0.472, with a velocity of 0.049 towards the right.
389
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
390
- Action description: Your Next Move:Please choose an action. Type '1' to accelerate to the left, '2' to not accelerate, or '3' to accelerate to the right.Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3].
391
- The suggestions are listed below:Suggestion for the next episode:
392
- 1. Exploration: In the next episode, the player should focus on exploring the environment more thoroughly by trying out different actions. Specifically, the player should try accelerating in both directions (left and right) to see if there are any hidden paths that can lead to the goal state more quickly. To encourage exploration, the player can add some randomness to the policy by choosing actions randomly with a certain probability (e.g., 10% of the time).
393
- 2. Exploitation: To improve performance, the player should continue to use the policy that has been successful in the past episodes, which is to constantly accelerate in the direction of the goal, with occasional changes in direction to avoid getting stuck in local minima. However, the player should also try to optimize the policy by adjusting the frequency of changes in direction based on the car's velocity and position. For example, if the car is moving slowly and is far from the goal, the player should consider changing direction more frequently to explore more of the environment.
394
- 3. Exploration vs. Exploitation: The player should balance exploration and exploitation by adjusting the probability of choosing actions randomly. In the beginning of the episode, the player should focus more on exploration by choosing actions randomly with a higher probability
395
- """,
396
- "answer":
397
- """
398
- Based on the current game state and the suggestions, the optimal action for the player to take would be to accelerate to the right (action 3). Therefore, the optimal action to take now is to push the cart to accelerate to the right (action 3).
399
- """
400
- },{
401
- "question":
402
- """
403
- State description: Current Game State: The car is positioned at -0.218, with a velocity of 0.002 towards the left.
404
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
405
- Action description: Your Next Move:Please choose an action. Type '1' to accelerate to the left, '2' to not accelerate, or '3' to accelerate to the right.Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3].
406
- The suggestions are listed below:
407
- 1. The player should explore different acceleration strategies, such as accelerating in the opposite direction of the goal or accelerating randomly, to gather information on the environment and avoid getting stuck in local minima. This can be done by adding some randomness to the policy or using a different algorithm that balances exploration and exploitation.
408
- 2. To improve policy performance, the player can use the information gathered from exploration to update the value function and adjust the policy accordingly. Additionally, the player can consider using a more sophisticated memory system that takes into account the car's velocity and position, rather than just its position, to better capture the dynamics of the environment.
409
- 3. The player should balance exploration and exploitation by allocating a certain percentage of actions to exploration and the remaining percentage to exploitation. This can be done by setting a threshold for the amount of exploration and adjusting it based on the performance of the policy. The player should also monitor the performance of the policy and adjust the balance between exploration and exploitation accordingly.
410
- """,
411
- "answer":
412
- """
413
- Based on the current game state and the suggestions, the optimal action for the player to take would be to accelerate to the left (action 1) to obtain potential energy of rightward motion. Therefore, the optimal action to take now is to accelerate to the left (action 1).
414
- """
415
- },
416
- ]
417
-
418
- class EGGWithoutSuggestions:
419
- def __init__(self):
420
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
421
- {
422
- "question":
423
- """
424
- State description: Current Game State: The car is positioned at 0.472, with a velocity of 0.049 towards the right.
425
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
426
- Action description: Your Next Move:Please choose an action. Type '1' to accelerate to the left, '2' to not accelerate, or '3' to accelerate to the right.Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3].
427
- The insights of the game are listed below: The key information that can be exploited to improve performance of the player includes the fact that the car often gets stuck in local minima, the policy of constantly accelerating in the direction of the goal with occasional changes in direction, and the use of exploration techniques to encourage the car to try out different actions and explore the environment more thoroughly. Additionally, a more sophisticated memory system that takes into account the car's velocity and position, rather than just its position, and a different algorithm that balances exploration and exploitation more effectively can also improve performance.
428
- """,
429
- "answer":
430
- """
431
- Based on the current game state and the insights, the optimal action for the player to take would be to accelerate to the right (action 3). Therefore, the optimal action to take now is to push the cart to accelerate to the right (action 3).
432
- """
433
- },{
434
- "question":
435
- """
436
- State description: Current Game State: The car is positioned at -0.218, with a velocity of 0.002 towards the left.
437
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
438
- Action description: Your Next Move:Please choose an action. Type '1' to accelerate to the left, '2' to not accelerate, or '3' to accelerate to the right.Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3].
439
- The insights of the game are listed below:
440
- 1. The policy of constantly accelerating towards the goal with occasional changes in direction to avoid local minima is not performing well.
441
- 2. Exploration techniques can be used to encourage the car to try out different actions and explore the environment more thoroughly.
442
- 3. A more sophisticated memory system that takes into account the car's velocity and position can improve the accuracy of the value function.
443
- 4. A different algorithm that balances exploration and exploitation more effectively can also be considered.
444
- 5. The car is able to gain enough momentum to climb the hill towards the goal, but still gets stuck in local minima at times.
445
- """,
446
- "answer":
447
- """
448
- Based on the current game state and the insights, the optimal action for the player to take would be to accelerate to the left (action 1) to obtain potential energy of rightward motion. Therefore, the optimal action to take now is to accelerate to the left (action 1).
449
- """
450
- },
451
- ]
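
The deleted mountaincar answers explain accelerating left "to obtain potential energy of rightward motion". The standard way to code that idea is to push in the direction of the current velocity, pumping energy into the swing (a sketch only; the rule and names are not from this repo):

    def mountain_car_heuristic(position, velocity):
        # Prompt numbering: 1 = accelerate left, 2 = no acceleration,
        # 3 = accelerate right.
        if velocity < 0:
            return 1   # moving left: climb the left slope to store energy
        return 3       # moving right (or at rest): push toward the flag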
prompts/task_relevant/classic_control/mountaincarContinuous.py CHANGED
@@ -355,7 +355,7 @@ class REFLEXION:
355
  },
356
  ]
357
 
358
- class EGG:
+ class EXE:
359
  def __init__(self):
360
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
361
  {
@@ -397,74 +397,3 @@ class EGG:
397
  """
398
  },
399
  ]
400
-
401
- class EGGWithoutInsights:
402
- def __init__(self):
403
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
404
- {
405
- "question":
406
- """
407
- State description: Current Game State: The car is positioned at 0.472, with a velocity of 0.049 towards the right.
408
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
409
- Action description: Your Next Move: Please select a numerical value within the range of [-1,1], which represents the directional force being applied to the car. The action will be limited to the range of [-1,1], and then multiplied by a power of 0.0015..
410
- The suggestions are listed below:Suggestion for the next episode:
411
- 1. Exploration: In the next episode, the player should focus on exploring the environment more thoroughly by trying out different actions. Specifically, the player should try accelerating in both directions (left and right) to see if there are any hidden paths that can lead to the goal state more quickly. To encourage exploration, the player can add some randomness to the policy by choosing actions randomly with a certain probability (e.g., 10% of the time).
412
- 2. Exploitation: To improve performance, the player should continue to use the policy that has been successful in the past episodes, which is to constantly accelerate in the direction of the goal, with occasional changes in direction to avoid getting stuck in local minima. However, the player should also try to optimize the policy by adjusting the frequency of changes in direction based on the car's velocity and position. For example, if the car is moving slowly and is far from the goal, the player should consider changing direction more frequently to explore more of the environment.
413
- 3. Exploration vs. Exploitation: The player should balance exploration and exploitation by adjusting the probability of choosing actions randomly. In the beginning of the episode, the player should focus more on exploration by choosing actions randomly with a higher probability
414
- """,
415
- "answer":
416
- """
417
- Based on the current game state and the suggestions, the optimal action for the player to take would be to accelerate to the right (Action 1.0). Therefore, the optimal action to take now is to push the cart to accelerate to the right (Action 1.0).
418
- """
419
- },{
420
- "question":
421
- """
422
- State description: Current Game State: The car is positioned at -0.218, with a velocity of 0.002 towards the left.
423
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
424
- Action description: Your Next Move: Please select a numerical value within the range of [-1,1], which represents the directional force being applied to the car. The action will be limited to the range of [-1,1], and then multiplied by a power of 0.0015..
425
- The suggestions are listed below:
426
- 1. The player should explore different acceleration strategies, such as accelerating in the opposite direction of the goal or accelerating randomly, to gather information on the environment and avoid getting stuck in local minima. This can be done by adding some randomness to the policy or using a different algorithm that balances exploration and exploitation.
427
- 2. To improve policy performance, the player can use the information gathered from exploration to update the value function and adjust the policy accordingly. Additionally, the player can consider using a more sophisticated memory system that takes into account the car's velocity and position, rather than just its position, to better capture the dynamics of the environment.
428
- 3. The player should balance exploration and exploitation by allocating a certain percentage of actions to exploration and the remaining percentage to exploitation. This can be done by setting a threshold for the amount of exploration and adjusting it based on the performance of the policy. The player should also monitor the performance of the policy and adjust the balance between exploration and exploitation accordingly.
429
- """,
430
- "answer":
431
- """
432
- Based on the current game state and the suggestions, the optimal action for the player to take would be to accelerate to the left (Action -1.0) to obtain potential energy of rightward motion. Therefore, the optimal action to take now is to accelerate to the left (Action -1.0).
433
- """
434
- },
435
- ]
436
-
437
- class EGGWithoutSuggestions:
438
- def __init__(self):
439
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
440
- {
441
- "question":
442
- """
443
- State description: Current Game State: The car is positioned at 0.472, with a velocity of 0.049 towards the right.
444
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
445
- Action description: Your Next Move: Please select a numerical value within the range of [-1,1], which represents the directional force being applied to the car. The action will be limited to the range of [-1,1], and then multiplied by a power of 0.0015..
446
- The insights of the game are listed below: The key information that can be exploited to improve performance of the player includes the fact that the car often gets stuck in local minima, the policy of constantly accelerating in the direction of the goal with occasional changes in direction, and the use of exploration techniques to encourage the car to try out different actions and explore the environment more thoroughly. Additionally, a more sophisticated memory system that takes into account the car's velocity and position, rather than just its position, and a different algorithm that balances exploration and exploitation more effectively can also improve performance.
447
- """,
448
- "answer":
449
- """
450
- Based on the current game state and the insights, the optimal action for the player to take would be to accelerate to the right (Action 1.0). Therefore, the optimal action to take now is to push the cart to accelerate to the right (Action 1.0).
451
- """
452
- },{
453
- "question":
454
- """
455
- State description: Current Game State: The car is positioned at -0.218, with a velocity of 0.002 towards the left.
456
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
457
- Action description: Your Next Move: Please select a numerical value within the range of [-1,1], which represents the directional force being applied to the car. The action will be limited to the range of [-1,1], and then multiplied by a power of 0.0015..
458
- The insights of the game are listed below:
459
- 1. The policy of constantly accelerating towards the goal with occasional changes in direction to avoid local minima is not performing well.
460
- 2. Exploration techniques can be used to encourage the car to try out different actions and explore the environment more thoroughly.
461
- 3. A more sophisticated memory system that takes into account the car's velocity and position can improve the accuracy of the value function.
462
- 4. A different algorithm that balances exploration and exploitation more effectively can also be considered.
463
- 5. The car is able to gain enough momentum to climb the hill towards the goal, but still gets stuck in local minima at times.
464
- """,
465
- "answer":
466
- """
467
- Based on the current game state and the insights, the optimal action for the player to take would be to accelerate to the left (Action -1.0) to obtain potential energy of rightward motion. Therefore, the optimal action to take now is to accelerate to the left (Action -1.0).
468
- """
469
- },
470
- ]
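
The continuous variant's deleted examples make the same point with actions in [-1, 1]; the bang-bang analogue of the heuristic above would be (again illustrative only):

    def mountain_car_continuous_heuristic(position, velocity):
        # Full thrust along the current direction of motion.
        return 1.0 if velocity >= 0 else -1.0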
prompts/task_relevant/toy_text/blackjack.py CHANGED
@@ -374,7 +374,7 @@ class REFLEXION:
374
  }
375
  ]
376
 
377
- class EGG:
+ class EXE:
378
  def __init__(self):
379
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
380
  {
@@ -411,69 +411,3 @@ class EGG:
411
  """Based on the current game state, the insights and the suggestions, The optimal action is: 1."""
412
  }
413
  ]
414
-
415
- class EGGWithoutInsights:
416
- def __init__(self):
417
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
418
- {
419
- "question":
420
- """
421
- State description: Current Game State: The player's current sum is 6, the dealer is showing 7, and the player has a usable ace: no.
422
- Goal description: The goal is to beat the dealer by obtaining cards that sum to closer to 21, without going over 21.
423
- Action description: Your Next Move: \n Please choose an action. Type '1' to stick (stop receiving cards) or '2' to hit (add a card). Ensure you only provide the action number from the valid action list, i.e., [1, 2].
424
- The suggestions are listed below:
425
- 1. For exploration in the next episode, the player should try to observe the dealer's face-up card and use that information to inform their decision-making process. They can do this by hitting until they have a hand value of at least 12, and then observing the dealer's card before deciding whether to hit or stick. This will help the player make more informed decisions and potentially increase their chances of winning.
426
- 2. To improve the policy and gain a higher performance in the next episode, the player should focus on using their usable ace to their advantage. They should aim to use the ace as 11 when their hand value is low (e.g. less than 12) and as 1 when their hand value is high (e.g. more than 17). This will help the player avoid busting and increase their chances of getting closer to 21.
427
- 3. The player should weight exploration and exploitation equally in the next episode. While it is important to explore and gather new information, it is also important to use that information to improve performance. By balancing exploration and exploitation, the player can make more informed decisions and potentially increase their chances of winning.
428
- """,
429
- "answer":
430
- """
431
- Based on the current game state and the suggestions, the player should hit (add a card) as their current sum is very low and they have no usable ace to use to their advantage.
432
- The goal is to get closer to 21 without going over, and hitting will increase the player's chances of achieving this goal.
433
- Therefore, the optimal action for the player is to choose action number 2 to hit.
434
- """
435
- },{
436
- "question":
437
- """
438
- State description: Current Game State: The player's current sum is 20, the dealer is showing 4, and the player has a usable ace: no.
439
- Goal description: The goal is to beat the dealer by obtaining cards that sum to closer to 21, without going over 21.
440
- Action description: Your Next Move: \n Please choose an action. Type '1' to stick (stop receiving cards) or '2' to hit (add a card). Ensure you only provide the action number from the valid action list, i.e., [1, 2].
441
- The suggestions are listed below:
442
- 1. Exploration: The player should explore the value of their usable ace in different scenarios. They can try using the ace as 1 or 11 and observe the outcome. To make the exploration, the player can randomly choose to use the ace as 1 or 11 in different game states and record the results.
443
- 2. Exploitation: The player should improve their policy by considering the dealer's face-up card and the likelihood of the dealer having a higher hand. They can also use the information from the previous trajectories to make more informed decisions. For example, if the player has a low hand value and the dealer has a high face-up card, the player may choose to hit to improve their hand. On the other hand, if the player has a high hand value and the dealer has a low face-up card, the player may choose to stick to avoid going over 21.
444
- 3. Weighting: The player should balance exploration and exploitation by allocating more time to exploitation. Since the player only has two episodes left, they should focus on exploiting the information they have gathered so far to increase their chances of winning. However, they should also continue to explore the value of their usable ace in different scenarios to potentially improve their policy in future games.
445
- """,
446
- "answer":
447
- """Based on the current game state and the suggestions, The optimal action is: 1."""
448
- }
449
- ]
450
-
451
- class EGGWithoutSuggestions:
452
- def __init__(self):
453
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
454
- {
455
- "question":
456
- """
457
- State description: Current Game State: The player's current sum is 6, the dealer is showing 7, and the player has a usable ace: no.
458
- Goal description: The goal is to beat the dealer by obtaining cards that sum to closer to 21, without going over 21.
459
- Action description: Your Next Move: \n Please choose an action. Type '1' to stick (stop receiving cards) or '2' to hit (add a card). Ensure you only provide the action number from the valid action list, i.e., [1, 2].
460
- The insights of the game are listed below: The key information that can be exploited to improve the performance of the player is the presence of a usable ace in their hand. This information can be used to inform the player's decision-making process and potentially improve their performance by allowing them to make more informed decisions about whether to hit or stick based on the value of their hand. By taking into account the value of the ace, the player can potentially increase their chances of winning the hand.
461
- """,
462
- "answer":
463
- """
464
- Based on the current game state and the insights, the player should hit (add a card) as their current sum is very low and they have no usable ace to use to their advantage.
465
- The goal is to get closer to 21 without going over, and hitting will increase the player's chances of achieving this goal.
466
- Therefore, the optimal action for the player is to choose action number 2 to hit.
467
- """
468
- },{
469
- "question":
470
- """
471
- State description: Current Game State: The player's current sum is 20, the dealer is showing 4, and the player has a usable ace: no.
472
- Goal description: The goal is to beat the dealer by obtaining cards that sum to closer to 21, without going over 21.
473
- Action description: Your Next Move: \n Please choose an action. Type '1' to stick (stop receiving cards) or '2' to hit (add a card). Ensure you only provide the action number from the valid action list, i.e., [1, 2].
474
- The insights of the game are listed below: The key information that can be exploited to improve the performance of the player includes the presence of a usable ace, the player's current hand sum, the dealer's face-up card, and the likelihood of the dealer having a higher hand. By taking into account these factors, the player can make more informed decisions on whether to hit or stick, potentially increasing their chances of winning.
475
- """,
476
- "answer":
477
- """Based on the current game state and the insights, The optimal action is: 1."""
478
- }
479
- ]
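
The deleted blackjack examples hit on a sum of 6 and stick on 20, i.e. a threshold rule on the player's sum. A sketch of that baseline (the threshold of 17 is an illustrative choice, not something specified in this repo):

    def blackjack_policy(player_sum, dealer_card, usable_ace, threshold=17):
        # Prompt numbering: 1 = stick, 2 = hit. dealer_card and usable_ace
        # are ignored by this toy rule; the deleted insights suggest a real
        # policy should condition on both.
        return 1 if player_sum >= threshold else 2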
prompts/task_relevant/toy_text/cliffwalking.py CHANGED
@@ -365,7 +365,7 @@ class REFLEXION:
365
  },
366
  ]
367
 
368
- class EGG:
+ class EXE:
369
  def __init__(self):
370
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
371
  {
@@ -408,77 +408,3 @@ class EGG:
408
  },
409
  ]
410
 
411
- class EGGWithoutInsights:
412
- def __init__(self):
413
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
414
- {
415
- "question":
416
- """
417
- State description: Current Game State: The player is at location [2, 11] in the grid world.
418
- Goal description: The goal is to navigate from the starting point to an target which locate at (3,11), avoiding the cliff, in as few steps as possible.
419
- Action description: Your Next Move:\nPlease choose an action. Type '1' to move up, '2' to move right, '3' to move down, or '4' to move left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
420
- The suggestions are listed below:Suggestion for the next episode:
421
- 1. Exploration: The player should explore different paths towards the goal location, rather than always moving in a straight line. This can be done by randomly selecting actions with a certain probability, such as 10%, to encourage the player to try different paths. The player should also try to gather information about the rewards and penalties associated with different actions, especially those near the cliff.
422
- 2. Exploitation: The player should prioritize avoiding the cliff over reaching the goal quickly. This can be done by updating the policy to include a penalty for moving towards the cliff or a reward for moving away from it. The player should also try to incorporate a more cautious approach that considers the potential consequences of each action.
423
- 3. Weighting: The player should balance exploration and exploitation by assigning a higher weight to exploitation. This is because the player has already tried two episodes and needs to focus on improving performance. However, some level of exploration is still necessary to avoid getting stuck in a suboptimal policy. A reasonable weighting could be 70% exploitation and 30% exploration.
424
- """,
425
- "answer":
426
- """
427
- Based on the current game state and the suggestions, the optimal action for the player to take would be to move down (action 3) to reach the goal location at [3, 11]. Therefore, the optimal action to take now is to move down (Action 3).
428
- """
429
- },{
430
- "question":
431
- """
432
- State description: Current Game State: The player is at location [3,0] in the grid world.
433
- Goal description: The goal is to navigate from the starting point to an target which locate at (3,11), avoiding the cliff, in as few steps as possible.
434
- Action description: Your Next Move:\nPlease choose an action. Type '1' to move up, '2' to move right, '3' to move down, or '4' to move left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
435
- The suggestions are listed below:
436
- 1. The player should explore different paths towards the goal location, especially those that are not in a straight line or zigzag pattern. This can be done by randomly selecting actions or by using a more exploratory policy, such as epsilon-greedy or softmax. The player should also pay attention to the rewards and penalties associated with each action, especially those related to cliff avoidance.
437
- 2. To improve performance, the policy should prioritize both reaching the goal location and avoiding the cliff. This can be done by incorporating a penalty for moving towards the cliff or a reward for moving away from it. The policy should also encourage diagonal movements towards the goal location, which would reduce the number of steps needed to reach the goal. Additionally, the policy should include a penalty for falling off the cliff, which would further incentivize the player to avoid it.
438
- 3. The player should balance exploration and exploitation by using a more exploratory policy, such as epsilon-greedy or softmax, with a low exploration rate. This would allow the player to explore different paths towards the goal location while still prioritizing cliff avoidance and efficient movements. The player should also periodically evaluate the policy and adjust the exploration rate accordingly.
439
-
440
- """,
441
- "answer":
442
- """
443
- Based on the current game state and the suggestions, it seems like the player is stuck in a loop and not making any progress towards the goal location.
444
- To improve performance, the player should prioritize both reaching the goal location and avoiding the cliff.
445
- It is recommended to incorporate a penalty for moving towards the cliff or a reward for moving away from it.
446
- The policy should also encourage diagonal movements towards the goal location, which would reduce the number of steps needed to reach the goal.
447
- For the next action, I suggest the player to move up (action 1) to explore a different path towards the goal location.
448
- """
449
- },
450
- ]
451
-
452
- class EGGWithoutSuggestions:
453
- def __init__(self):
454
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
455
- {
456
- "question":
457
- """
458
- State description: Current Game State: The player is at location [2, 11] in the grid world.
459
- Goal description: The goal is to navigate from the starting point to an target which locate at (3,11), avoiding the cliff, in as few steps as possible.
460
- Action description: Your Next Move:\nPlease choose an action. Type '1' to move up, '2' to move right, '3' to move down, or '4' to move left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
461
- The insights of the game are listed below: The key information that can be exploited to improve the player's performance is that they tend to move in a straight line towards the goal location, even if it means getting closer to the cliff. This behavior can be addressed by incorporating a more cautious approach that prioritizes avoiding the cliff over reaching the goal quickly. Additionally, the policy could be updated to encourage exploration of different paths towards the goal location, rather than always moving in a straight line. The negative consequences of not prioritizing cliff avoidance are highlighted in the player's poor performance and multiple penalties.
462
- """,
463
- "answer":
464
- """
465
- Based on the current game state and the insights, the optimal action for the player to take would be to move down (action 3) to reach the goal location at [3, 11]. Therefore, the optimal action to take now is to move down (Action 3).
466
- """
467
- },{
468
- "question":
469
- """
470
- State description: Current Game State: The player is at location [3,0] in the grid world.
471
- Goal description: The goal is to navigate from the starting point to an target which locate at (3,11), avoiding the cliff, in as few steps as possible.
472
- Action description: Your Next Move:\nPlease choose an action. Type '1' to move up, '2' to move right, '3' to move down, or '4' to move left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
473
- The insights of the game are listed below: The key information that can be exploited to improve performance of the player is that they tend to move in a straight line or zigzag pattern towards the goal location, even if it means getting closer to the cliff. This behavior can be improved by incorporating a more cautious approach that prioritizes avoiding the cliff over reaching the goal quickly, and by encouraging exploration of different paths towards the goal location. Additionally, the policy could be updated to include a penalty for falling off the cliff, which would further incentivize the player to avoid it.
474
- """,
475
- "answer":
476
- """
477
- Based on the current game state and the insights, it seems like the player is stuck in a loop and not making any progress towards the goal location.
478
- To improve performance, the player should prioritize both reaching the goal location and avoiding the cliff.
479
- It is recommended to incorporate a penalty for moving towards the cliff or a reward for moving away from it.
480
- The policy should also encourage diagonal movements towards the goal location, which would reduce the number of steps needed to reach the goal.
481
- For the next action, I suggest the player to move up (action 1) to explore a different path towards the goal location.
482
- """
483
- },
484
- ]
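
The deleted cliffwalking suggestions repeatedly propose "a penalty for moving towards the cliff or a reward for moving away from it", i.e. reward shaping. A minimal sketch of such a shaped reward (the penalty sizes and layout assumptions are illustrative, not from this repo):

    def shaped_reward(env_reward, row, next_row, cliff_row=3):
        # The cliff occupies row 3 between the start (3, 0) and the goal
        # (3, 11), per the prompt. Penalize steps toward that row and
        # slightly reward steps away from it.
        if next_row > row:
            return env_reward - 0.1   # moved toward the cliff row
        if next_row < row:
            return env_reward + 0.05  # moved away from it
        return env_reward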
prompts/task_relevant/toy_text/frozenlake.py CHANGED
@@ -446,7 +446,7 @@ class REFLEXION:
446
  },
447
  ]
448
 
449
- class EGG:
+ class EXE:
450
  def __init__(self):
451
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
452
  {
@@ -472,52 +472,3 @@ class EGG:
472
  """
473
  },
474
  ]
475
-
476
- class EGGWithoutInsights:
477
- def __init__(self):
478
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
479
- {
480
- "question":
481
- """
482
- State description: Current Game State: The current position of the player is at row 2, column 0.
483
- Goal description: The goal is to navigate across the frozen lake and reach the goal position located at (3,3) without falling into any holes, which are located at (1,1), (1,3), (2,3) and (0,3).
484
- Action description: Your Next Move:
485
- Please choose an action. The possible actions are:
486
- '1': Move left (Decrease the horizontal coordinate by 1)
487
- '2': Move down (Increase the vertical coordinate by 1)
488
- '3': Move right (Increase the horizontal coordinate by 1)
489
- '4': Move up (Decrease the vertical coordinate by 1). Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
490
- The suggestions are listed below:Suggestion for the next episode:
491
- 1. Exploration: The player should explore the environment to understand the slippery nature of the frozen lake. The player can do this by taking random actions and observing the resulting movements. This will help the player to develop a better understanding of the environment and adjust its actions accordingly.
492
- 2. Exploitation: The player should use a Q-learning algorithm to improve its policy. The Q-learning algorithm will help the player to learn from its experiences and develop a better policy over time. The player should also consider the slippery nature of the frozen lake when updating its Q-values.
493
- 3. Weighting: The player should balance exploration and exploitation by using an epsilon-greedy policy. The player should set a high value of epsilon initially to encourage exploration and gradually decrease it over time to encourage exploitation. This will help the player to explore the environment initially and then exploit its knowledge to gain a higher performance.
494
- """,
495
- "answer":
496
- """
497
- Based on the current game state and the suggestions, the optimal action for the player to take would be to move right (action 3) to navigate around the holes and reach the goal position. Therefore, the optimal action to take now is to move right (action 3).
498
- """
499
- },
500
- ]
501
-
502
- class EGGWithoutSuggestions:
503
- def __init__(self):
504
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
505
- {
506
- "question":
507
- """
508
- State description: Current Game State: The current position of the player is at row 2, column 0.
509
- Goal description: The goal is to navigate across the frozen lake and reach the goal position located at (3,3) without falling into any holes, which are located at (1,1), (1,3), (2,3) and (0,3).
510
- Action description: Your Next Move:
511
- Please choose an action. The possible actions are:
512
- '1': Move left (Decrease the horizontal coordinate by 1)
513
- '2': Move down (Increase the vertical coordinate by 1)
514
- '3': Move right (Increase the horizontal coordinate by 1)
515
- '4': Move up (Decrease the vertical coordinate by 1). Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
516
- The insights of the game are listed below: The key information that can be exploited to improve the performance of the player includes the fact that the current policy is random and ineffective, the player is not learning from its experiences, and the slippery nature of the frozen lake is causing unintended movements. To improve performance, the player needs to incorporate a learning algorithm to improve its policy over time and develop a strategy that takes into account the slippery nature of the ice.
517
- """,
518
- "answer":
519
- """
520
- Based on the current game state and the insights, the optimal action for the player to take would be to move right (action 3) to navigate around the holes and reach the goal position. Therefore, the optimal action to take now is to move right (action 3).
521
- """
522
- },
523
- ]
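
The deleted frozenlake suggestions describe an epsilon-greedy schedule: set epsilon high initially to encourage exploration and gradually decrease it over time. A minimal sketch of that schedule (illustrative; the decay constants are invented):

    import random

    def epsilon_greedy(q_values, epsilon):
        # With probability epsilon explore uniformly, otherwise act greedily.
        # (Indices here are 0-based; the prompt's action list is 1-based.)
        if random.random() < epsilon:
            return random.randrange(len(q_values))
        return max(range(len(q_values)), key=lambda a: q_values[a])

    epsilon, eps_min, eps_decay = 1.0, 0.05, 0.995
    for episode in range(1000):
        # ... run one episode, picking actions with epsilon_greedy(...) ...
        epsilon = max(eps_min, epsilon * eps_decay)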
prompts/task_relevant/toy_text/taxi.py CHANGED
@@ -481,7 +481,7 @@ class REFLEXION:
481
  },
482
  ]
483
 
484
- class EGG:
+ class EXE:
485
  def __init__(self):
486
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
487
  {
@@ -528,81 +528,4 @@ class EGG:
528
  Based on the current game state, the insights and the suggestions, the optimal action for the player to take would be to move left (action 4) to pick up the passenager. Therefore, the optimal action to take now is to move left (action 4).
529
  """
530
  },
531
- ]
532
-
533
- class EGGWithoutInsights:
534
- def __init__(self):
535
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
536
- {
537
- "question":
538
- """
539
- State description: Current Game State: Taxi is at Row 0, Col 1. The passenger is at the Green location. The passenger wants to go to the Blue location.
540
- Goal description: The goal is to navigate the taxi to the passenger, pick them up, and drop them off at their destination as efficiently as possible.
541
- Action description: Your Next Move: Please choose an action. Type '1' to move south (down), '2' to move north (up), '3' to move east (right), '4' to move west (left), '5' to pick up the passenger or '6' to drop off the passenger. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4, 5, 6].
542
- The suggestions are listed below:
543
- 1. The player should explore the environment by trying out different routes to reach the pick-up and drop-off locations. They should also try to identify any patterns in the locations of the pick-up and drop-off points, and use this information to plan their route. The exploration can be done by randomly selecting actions and observing the rewards obtained.
544
- 2. To improve the policy, the player should use a Q-learning algorithm to learn the optimal policy. They should use the information obtained through exploration to update the Q-values for each state-action pair. The player should also incorporate a penalty for repeating the same action multiple times in a row and a heuristic that encourages the taxi to move towards the passenger or destination.
545
- 3. The player should weight the exploration and exploitation differently by using an epsilon-greedy policy. They should set a high value of epsilon initially to encourage exploration and gradually decrease it over time to encourage exploitation. The player should also monitor the performance of the policy and adjust the exploration-exploitation trade-off accordingly.
546
- """,
547
- "answer":
548
- """
549
- Based on the current game state and the suggestions, the optimal action for the player to take would be to move down (action 1) to navigate around the obstacles and pick up the passenager. Therefore, the optimal action to take now is to move down (Action 1).
550
- """
551
- },{
552
- "question":
553
- """
554
- State description: Current Game State: Taxi is at Row 4, Col 4. The passenger is at the Red location. The passenger wants to go to the Green location.
555
- Goal description: The goal is to navigate the taxi to the passenger, pick them up, and drop them off at their destination as efficiently as possible.
556
- Action description: Your Next Move: Please choose an action. Type '1' to move south (down), '2' to move north (up), '3' to move east (right), '4' to move west (left), '5' to pick up the passenger or '6' to drop off the passenger. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4, 5, 6].
557
- The suggestions are listed below:Suggestion for the next episode:
558
- 1. Exploration: The player should explore the shortest path to reach the pick-up and drop-off points. This can be done by using a heuristic algorithm such as A* search to find the shortest path. The player should also explore the penalty for repeating the same action multiple times in a row to avoid getting stuck in a loop.
559
- 2. Exploitation: The player should use a Q-learning algorithm to improve the policy. The Q-learning algorithm should take into account the shortest path to reach the pick-up and drop-off points, as well as the penalty for repeating the same action multiple times in a row. The player should also incorporate a heuristic that encourages the taxi to move towards the passenger or destination.
560
- 3. Weighting: The player should focus more on exploitation than exploration in this episode, as they have already explored the random policy in the previous episodes. The player should use the information obtained from the exploration to improve the policy and gain a higher performance. However, the player should still explore the penalty for repeating the same action multiple times in a row to avoid getting stuck in a loop.
561
- """,
562
- "answer":
563
- """
564
- Based on the current game state and the suggestions, the optimal action for the player to take would be to move left (action 4) to pick up the passenger. Therefore, the optimal action to take now is to move left (action 4).
565
- """
566
- },
567
- ]
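The suggestions in these removed examples repeatedly invoke epsilon-greedy Q-learning with a decaying epsilon. For reference, a minimal sketch of that loop on Taxi-v3 follows; it is not code from this repository, and the hyperparameters and decay schedule are illustrative assumptions.

import random
import gym  # gym 0.26-style API: reset returns (obs, info), step returns 5 values

env = gym.make("Taxi-v3")
# Tabular Q-values: one row per discrete state, one column per action.
q = [[0.0] * env.action_space.n for _ in range(env.observation_space.n)]
alpha, gamma = 0.1, 0.99                         # assumed learning rate and discount
epsilon, eps_min, eps_decay = 1.0, 0.05, 0.995   # explore early, exploit late

for episode in range(500):
    state, _ = env.reset()
    done = False
    while not done:
        if random.random() < epsilon:            # exploration branch
            action = env.action_space.sample()
        else:                                    # exploitation branch
            action = max(range(env.action_space.n), key=lambda a: q[state][a])
        next_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        # One-step Q-learning update toward the bootstrapped target.
        q[state][action] += alpha * (reward + gamma * max(q[next_state]) - q[state][action])
        state = next_state
    epsilon = max(eps_min, epsilon * eps_decay)  # anneal toward exploitation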
568
-
569
- class EGGWithoutSuggestions:
570
- def __init__(self):
571
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
572
- {
573
- "question":
574
- """
575
- State description: Current Game State: Taxi is at Row 0, Col 1. The passenger is at the Green location. The passenger wants to go to the Blue location.
576
- Goal description: The goal is to navigate the taxi to the passenger, pick them up, and drop them off at their destination as efficiently as possible.
577
- Action description: Your Next Move: Please choose an action. Type '1' to move south (down), '2' to move north (up), '3' to move east (right), '4' to move west (left), '5' to pick up the passenger or '6' to drop off the passenger. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4, 5, 6].
578
- The insights of the game are listed below:
579
- 1. The current policy is random and not using any information about the locations of the pick-up and drop-off points or the shortest path to reach them.
580
- 2. The taxi is often stuck in a loop of repeatedly taking the same action, which could be addressed by implementing a penalty for repeating the same action multiple times in a row.
581
- 3. The taxi often moves in the opposite direction of the passenger or destination, which could be addressed by incorporating a heuristic that encourages the taxi to move towards the passenger or destination.
582
- 4. The performance of the current policy is very poor, as indicated by the negative rewards received in each episode.
583
- 5. A more informed policy, such as a Q-learning algorithm, could be used to improve performance.
584
- """,
585
- "answer":
586
- """
587
- Based on the current game state and the insights, the optimal action for the player to take would be to move down (action 1) to navigate around the obstacles and pick up the passenger. Therefore, the optimal action to take now is to move down (action 1).
588
- """
589
- },{
590
- "question":
591
- """
592
- State description: Current Game State: Taxi is at Row 4, Col 4. The passenger is at the Red location. The passenger wants to go to the Green location.
593
- Goal description: The goal is to navigate the taxi to the passenger, pick them up, and drop them off at their destination as efficiently as possible.
594
- Action description: Your Next Move: Please choose an action. Type '1' to move south (down), '2' to move north (up), '3' to move east (right), '4' to move west (left), '5' to pick up the passenger or '6' to drop off the passenger. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4, 5, 6].
595
- The insights of the game are listed below:
596
- 1. The current policy is random and not using any information about the locations of the pick-up and drop-off points or the shortest path to reach them.
597
- 2. The taxi is often stuck in a loop of repeating the same action multiple times in a row.
598
- 3. The taxi often moves in the opposite direction of the passenger or destination.
599
- 4. A more informed policy, such as a Q-learning algorithm, could be used to improve performance.
600
- 5. Implementing a penalty for repeating the same action multiple times in a row could address the issue of being stuck in a loop.
601
- 6. Incorporating a heuristic that encourages the taxi to move towards the passenger or destination could address the issue of moving in the opposite direction.
602
- """,
603
- "answer":
604
- """
605
- Based on the current game state and the insights, the optimal action for the player to take would be to move left (action 4) to pick up the passenger. Therefore, the optimal action to take now is to move left (action 4).
606
- """
607
- },
608
  ]
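Two of the insights above (the action-loop problem and the repeat penalty that would address it) can be expressed as a small reward wrapper. The sketch below is a hedged illustration under the gym 0.26 API, not a helper that exists in this repository, and the penalty magnitude is an assumption.

import gym

class RepeatPenaltyWrapper(gym.Wrapper):
    """Subtract a fixed penalty whenever the agent repeats its last action."""
    def __init__(self, env, penalty=1.0):  # penalty value is illustrative
        super().__init__(env)
        self.penalty = penalty
        self.last_action = None

    def reset(self, **kwargs):
        self.last_action = None
        return self.env.reset(**kwargs)

    def step(self, action):
        obs, reward, terminated, truncated, info = self.env.step(action)
        if action == self.last_action:
            reward -= self.penalty  # discourage getting stuck in a loop
        self.last_action = action
        return obs, reward, terminated, truncated, info

# Usage sketch: env = RepeatPenaltyWrapper(gym.make("Taxi-v3"))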
 
481
  },
482
  ]
483
 
484
+ class EXE:
485
  def __init__(self):
486
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
487
  {
 
528
  Based on the current game state, the insights and the suggestions, the optimal action for the player to take would be to move left (action 4) to pick up the passenger. Therefore, the optimal action to take now is to move left (action 4).
529
  """
530
  },
531
  ]
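Every answer in these few-shot examples ends with a sentence naming the chosen move as "(action N)", so a decider consuming them has to pull N back out of free text. A minimal extraction sketch follows; the parsers actually used by this repository's deciders may differ, and the function name is hypothetical.

import re
from typing import Optional

def extract_action(answer: str) -> Optional[int]:
    # Take the last "(action N)" mention so the final decision wins.
    matches = re.findall(r"\(action\s*(\d+)\)", answer, flags=re.IGNORECASE)
    return int(matches[-1]) if matches else None

assert extract_action(
    "Therefore, the optimal action to take now is to move left (action 4)."
) == 4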
record_reflexion.csv ADDED
@@ -0,0 +1,11 @@
1
+ env,level,decider,avg_score
2
+ CartPole-v0,1,expert,200.0
3
+ LunarLander-v2,1,expert,200.0
4
+ Acrobot-v1,1,expert,200.0
5
+ MountainCar-v0,1,expert,200.0
6
+ Blackjack-v1,1,expert,200.0
7
+ Taxi-v3,1,expert,200.0
8
+ CliffWalking-v0,1,expert,200.0
9
+ FrozenLake-v1,1,expert,200.0
10
+ MountainCarContinuous-v0,1,expert,200.0
11
+
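record_reflexion.csv is a flat lookup table keyed by (env, level, decider) with a single avg_score column. A sketch of loading it with the standard library (hypothetical usage, not code from this repository):

import csv

with open("record_reflexion.csv", newline="") as f:
    rows = list(csv.DictReader(f))  # blank trailing lines are skipped

scores = {(r["env"], int(r["level"]), r["decider"]): float(r["avg_score"])
          for r in rows}
print(scores[("Taxi-v3", 1, "expert")])  # -> 200.0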
requirements.txt DELETED
@@ -1,98 +0,0 @@
1
- absl-py==1.4.0
2
- aiohttp==3.8.4
3
- ale-py==0.8.1
4
- annotated-types==0.5.0
5
- appdirs==1.4.4
6
- beautifulsoup4==4.12.2
7
- box2d-py==2.3.5
8
- cachetools==5.3.1
9
- cchardet==2.1.7
10
- charset-normalizer==3.1.0
11
- click==8.1.3
12
- cloudpickle==2.2.1
13
- contourpy==1.1.0
14
- cycler==0.11.0
15
- cython==3.0.1
16
- dataclasses-json==0.5.14
17
- decorator==4.4.2
18
- docker-pycreds==0.4.0
19
- fasteners==0.18
20
- filelock==3.12.2
21
- fonttools==4.40.0
22
- fsspec==2023.6.0
23
- gitdb==4.0.10
24
- gitpython==3.1.31
25
- glfw==2.6.2
26
- google-auth==2.21.0
27
- google-auth-oauthlib==1.0.0
28
- greenlet==2.0.2
29
- grpcio==1.56.0
30
- gym[box2d]==0.26.2
31
- gym-notices==0.0.8
32
- h5py==3.9.0
33
- huggingface-hub==0.15.1
34
- imageio==2.31.2
35
- imageio-ffmpeg==0.4.8
36
- importlib-metadata==6.6.0
37
- importlib-resources==5.12.0
38
- iniconfig==2.0.0
39
- kiwisolver==1.4.4
40
- langchain==0.0.284
41
- langsmith==0.0.33
42
- llvmlite==0.40.1
43
- lz4==4.3.2
44
- markdown==3.4.3
45
- markupsafe==2.1.1
46
- marshmallow==3.20.1
47
- matplotlib==3.7.1
48
- moviepy==1.0.3
49
- mujoco==2.2.0
50
- mujoco-py==2.1.2.14
51
- multidict==6.0.4
52
- numba==0.57.1
53
- numexpr==2.8.5
54
- numpy==1.24.4
55
- oauthlib==3.2.2
56
- openai==0.27.8
57
- opencv-python==4.8.0.76
58
- pathtools==0.1.2
59
- pillow==9.5.0
60
- pluggy==1.2.0
61
- proglog==0.1.10
62
- protobuf==3.19.6
63
- py==1.11.0
64
- pyasn1==0.5.0
65
- pyasn1-modules==0.3.0
66
- pydantic==2.3.0
67
- pydantic-core==2.6.3
68
- pyopengl==3.1.7
69
- pyparsing==3.0.9
70
- pytest==7.0.1
71
- regex==2023.6.3
72
- requests==2.31.0
73
- requests-oauthlib==1.3.1
74
- rsa==4.9
75
- safetensors==0.3.1
76
- sentry-sdk==1.26.0
77
- setproctitle==1.3.2
78
- smmap==5.0.0
79
- soupsieve==2.4.1
80
- sqlalchemy==2.0.20
81
- swig==4.1.1
82
- tenacity==8.2.3
83
- tensorboard==2.14.0
84
- tensorboard-data-server==0.7.1
85
- tianshou==0.4.10
86
- tokenizers==0.13.3
87
- tqdm==4.65.0
88
- transformers==4.30.2
89
- typing==3.7.4.3
90
- typing-extensions==4.7.1
91
- typing-inspect==0.9.0
92
- urllib3
93
- v==1
94
- wandb==0.15.4
95
- werkzeug==2.3.6
96
- yarl==1.9.2
97
- zipp==3.15.0
98
- aquarel==0.0.5
shell/test_acrobot.sh CHANGED
@@ -3,42 +3,42 @@
3
  # Naive Actor
4
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
6
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
7
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
8
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
  # PAL
11
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
13
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
14
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
15
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 5 --num_trails 1
16
 
17
  # COT
18
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
19
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
20
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
21
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
22
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
23
 
24
  # self consistency
25
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
26
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
27
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
28
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
29
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
30
 
31
  # self-ask
32
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
33
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
34
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
35
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
36
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
37
 
38
  # SPP
39
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
40
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
41
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
42
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
43
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
44
 
@@ -49,9 +49,9 @@ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_tr
49
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
50
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
51
 
52
- # Jarvis
53
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
54
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
55
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
56
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
57
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
 
3
  # Naive Actor
4
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
6
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
8
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
  # PAL
11
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
13
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
14
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
15
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 5 --num_trails 1
16
 
17
  # COT
18
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
19
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
20
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
21
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
22
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
23
 
24
  # self consistency
25
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
26
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
27
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
28
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
29
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
30
 
31
  # self-ask
32
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
33
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
34
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
35
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
36
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
37
 
38
  # SPP
39
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
40
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
41
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
42
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
43
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
44
 
 
49
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
50
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
51
 
52
+ # exe
53
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
54
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
55
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
56
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
57
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
shell/test_blackjack.sh CHANGED
@@ -3,34 +3,34 @@
3
  # Naive Actor
4
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
6
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --use_short_mem 1 --distiller traj_distiller
7
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
8
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
  # COT
11
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
13
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --use_short_mem 1 --distiller traj_distiller
14
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
15
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
16
 
17
  # self consistency
18
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
19
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
20
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --use_short_mem 1 --distiller traj_distiller
21
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
22
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
23
 
24
  # self-ask
25
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
26
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
27
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --use_short_mem 1 --distiller traj_distiller
28
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
29
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
30
  # SPP
31
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
32
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
33
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --use_short_mem 1 --distiller traj_distiller
34
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
35
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
36
 
@@ -41,9 +41,9 @@ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_ini
41
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
42
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
43
 
44
- # Jarvis
45
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
46
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
47
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
48
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
49
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
 
3
  # Naive Actor
4
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
6
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
8
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
  # COT
11
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
13
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
14
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
15
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
16
 
17
  # self consistency
18
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
19
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
20
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
21
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
22
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
23
 
24
  # self-ask
25
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
26
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
27
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
28
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
29
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
30
  # SPP
31
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
32
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
33
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
34
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
35
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
36
 
 
41
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
42
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
43
 
44
+ # exe
45
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
46
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
47
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
48
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
49
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
shell/test_cartpole.sh CHANGED
@@ -3,42 +3,35 @@
3
  # Naive Actor
4
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
6
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
7
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
8
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
- # PAL
11
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider pal_actor --prompt_level 1 --num_trails 1
12
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider pal_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
13
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
14
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider pal_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
15
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider pal_actor --prompt_level 5 --num_trails 1
16
-
17
  # COT
18
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
19
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
20
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
21
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
22
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
23
 
24
  # self consistency
25
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
26
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
27
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
28
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
29
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
30
 
31
  # self-ask
32
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
33
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
34
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
35
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
36
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
37
 
38
  # SPP
39
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
40
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
41
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
42
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
43
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
44
 
@@ -49,9 +42,9 @@ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_tran
49
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
50
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
51
 
52
- # Jarvis
53
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
54
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
55
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
56
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
57
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
 
3
  # Naive Actor
4
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
6
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
8
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
 
 
 
 
 
 
 
10
  # COT
11
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
13
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
14
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
15
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1

 # self consistency
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1

 # self-ask
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1

 # SPP
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1


 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller

+ # exe
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
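The five levels above vary only --prompt_level, the trail count, and whether a few-shot --prompt_path is supplied. A minimal sweep sketch for the exe decider, assuming main_reflexion.py accepts these flags exactly as used above (the loop itself is illustrative, not a file in the repository):

#!/usr/bin/env bash
# Sweep all five prompt levels for the exe decider on CartPole-v0.
# Mirrors the commands above: level 3 runs 5 trails, and levels 2 and 4
# additionally pass the few-shot prompt path.
for level in 1 2 3 4 5; do
  trails=1
  [ "$level" -eq 3 ] && trails=5
  extra=()
  if [ "$level" -eq 2 ] || [ "$level" -eq 4 ]; then
    extra=(--prompt_path "envs/classic_control/few_shot_examples/cartpole")
  fi
  python main_reflexion.py --env_name CartPole-v0 \
    --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator \
    --decider exe_actor --prompt_level "$level" --num_trails "$trails" \
    --distiller guide_generator "${extra[@]}"
done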
shell/test_cliffwalking.sh CHANGED
@@ -3,42 +3,42 @@
 # Naive Actor
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1

 # PAL
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 5 --num_trails 1

 # COT
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1

 # self consistency
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1

 # self-ask
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1

 # SPP
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1

@@ -49,9 +49,9 @@ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalki
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller

- # Jarvis
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
+ # exe
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
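The whole CliffWalking sweep can also be reproduced by running the updated script directly. A usage sketch, assuming invocation from the repository root so the relative --prompt_path values resolve; the log filename is illustrative:

# Run the updated sweep and keep a copy of the output.
bash shell/test_cliffwalking.sh 2>&1 | tee cliffwalking_sweep.log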
shell/{test_jarvis.sh → test_exe.sh} RENAMED
@@ -1,55 +1,55 @@
 # Acrobot-v1
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

 # Blackjack-v1
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 50 --distiller guide_generator
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 1 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 2 --num_trails 5 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 4 --num_trails 5 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 5 --num_trails 5 --distiller guide_generator

 # CartPole-v0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

 # CliffWalking-v0
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

 # LunarLander-v2
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
+ python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander"
+ python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander"
+ python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

 # MountainCar-v0
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
+ python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
+ python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

 # MountainCarContinuous-v0
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
+ python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
+ python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

 # Taxi-v3
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
+ python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
+ python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
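Each block of the renamed script pairs an environment with a translator prefix. A hypothetical helper, with the env-to-prefix map read off the commands above, that runs the exe decider at one prompt level across all eight environments; trail counts in the script itself vary per environment (Blackjack, for example, uses 5):

#!/usr/bin/env bash
# Sketch: run the exe decider at a single prompt level on every environment.
# The mapping of env name to translator prefix mirrors the commands above;
# the loop itself is illustrative and not part of the repository.
declare -A prefix=(
  [Acrobot-v1]=acrobot [Blackjack-v1]=blackjack [CartPole-v0]=cart
  [CliffWalking-v0]=cliffwalking [LunarLander-v2]=lunarLander
  [MountainCar-v0]=mountaincar [MountainCarContinuous-v0]=mountaincarContinuous
  [Taxi-v3]=taxi
)
for env in "${!prefix[@]}"; do
  python main_reflexion.py --env_name "$env" \
    --init_summarizer "${prefix[$env]}_init_translator" \
    --curr_summarizer "${prefix[$env]}_basic_translator" \
    --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
done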
shell/test_frozenlake.sh CHANGED
@@ -3,42 +3,42 @@
3
  # Naive Actor
4
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
6
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
7
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
8
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
  # PAL
11
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
13
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
14
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
15
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 5 --num_trails 1
16
 
17
  # COT
18
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
19
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
20
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
21
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
22
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
23
 
24
  # self consistency
25
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
26
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
27
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
28
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
29
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
30
 
31
  # self-ask
32
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
33
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
34
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
35
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
36
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
37
 
38
  # SPP
39
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
40
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
41
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
42
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
43
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
44
 
@@ -49,9 +49,9 @@ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_i
49
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
50
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
51
 
52
- # Jarvis
53
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
54
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
55
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
56
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
57
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
 
3
  # Naive Actor
4
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
6
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
8
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
  # PAL
11
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
13
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
14
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
15
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 5 --num_trails 1
 
  # COT
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
 
  # self consistency
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
 
  # self-ask
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
 
  # SPP
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
 
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
 
+ # exe
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
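The five exe_actor commands above differ only in --prompt_level, in --num_trails (5 at level 3, otherwise 1), and in whether --prompt_path points at the few-shot examples (levels 2 and 4). A minimal sketch of the same sweep as a loop, not part of the commit; every flag name and value is taken verbatim from the commands above, only the loop itself is illustrative:

# Sketch only: sweep exe_actor prompt levels 1-5 on FrozenLake-v1.
for level in 1 2 3 4 5; do
    trails=1; extra=""
    [ "$level" -eq 3 ] && trails=5                     # level 3 runs 5 trails above
    if [ "$level" -eq 2 ] || [ "$level" -eq 4 ]; then  # levels 2 and 4 add few-shot examples
        extra='--prompt_path envs/toy_text/few_shot_examples/frozenlake'
    fi
    python main_reflexion.py --env_name FrozenLake-v1 \
        --init_summarizer frozenlake_init_translator \
        --curr_summarizer frozenlake_basic_translator \
        --decider exe_actor --prompt_level "$level" \
        --num_trails "$trails" --distiller guide_generator $extra
done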
shell/test_jarvis_woi.sh DELETED
@@ -1,55 +0,0 @@
- # Acrobot-v1
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # Blackjack-v1
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 50 --distiller guide_generator
-
- # CartPole-v0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
-
- # CliffWalking-v0
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # LunarLander-v2
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
-
- # MountainCar-v0
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # MountainCarContinuous-v0
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # Taxi-v3
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator
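The deleted shell/test_jarvis_woi.sh above and the shell/test_jarvis_wosh.sh and shell/test_jarvis_wosug.sh files below are identical apart from the value passed to --decider. A sketch of their shared template, shown for the Acrobot-v1 section and parameterized over the decider name; note the jarvis_actor_* deciders are removed by this commit, so this documents the structure of the old scripts rather than a runnable configuration against the new code:

# Sketch only: shared Acrobot-v1 section of the three deleted ablation scripts.
run_acrobot_sweep() {
    decider="$1"  # jarvis_actor_woi, jarvis_actor_wosh, or jarvis_actor_wosug
    for level in 1 2 3 4 5; do
        trails=1; extra=""
        [ "$level" -eq 3 ] && trails=3                     # level 3 ran 3 trails
        if [ "$level" -eq 2 ] || [ "$level" -eq 4 ]; then  # levels 2 and 4 used few-shot examples
            extra='--prompt_path envs/classic_control/few_shot_examples/acrobot'
        fi
        python main_reflexion.py --env_name Acrobot-v1 \
            --init_summarizer acrobot_init_translator \
            --curr_summarizer acrobot_basic_translator \
            --decider "$decider" --prompt_level "$level" \
            --num_trails "$trails" --distiller guide_generator $extra
    done
}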
shell/test_jarvis_wosh.sh DELETED
@@ -1,55 +0,0 @@
- # Acrobot-v1
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # Blackjack-v1
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 50 --distiller guide_generator
-
- # CartPole-v0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
-
- # CliffWalking-v0
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # LunarLander-v2
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
-
- # MountainCar-v0
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # MountainCarContinuous-v0
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # Taxi-v3
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator
shell/test_jarvis_wosug.sh DELETED
@@ -1,55 +0,0 @@
- # Acrobot-v1
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # Blackjack-v1
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 50 --distiller guide_generator
-
- # CartPole-v0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
-
- # CliffWalking-v0
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # LunarLander-v2
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
-
- # MountainCar-v0
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # MountainCarContinuous-v0
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # Taxi-v3
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator
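With the run_acrobot_sweep sketch above, the Acrobot-v1 sections of the three deleted scripts reduce to one call each (again illustrative only, since these decider names no longer exist after this commit):

run_acrobot_sweep jarvis_actor_woi    # shell/test_jarvis_woi.sh
run_acrobot_sweep jarvis_actor_wosh   # shell/test_jarvis_wosh.sh
run_acrobot_sweep jarvis_actor_wosug  # shell/test_jarvis_wosug.sh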