hzxwonder committed · Commit 65ee2b8 · 1 Parent(s): 8c8cf65
This view is limited to 50 files because the commit contains too many changes.
Files changed (50):
  1. deciders/__init__.py +2 -10
  2. deciders/act.py +22 -20
  3. deciders/cot.py +1 -1
  4. deciders/{jarvis.py → exe.py} +29 -24
  5. deciders/jarvis_without_insights.py +0 -179
  6. deciders/jarvis_without_shortmem.py +0 -182
  7. deciders/jarvis_without_suggestions.py +0 -180
  8. deciders/jarvis_without_summary.py +0 -179
  9. deciders/pal.py +0 -149
  10. deciders/reflexion.py +3 -3
  11. deciders/self_consistency.py +2 -2
  12. deciders/selfask.py +1 -1
  13. deciders/spp.py +1 -1
  14. deciders/utils.py +12 -17
  15. distillers/guider.py +12 -43
  16. distillers/self_reflection.py +10 -4
  17. distillers/traj_prompt_summarizer.py +20 -6
  18. draw_overall_performance.py +0 -59
  19. environment.yml → environment.yaml +96 -115
  20. envs/classic_control/acrobot_policies.py +1 -1
  21. envs/toy_text/few_shot_examples/cliffwalking_l2.json +0 -0
  22. envs/toy_text/few_shot_examples/cliffwalking_l4.json +0 -0
  23. gen_examples.sh +0 -55
  24. gen_few_shots_examples.py +0 -269
  25. main_merge.py +0 -365
  26. main_merge.sh +0 -123
  27. main_reflexion.py +17 -67
  28. memory/env_history.py +3 -2
  29. prompts/__init__.py +10 -41
  30. prompts/task_irrelevant/prompts.py +1 -1
  31. prompts/task_relevant/box2d/LunarLander.py +1 -77
  32. prompts/task_relevant/classic_control/acrobot.py +1 -89
  33. prompts/task_relevant/classic_control/cartpole.py +1 -67
  34. prompts/task_relevant/classic_control/mountaincar.py +1 -72
  35. prompts/task_relevant/classic_control/mountaincarContinuous.py +1 -72
  36. prompts/task_relevant/toy_text/blackjack.py +1 -67
  37. prompts/task_relevant/toy_text/cliffwalking.py +1 -75
  38. prompts/task_relevant/toy_text/frozenlake.py +1 -50
  39. prompts/task_relevant/toy_text/taxi.py +1 -78
  40. record_reflexion.csv +11 -0
  41. requirements.txt +0 -98
  42. shell/test_acrobot.sh +12 -12
  43. shell/test_blackjack.sh +11 -11
  44. shell/test_cartpole.sh +11 -18
  45. shell/test_cliffwalking.sh +12 -12
  46. shell/{test_jarvis.sh → test_exe.sh} +40 -40
  47. shell/test_frozenlake.sh +12 -12
  48. shell/test_jarvis_woi.sh +0 -55
  49. shell/test_jarvis_wosh.sh +0 -55
  50. shell/test_jarvis_wosug.sh +0 -55
deciders/__init__.py CHANGED
@@ -1,26 +1,18 @@
 
 from .act import NaiveAct, RandomAct
 from .selfask import SelfAskAct
-from .pal import PAL
 from .cot import ChainOfThought
 from .self_consistency import SelfConsistency
 from .spp import SPP
 from .reflexion import Reflexion
-from .jarvis import Jarvis
-from .jarvis_without_insights import JarvisWithoutInsight
-from .jarvis_without_suggestions import JarvisWithoutSuggestions
-from .jarvis_without_shortmem import JarvisWithoutShortMem
+from .exe import EXE
 
 REGISTRY = {}
 REGISTRY['random_actor'] = RandomAct
 REGISTRY['naive_actor'] = NaiveAct
 REGISTRY['selfask_actor'] = SelfAskAct
-REGISTRY['pal_actor'] = PAL
 REGISTRY['cot_actor'] = ChainOfThought
 REGISTRY['self_consistency_actor'] = SelfConsistency
 REGISTRY['spp_actor'] = SPP
 REGISTRY['reflexion_actor'] = Reflexion
-REGISTRY['jarvis_actor'] = Jarvis
-REGISTRY['jarvis_actor_woi'] = JarvisWithoutInsight
-REGISTRY['jarvis_actor_wosug'] = JarvisWithoutSuggestions
-REGISTRY['jarvis_actor_wosh'] = JarvisWithoutShortMem
+REGISTRY['exe_actor'] = EXE
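A minimal usage sketch (not part of the commit) of the dispatch pattern this registry enables: an entry point presumably resolves a decider class from a CLI name such as exe_actor and instantiates it. The stub class and None arguments below are placeholders, not the real EXE constructor signature.

class _StubDecider:
    # Placeholder standing in for a decider class such as deciders.exe.EXE.
    def __init__(self, action_space, args, prompts, distiller):
        self.action_space = action_space

REGISTRY = {'exe_actor': _StubDecider}

decider_cls = REGISTRY['exe_actor']            # name -> class lookup
decider = decider_cls(None, None, None, None)  # real callers pass env/config objects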
deciders/act.py CHANGED
@@ -6,7 +6,7 @@ from loguru import logger
 from .parser import PARSERS
 from langchain.output_parsers import PydanticOutputParser
 from langchain.output_parsers import OutputFixingParser
-from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
+from langchain.chat_models import AzureChatOpenAI
 from memory.env_history import EnvironmentHistory
 import tiktoken
 import json
@@ -21,7 +21,7 @@ class RandomAct():
         return self.action_space.sample()+1, '', '', '', 0, 0
 
 class NaiveAct(gpt):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.0, max_tokens=512, logger=None):
+    def __init__(self, action_space, args, prompts, distiller, temperature=0.0, max_tokens=2048, logger=None):
         self.action_space = action_space
         self.temperature = temperature
         self.action_desc_dict = args.action_desc_dict
@@ -48,7 +48,7 @@ class NaiveAct(gpt):
             self.memory = self.summarized_fewshot_example
         if args.use_short_mem == 1:
             self.use_short_mem = True
-            self.mem_num = self.args.trajectories_num
+            self.mem_num = self.args.short_mem_num
         else:
             self.use_short_mem = False
             self.mem_num = 0
@@ -74,10 +74,11 @@ class NaiveAct(gpt):
         self.env_history.reset()
 
     def clear_mem(self):
+        self.update_mem()
         self.pre_memory = []
         self.post_memory = []
         self.is_first = True
-        self._update_mem(None)
+        self.env_history.reset()
 
 
     def _parser_initialization(self):
@@ -87,16 +88,15 @@ class NaiveAct(gpt):
         else:
             num_action = 1
 
-        # autofixing_chat = AzureChatOpenAI(
-        #     openai_api_type=openai.api_type,
-        #     openai_api_version=openai.api_version,
-        #     openai_api_base=openai.api_base,
-        #     openai_api_key=openai.api_key,
-        #     deployment_name="gpt-35-turbo",
-        #     temperature=self.temperature,
-        #     max_tokens=self.max_tokens
-        # )
-        autofixing_chat = ChatOpenAI(temperature=0, openai_api_key=openai.api_key)
+        autofixing_chat = AzureChatOpenAI(
+            openai_api_type=openai.api_type,
+            openai_api_version=openai.api_version,
+            openai_api_base=openai.api_base,
+            openai_api_key=openai.api_key,
+            deployment_name=self.args.gpt_version,
+            temperature=self.temperature,
+            max_tokens=self.max_tokens
+        )
 
         parser = PydanticOutputParser(pydantic_object=PARSERS[num_action])
         autofixing_parser = OutputFixingParser.from_llm(
@@ -127,13 +127,14 @@ class NaiveAct(gpt):
         for i, transition in enumerate(traj):
             traj_text += transition['observation']
             traj_text += f"> {transition['action']}"
+            traj_text += f"{transition.get('reward','')}\n"
             one_traj_token = self.num_tokens_from_string(traj_text)
-            if one_traj_token > 5000:
+            if one_traj_token > self.args.max_query_tokens:
                 max_step_num = i+1
                 break
             traj_text += f"Your performance is: {transition['cum_reward']}"
         if not max_step_num:
-            max_step_num = 200
+            max_step_num = self.args.max_episode_len
         self.summarized_fewshot_example = self.distiller.generate_from_file(json_file,max_step_num=max_step_num)
 
     def response(self, state_description, action_description, env_info, game_description=None, goal_description=None, fewshot_examples=None):
@@ -155,10 +156,11 @@ class NaiveAct(gpt):
         self.game_description = game_description
         self.goal_description = goal_description
         self.env_history.add("observation", state_description)
-        # print(self.env_history)
-        if len(self.env_history) >= 2:
+
+        # limit the token used, or it may exceed the max token
+        if len(self.env_history):
             one_history_token = self.num_tokens_from_string(self.env_history.get_one_history())
-            self.env_history.set_history(6000 // one_history_token)
+            self.env_history.set_history(self.args.max_query_tokens // one_history_token)
 
     def act(self, state_description, action_description, env_info, game_description=None, goal_description=None, logfile=None):
         self._add_history_before_action(game_description, goal_description, state_description)
@@ -192,7 +194,7 @@ class NaiveAct(gpt):
         if self.use_short_mem:
             if len(self.env_history) > 1:
                 my_mem += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                my_mem += f"\nBelow are the latest {min(self.args.short_mem_num,len(self.env_history)//2)} historical data entries:\n"
+                my_mem += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
                 my_mem += f"{self.env_history.get_histories(self.mem_num)}"
 
         while asking_round < 3:
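The common thread in these act.py changes is a configurable token budget: the hard-coded 5000/6000 limits become args.max_query_tokens, and the short-memory window is sized as max_query_tokens // one_history_token. A self-contained sketch of that calculation, assuming tiktoken's cl100k_base encoding and an illustrative budget value; the sample string is an invented observation/action entry, not one from the repo.

import tiktoken

def history_window(one_history: str, max_query_tokens: int = 5000) -> int:
    # How many history entries of this size fit inside the query budget.
    enc = tiktoken.get_encoding("cl100k_base")
    tokens_per_entry = len(enc.encode(one_history))
    return max_query_tokens // max(tokens_per_entry, 1)

# Example: a short CartPole-style observation/action pair.
print(history_window("Current state: cart at 0.1, pole angle 0.02\n> push right (2)"))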
deciders/cot.py CHANGED
@@ -88,7 +88,7 @@ class ChainOfThought(NaiveAct):
             if len(self.env_history) > 1:
                 if not suffix_flag:
                     human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                human_template += f"\nBelow are the latest {self.args.short_mem_num} historical data entries:\n"
+                human_template += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
                 human_template += f"{self.env_history.get_histories(self.mem_num)}"
         human_template += '\nNext is the observation that the agent gets:\nCurrent {state_description}\n'
         human_template += 'Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Here is the action description below:\n{action_description}\n'
deciders/{jarvis.py → exe.py} RENAMED
@@ -1,6 +1,6 @@
 import openai
 from .misc import history_to_str
-from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
+from langchain.chat_models import AzureChatOpenAI
 from langchain.prompts.chat import (
     PromptTemplate,
     ChatPromptTemplate,
@@ -19,7 +19,7 @@ from loguru import logger
 
 
 
-class Jarvis(NaiveAct):
+class EXE(NaiveAct):
     def __init__(self, action_space, args, prompts, distiller, temperature=0., max_tokens=None, logger=None, fixed_suggestion=None, fixed_insight=None):
         super().__init__(action_space, args, prompts, distiller, temperature, max_tokens, logger)
         self.pre_memory = []
@@ -30,8 +30,7 @@
         self.goal_description = args.goal_description
         self.action_description = args.action_description
         self.action_desc_dict = args.action_desc_dict
-        self.mem_num = args.trajectories_num
-        self.temperature = temperature
+        self.mem_num = args.short_mem_num
         self.fixed_suggestion = fixed_suggestion
         self.fixed_insight = fixed_insight
         self._update_mem(None)
@@ -50,10 +49,12 @@
         self._update_mem(traj)
 
     def clear_mem(self):
+        self.update_mem()
         self.pre_memory = []
         self.post_memory = []
         self.is_first = True
-        self._update_mem(None)
+        self.env_history.reset()
+        # self._update_mem(None)
 
     def _update_mem(self, traj):
         if self.memory:
@@ -82,6 +83,7 @@
         insight_str += f"{self.insight}\n"
         suggestion_str = "The suggestions are listed below:" + self.pre_memory[-1]
         return insight_str + suggestion_str
+
     def act(
         self,
         state_description,
@@ -94,7 +96,15 @@
         self.game_description = game_description
         self.goal_description = goal_description
         self.env_history.add("observation", state_description)
-        chat = ChatOpenAI(temperature=0.5, openai_api_key=openai.api_key, model=self.args.gpt_version)
+        chat = AzureChatOpenAI(
+            openai_api_type=openai.api_type,
+            openai_api_version=openai.api_version,
+            openai_api_base=openai.api_base,
+            openai_api_key=openai.api_key,
+            deployment_name=self.args.gpt_version,
+            temperature=self.temperature,
+            max_tokens=self.max_tokens,
+        )
         # print(self.logger)
         reply_format_description = \
             "Your response should choose an optimal action from valid action list, and terminated with following format: "
@@ -107,8 +117,8 @@
         for examples in self.irr_few_shot_examples:
             template += "\nQuestion: \n" + examples['question'] + "Answer: \n" + examples['answer']
 
-        template += "\n\nNow you are in the task."
-        template += " {game_description} {action_description} {goal_description}"
+        template += "\n\nNow you are in the task.\n"
+        template += " {game_description}\n{action_description}\n{goal_description}"
         template += "You are observing something and " \
             "you need to choose the optimal action acoordingly."
         template += 'Response and interact using the format: {reply_format_description}{format_instructions}\n'
@@ -116,7 +126,7 @@
         template += self._read_mem()
         system_message_prompt = SystemMessagePromptTemplate.from_template(template)
 
-        short_memory_template = HumanMessagePromptTemplate.from_template("{history}")
+        short_memory_template = HumanMessagePromptTemplate.from_template("{history}\nNext is the observation that the agent gets:\n{state_description}Please select an optimal action to gain higher rewards based on the current state and history. The action description is below: {action_description}. Please think step by step.")
         chat_prompt = ChatPromptTemplate.from_messages(
             [system_message_prompt, short_memory_template])
         if self.logger:
@@ -130,12 +140,7 @@
             handler = FileCallbackHandler(logfile)
         total_tokens, total_cost = 0, 0
         max_think_times = 1
-        # TODO: ADD REACT Support
-        # print(str(self.env_history))
-        if self.use_short_mem:
-            my_history = str(self.env_history)
-        else:
-            my_history = ""
+
        for i_think in range(max_think_times):
             # chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=True)
             chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=False)
@@ -145,11 +150,11 @@
                     game_description=game_description,
                     goal_description=goal_description,
                     action_description=action_description,
-                    # state_description = self.env_history.get_last_history(),
-                    history=self.env_history.get_histories_with_last(self.mem_num),
+                    state_description = self.env_history.get_last_history(),
+                    history=self.env_history.get_histories(self.mem_num),
                     format_instructions=self.parser.get_format_instructions(),
                     reply_format_description=reply_format_description,
-                    max_token=3000
+                    max_token=self.max_tokens
                 )
 
                 total_tokens += cb.total_tokens
@@ -166,12 +171,12 @@
             self.logger.info(f'History: {history_to_str(env_info["history"])}')
         text_prompt = chat_prompt.format_messages(
             game_description=game_description,
-            goal_description=goal_description,
-            action_description=action_description,
-            # state_description = self.env_history.get_last_history(),
-            history=self.env_history.get_histories_with_last(self.mem_num),
-            format_instructions=self.parser.get_format_instructions(),
-            reply_format_description=reply_format_description,
+            goal_description=goal_description,
+            action_description=action_description,
+            state_description = self.env_history.get_last_history(),
+            history=self.env_history.get_histories(self.mem_num),
+            format_instructions=self.parser.get_format_instructions(),
+            reply_format_description=reply_format_description,
         )
         text_prompt = f'{text_prompt[0].content}\n{text_prompt[1].content}'
         return action, text_prompt, response, total_tokens, total_cost
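For reference, a stripped-down sketch of the two-message prompt EXE.act now builds: a system message carrying the task text plus distilled memory, and a human message carrying recent history and the latest observation. The langchain imports match those in the diff; the template strings and sample values below are abbreviated stand-ins for the full ones above, not the repo's actual data.

from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

system_message_prompt = SystemMessagePromptTemplate.from_template(
    "Now you are in the task.\n{game_description}\n{action_description}\n{goal_description}")
short_memory_template = HumanMessagePromptTemplate.from_template(
    "{history}\nNext is the observation that the agent gets:\n{state_description}")
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, short_memory_template])

messages = chat_prompt.format_messages(
    game_description="CartPole: balance a pole on a moving cart.",
    action_description="1 = push left, 2 = push right.",
    goal_description="Keep the pole upright for as long as possible.",
    history="Observation: pole angle 0.01\n> 2",
    state_description="pole angle 0.03, cart velocity 0.10",
)
print(messages[0].content, messages[1].content, sep="\n---\n")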
deciders/jarvis_without_insights.py DELETED
@@ -1,179 +0,0 @@
-import openai
-from .misc import history_to_str
-from langchain.chat_models import AzureChatOpenAI
-from langchain.prompts.chat import (
-    PromptTemplate,
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-)
-from langchain.prompts.few_shot import FewShotPromptTemplate
-from langchain import LLMChain
-from loguru import logger
-from langchain.callbacks import FileCallbackHandler
-from langchain.callbacks import get_openai_callback
-from .act import NaiveAct
-from memory.env_history import EnvironmentHistory
-import tiktoken
-from .utils import run_chain
-
-
-class JarvisWithoutInsight(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens)
-        self.pre_memory = []
-        self.post_memory = []
-        self.is_first = True
-        self.num_trails = args.num_trails
-        self.game_description = args.game_description
-        self.goal_description = args.goal_description
-        self.action_description = args.action_description
-        self._update_mem(None)
-
-    def update_mem(self,):
-        traj = self.game_description
-        traj += self.goal_description
-        max_step_num = min(14000 // self.num_tokens_from_string(self.env_history.get_one_history()),200)
-        traj += self.env_history.get_histories(max_step_num)
-        self._update_mem(traj)
-
-    def _update_mem(self, traj):
-        if not self.is_first:
-            summary = self.distiller.generate_summary(traj, self.post_memory)
-            self.post_memory.append(summary)
-            self.insight = self.distiller.generate_insight(self.post_memory)
-        else:
-            self.is_first = False
-        suggestion = self.distiller.generate_suggestion(self.game_description, self.goal_description, self.action_description, self.pre_memory, self.post_memory, self.num_trails)
-        self.pre_memory.append(suggestion)
-        self.env_history.reset()
-
-    def _read_mem(self, ):
-        insight_str = ""
-        suggestion_str = "The suggestions are listed below:" + self.pre_memory[-1]
-        return insight_str + suggestion_str
-
-    def act(
-        self,
-        state_description,
-        action_description,
-        env_info,
-        game_description,
-        goal_description,
-        logfile=None,
-    ):
-        self.game_description = game_description
-        self.goal_description = goal_description
-        self.env_history.add("observation", state_description)
-        chat = AzureChatOpenAI(
-            openai_api_type=openai.api_type,
-            openai_api_version=openai.api_version,
-            openai_api_base=openai.api_base,
-            openai_api_key=openai.api_key,
-            deployment_name=self.args.gpt_version,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-        )
-        reply_format_description = \
-            "Your response should choose an optimal action from valid action list, and terminated with following format: "
-        # only task relevant examplesA
-        template = "Now you are completing a task. "
-        template += "You need to carefully understand the description of the game. "
-        # TODO: few shot example handle
-        if self.irr_few_shot_examples:
-            template += "Here are some examples of how you should completing a task."
-            for examples in self.irr_few_shot_examples:
-                template += "\nQuestion: \n" + examples['question'] + "Answer: \n" + examples['answer']
-
-        if self.fewshot_example:
-            if self.expert_knowledge:
-                template += "Here, I will provide you with some expert knowledge to help you better understand the rules of the task."
-                template += self.expert_knowledge + '\n'
-            template += "Next are some examples: "
-        system_message_prompt = SystemMessagePromptTemplate.from_template(template)
-
-        human_template = ""
-        human_template += "\n\nNow you are in the task.\n"
-        human_template += "{game_description}\n{action_description}\n{goal_description}\n"
-        human_template += "You are observing something and " \
-            "you need to choose the optimal action acoordingly. "
-        human_template += 'Response and interact using the format: {reply_format_description}{format_instructions}\n'
-        human_template += self._read_mem()
-        human_template += "\n\nHere are some history states listed below:\n"
-
-        fewshot_example_prompt = PromptTemplate(
-            input_variables=["question", "answer"],
-            template="Question: \n{question}\n{answer}"
-        )
-        human_message_prompt = FewShotPromptTemplate(
-            examples=self.fewshot_example,
-            example_prompt=fewshot_example_prompt,
-            suffix=human_template,
-            input_variables=[
-                'game_description', 'goal_description',
-                'action_description', 'reply_format_description'],
-            partial_variables={'format_instructions': self.parser.get_format_instructions()}
-        )
-        human_message_prompt = HumanMessagePromptTemplate(prompt=human_message_prompt)
-
-        short_memory_template = HumanMessagePromptTemplate.from_template("{history} Please select an action based on the current game state:")
-
-        chat_prompt = ChatPromptTemplate.from_messages(
-            [system_message_prompt, human_message_prompt, short_memory_template])
-
-
-        if logfile:
-            # logger.remove()
-            if self.first_call:
-                logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' not in x['message'])
-                self.first_call = False
-            handler = FileCallbackHandler(logfile)
-        total_tokens, total_cost = 0, 0
-        max_think_times = 1
-        # TODO: ADD REACT Support
-        # print(str(self.env_history))
-        if self.use_short_mem:
-            my_history = str(self.env_history)
-        else:
-            my_history = ""
-        for i_think in range(max_think_times):
-            chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=False)
-            with get_openai_callback() as cb:
-                response = run_chain(
-                    chain,
-                    game_description=game_description,
-                    goal_description=goal_description,
-                    action_description=action_description,
-                    history=str(self.env_history),
-                    format_instructions=self.parser.get_format_instructions(),
-                    reply_format_description=reply_format_description,
-                    max_token = 3000
-                )
-
-                total_tokens += cb.total_tokens
-                total_cost += cb.total_cost
-        action = self.parser.parse(response).action
-
-        text_prompt = chat_prompt.format_messages(
-            game_description=game_description,
-            goal_description=goal_description,
-            action_description=action_description,
-            history=str(self.env_history),
-            format_instructions=self.parser.get_format_instructions(),
-            reply_format_description=reply_format_description,
-        )
-        texts = ""
-        for text in text_prompt:
-            texts += text.content + "\n"
-
-        self._add_history_after_action(action)
-        logger.info(f'The GPT response is: {response}.')
-        logger.info(f'The optimal action is: {action}.')
-        if self.pre_memory:
-            logger.info(f'The suggestion is: {self.pre_memory[-1]}.')
-        if self.post_memory:
-            logger.info(f'The summary is: {self.post_memory[-1]}.')
-        if env_info.get('history'):
-            logger.info(f'History: {history_to_str(env_info["history"])}')
-
-        return action, texts, response, logger, total_tokens, total_cost
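All four deleted ablation classes share the trajectory-memory cycle visible in _update_mem above: summarize the finished episode, distill an insight from the summaries, then generate a fresh suggestion for the next episode. A schematic sketch with a stub distiller (the real implementations live under distillers/); the stub's method bodies and return strings are invented for illustration.

class StubDistiller:
    # Mirrors the three distiller calls made by _update_mem.
    def generate_summary(self, traj, post_memory):
        return f"summary of a {len(traj)}-char trajectory"
    def generate_insight(self, post_memory):
        return "insight distilled from all summaries so far"
    def generate_suggestion(self):
        return "suggestion for the next episode"

distiller = StubDistiller()
pre_memory, post_memory, is_first = [], [], True
for traj in ["episode 1 text", "episode 2 text"]:
    if not is_first:  # the very first call (traj=None in the repo) skips summarization
        post_memory.append(distiller.generate_summary(traj, post_memory))
        insight = distiller.generate_insight(post_memory)
    is_first = False
    pre_memory.append(distiller.generate_suggestion())
print(pre_memory, post_memory)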
deciders/jarvis_without_shortmem.py DELETED
@@ -1,182 +0,0 @@
-import openai
-from .misc import history_to_str
-from langchain.chat_models import AzureChatOpenAI
-from langchain.prompts.chat import (
-    PromptTemplate,
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-)
-from langchain.prompts.few_shot import FewShotPromptTemplate
-from langchain import LLMChain
-from loguru import logger
-from langchain.callbacks import FileCallbackHandler
-from langchain.callbacks import get_openai_callback
-from .act import NaiveAct
-from memory.env_history import EnvironmentHistory
-import tiktoken
-from .utils import run_chain
-
-
-class JarvisWithoutShortMem(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens)
-        self.pre_memory = []
-        self.post_memory = []
-        self.is_first = True
-        self.num_trails = args.num_trails
-        self.game_description = args.game_description
-        self.goal_description = args.goal_description
-        self.action_description = args.action_description
-        self._update_mem(None)
-
-    def update_mem(self,):
-        traj = self.game_description
-        traj += self.goal_description
-        max_step_num = min(14000 // self.num_tokens_from_string(self.env_history.get_one_history()),200)
-        traj += self.env_history.get_histories(max_step_num)
-        self._update_mem(traj)
-
-    def _update_mem(self, traj):
-        if not self.is_first:
-            summary = self.distiller.generate_summary(traj, self.post_memory)
-            self.post_memory.append(summary)
-            self.insight = self.distiller.generate_insight(self.post_memory)
-        else:
-            self.is_first = False
-        suggestion = self.distiller.generate_suggestion(self.game_description, self.goal_description, self.action_description, self.pre_memory, self.post_memory, self.num_trails)
-        self.pre_memory.append(suggestion)
-        self.env_history.reset()
-
-    def _read_mem(self, ):
-        insight_str = ""
-        if len(self.post_memory) > 0:
-            insight_str += "The insights of the game are listed below: "
-            insight_str += f"{self.insight}\n"
-        suggestion_str = "The suggestions are listed below:" + self.pre_memory[-1]
-        return insight_str + suggestion_str
-
-    def act(
-        self,
-        state_description,
-        action_description,
-        env_info,
-        game_description,
-        goal_description,
-        logfile=None,
-    ):
-        self.game_description = game_description
-        self.goal_description = goal_description
-        self.env_history.add("observation", state_description)
-        chat = AzureChatOpenAI(
-            openai_api_type=openai.api_type,
-            openai_api_version=openai.api_version,
-            openai_api_base=openai.api_base,
-            openai_api_key=openai.api_key,
-            deployment_name=self.args.gpt_version,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-        )
-        reply_format_description = \
-            "Your response should choose an optimal action from valid action list, and terminated with following format: "
-        # only task relevant examplesA
-        template = "Now you are completing a task. "
-        template += "You need to carefully understand the description of the game. "
-        # TODO: few shot example handle
-        if self.irr_few_shot_examples:
-            template += "Here are some examples of how you should completing a task."
-            for examples in self.irr_few_shot_examples:
-                template += "\nQuestion: \n" + examples['question'] + "Answer: \n" + examples['answer']
-
-        if self.fewshot_example:
-            if self.expert_knowledge:
-                template += "Here, I will provide you with some expert knowledge to help you better understand the rules of the task."
-                template += self.expert_knowledge + '\n'
-            template += "Next are some examples: "
-        system_message_prompt = SystemMessagePromptTemplate.from_template(template)
-
-        human_template = ""
-        human_template += "\n\nNow you are in the task.\n"
-        human_template += "{game_description}\n{action_description}\n{goal_description}\n"
-        human_template += "You are observing something and " \
-            "you need to choose the optimal action acoordingly. "
-        human_template += 'Response and interact using the format: {reply_format_description}{format_instructions}\n'
-        human_template += self._read_mem()
-        human_template += "\n\nHere are some history states listed below:\n"
-
-        fewshot_example_prompt = PromptTemplate(
-            input_variables=["question", "answer"],
-            template="Question: \n{question}\n{answer}"
-        )
-        human_message_prompt = FewShotPromptTemplate(
-            examples=self.fewshot_example,
-            example_prompt=fewshot_example_prompt,
-            suffix=human_template,
-            input_variables=[
-                'game_description', 'goal_description',
-                'action_description', 'reply_format_description'],
-            partial_variables={'format_instructions': self.parser.get_format_instructions()}
-        )
-        human_message_prompt = HumanMessagePromptTemplate(prompt=human_message_prompt)
-
-        short_memory_template = HumanMessagePromptTemplate.from_template("{history} Please select an action based on the current game state:")
-
-        chat_prompt = ChatPromptTemplate.from_messages(
-            [system_message_prompt, human_message_prompt, short_memory_template])
-
-
-        if logfile:
-            # logger.remove()
-            if self.first_call:
-                logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' not in x['message'])
-                self.first_call = False
-            handler = FileCallbackHandler(logfile)
-        total_tokens, total_cost = 0, 0
-        max_think_times = 1
-        # TODO: ADD REACT Support
-        # print(str(self.env_history))
-        if self.use_short_mem:
-            my_history = str(self.env_history)
-        else:
-            my_history = ""
-        for i_think in range(max_think_times):
-            chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=False)
-            with get_openai_callback() as cb:
-                response = run_chain(
-                    chain,
-                    game_description=game_description,
-                    goal_description=goal_description,
-                    action_description=action_description,
-                    history=self.env_history.get_last_history(),
-                    format_instructions=self.parser.get_format_instructions(),
-                    reply_format_description=reply_format_description,
-                    max_token = 3000
-                )
-
-                total_tokens += cb.total_tokens
-                total_cost += cb.total_cost
-        action = self.parser.parse(response).action
-
-        text_prompt = chat_prompt.format_messages(
-            game_description=game_description,
-            goal_description=goal_description,
-            action_description=action_description,
-            history=self.env_history.get_last_history(),
-            format_instructions=self.parser.get_format_instructions(),
-            reply_format_description=reply_format_description,
-        )
-        texts = ""
-        for text in text_prompt:
-            texts += text.content + "\n"
-
-        self._add_history_after_action(action)
-        logger.info(f'The GPT response is: {response}.')
-        logger.info(f'The optimal action is: {action}.')
-        if self.pre_memory:
-            logger.info(f'The suggestion is: {self.pre_memory[-1]}.')
-        if self.post_memory:
-            logger.info(f'The summary is: {self.post_memory[-1]}.')
-        if env_info.get('history'):
-            logger.info(f'History: {history_to_str(env_info["history"])}')
-
-        return action, texts, response, logger, total_tokens, total_cost
deciders/jarvis_without_suggestions.py DELETED
@@ -1,180 +0,0 @@
-import openai
-from .misc import history_to_str
-from langchain.chat_models import AzureChatOpenAI
-from langchain.prompts.chat import (
-    PromptTemplate,
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-)
-from langchain.prompts.few_shot import FewShotPromptTemplate
-from langchain import LLMChain
-from loguru import logger
-from langchain.callbacks import FileCallbackHandler
-from langchain.callbacks import get_openai_callback
-from .act import NaiveAct
-from memory.env_history import EnvironmentHistory
-import tiktoken
-from .utils import run_chain
-
-
-class JarvisWithoutSuggestions(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens)
-        self.pre_memory = []
-        self.post_memory = []
-        self.is_first = True
-        self.num_trails = args.num_trails
-        self.game_description = args.game_description
-        self.goal_description = args.goal_description
-        self.action_description = args.action_description
-        self._update_mem(None)
-
-    def update_mem(self,):
-        traj = self.game_description
-        traj += self.goal_description
-        max_step_num = min(14000 // self.num_tokens_from_string(self.env_history.get_one_history()),200)
-        traj += self.env_history.get_histories(max_step_num)
-        self._update_mem(traj)
-
-    def _update_mem(self, traj):
-        if not self.is_first:
-            summary = self.distiller.generate_summary(traj, self.post_memory)
-            self.post_memory.append(summary)
-            self.insight = self.distiller.generate_insight(self.post_memory)
-        else:
-            self.is_first = False
-        suggestion = self.distiller.generate_suggestion(self.game_description, self.goal_description, self.action_description, self.pre_memory, self.post_memory, self.num_trails)
-        self.pre_memory.append(suggestion)
-        self.env_history.reset()
-
-    def _read_mem(self, ):
-        insight_str = ""
-        if len(self.post_memory) > 0:
-            insight_str += "The insights of the game are listed below: "
-            insight_str += f"{self.insight}\n"
-        suggestion_str = "\n"
-        return insight_str + suggestion_str
-
-    def act(
-        self,
-        state_description,
-        action_description,
-        env_info,
-        game_description,
-        goal_description,
-        logfile=None,
-    ):
-        self.game_description = game_description
-        self.goal_description = goal_description
-        self.env_history.add("observation", state_description)
-        chat = AzureChatOpenAI(
-            openai_api_type=openai.api_type,
-            openai_api_version=openai.api_version,
-            openai_api_base=openai.api_base,
-            openai_api_key=openai.api_key,
-            deployment_name=self.args.gpt_version,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-        )
-        reply_format_description = \
-            "Your response should choose an optimal action from valid action list, and terminated with following format: "
-        # only task relevant examplesA
-        template = "Now you are completing a task. "
-        template += "You need to carefully understand the description of the game. "
-        # TODO: few shot example handle
-        if self.irr_few_shot_examples:
-            template += "Here are some examples of how you should completing a task."
-            for examples in self.irr_few_shot_examples:
-                template += "\nQuestion: \n" + examples['question'] + "Answer: \n" + examples['answer']
-
-        if self.fewshot_example:
-            if self.expert_knowledge:
-                template += "Here, I will provide you with some expert knowledge to help you better understand the rules of the task."
-                template += self.expert_knowledge + '\n'
-            template += "Next are some examples: "
-        system_message_prompt = SystemMessagePromptTemplate.from_template(template)
-
-        human_template = ""
-        human_template += "\n\nNow you are in the task.\n"
-        human_template += "{game_description}\n{action_description}\n{goal_description}\n"
-        human_template += "You are observing something and " \
-            "you need to choose the optimal action acoordingly. "
-        human_template += 'Response and interact using the format: {reply_format_description}{format_instructions}\n'
-        human_template += self._read_mem()
-        human_template += "\n\nHere are some history states listed below:\n"
-
-        fewshot_example_prompt = PromptTemplate(
-            input_variables=["question", "answer"],
-            template="Question: \n{question}\n{answer}"
-        )
-        human_message_prompt = FewShotPromptTemplate(
-            examples=self.fewshot_example,
-            example_prompt=fewshot_example_prompt,
-            suffix=human_template,
-            input_variables=[
-                'game_description', 'goal_description',
-                'action_description', 'reply_format_description'],
-            partial_variables={'format_instructions': self.parser.get_format_instructions()}
-        )
-        human_message_prompt = HumanMessagePromptTemplate(prompt=human_message_prompt)
-
-        short_memory_template = HumanMessagePromptTemplate.from_template("{history} Please select an action based on the current game state:")
-
-        chat_prompt = ChatPromptTemplate.from_messages(
-            [system_message_prompt, human_message_prompt, short_memory_template])
-
-
-        if logfile:
-            # logger.remove()
-            if self.first_call:
-                logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' not in x['message'])
-                self.first_call = False
-            handler = FileCallbackHandler(logfile)
-        total_tokens, total_cost = 0, 0
-        max_think_times = 1
-        # TODO: ADD REACT Support
-        # print(str(self.env_history))
-        if self.use_short_mem:
-            my_history = str(self.env_history)
-        else:
-            my_history = ""
-        for i_think in range(max_think_times):
-            chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=False)
-            with get_openai_callback() as cb:
-                response = run_chain(
-                    chain,
-                    game_description=game_description,
-                    goal_description=goal_description,
-                    action_description=action_description,
-                    history=str(self.env_history),
-                    format_instructions=self.parser.get_format_instructions(),
-                    reply_format_description=reply_format_description,
-                    max_token = 3000
-                )
-
-                total_tokens += cb.total_tokens
-                total_cost += cb.total_cost
-        action = self.parser.parse(response).action
-
-        text_prompt = chat_prompt.format_messages(
-            game_description=game_description,
-            goal_description=goal_description,
-            action_description=action_description,
-            history=str(self.env_history),
-            format_instructions=self.parser.get_format_instructions(),
-            reply_format_description=reply_format_description,
-        )
-        texts = ""
-        for text in text_prompt:
-            texts += text.content + "\n"
-
-        self._add_history_after_action(action)
-        logger.info(f'The GPT response is: {response}.')
-        logger.info(f'The optimal action is: {action}.')
-        if self.post_memory:
-            logger.info(f'The summary is: {self.post_memory[-1]}.')
-        if env_info.get('history'):
-            logger.info(f'History: {history_to_str(env_info["history"])}')
-
-        return action, texts, response, logger, total_tokens, total_cost
deciders/jarvis_without_summary.py DELETED
@@ -1,179 +0,0 @@
-import openai
-from .misc import history_to_str
-from langchain.chat_models import AzureChatOpenAI
-from langchain.prompts.chat import (
-    PromptTemplate,
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-)
-from langchain.prompts.few_shot import FewShotPromptTemplate
-from langchain import LLMChain
-from loguru import logger
-from langchain.callbacks import FileCallbackHandler
-from langchain.callbacks import get_openai_callback
-from .act import NaiveAct
-from memory.env_history import EnvironmentHistory
-import tiktoken
-
-
-class Jarvis(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens)
-        self.pre_memory = []
-        self.post_memory = []
-        self.is_first = True
-        self.num_trails = args.num_trails
-        self.game_description = args.game_description
-        self.goal_description = args.goal_description
-        self.action_description = args.action_description
-        self._update_mem(None)
-
-    def update_mem(self,):
-        traj = self.game_description
-        traj += self.goal_description
-        max_step_num = min(14000 // self.num_tokens_from_string(self.env_history.get_one_history()),200)
-        traj += self.env_history.get_histories(max_step_num)
-        self._update_mem(traj)
-
-    def _update_mem(self, traj):
-        if not self.is_first:
-            summary = self.distiller.generate_summary(traj, self.post_memory)
-            self.post_memory.append(summary)
-            self.insight = self.distiller.generate_insight(self.post_memory)
-        else:
-            self.is_first = False
-        suggestion = self.distiller.generate_suggestion(self.game_description, self.goal_description, self.action_description, self.pre_memory, self.post_memory, self.num_trails)
-        self.pre_memory.append(suggestion)
-        self.env_history.reset()
-
-    def _read_mem(self, ):
-        insight_str = ""
-        if len(self.post_memory) > 0:
-            insight_str += "The insights of the game are listed below: "
-            insight_str += f"{self.insight}\n"
-        suggestion_str = "The suggestions are listed below:" + self.pre_memory[-1]
-        return insight_str + suggestion_str
-
-    def act(
-        self,
-        state_description,
-        action_description,
-        env_info,
-        game_description,
-        goal_description,
-        logfile=None,
-    ):
-        self.game_description = game_description
-        self.goal_description = goal_description
-        self.env_history.add("observation", state_description)
-        chat = AzureChatOpenAI(
-            openai_api_type=openai.api_type,
-            openai_api_version=openai.api_version,
-            openai_api_base=openai.api_base,
-            openai_api_key=openai.api_key,
-            deployment_name=self.args.gpt_version,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-        )
-        reply_format_description = \
-            "Your response should choose an optimal action from valid action list, and terminated with following format: "
-        # only task relevant examplesA
-        template = "Now you are completing a task. "
-        template += "You need to carefully understand the description of the game. "
-        # TODO: few shot example handle
-        if self.irr_few_shot_examples:
-            template += "Here are some examples of how you should completing a task."
-            for examples in self.irr_few_shot_examples:
-                template += "\nQuestion: \n" + examples['question'] + "Answer: \n" + examples['answer']
-
-        if self.fewshot_example:
-            if self.expert_knowledge:
-                template += "Here, I will provide you with some expert knowledge to help you better understand the rules of the task."
-                template += self.expert_knowledge + '\n'
-            template += "Next are some examples: "
-        system_message_prompt = SystemMessagePromptTemplate.from_template(template)
-
-        human_template = ""
-        human_template += "\n"
-        human_template += "{game_description}\n{action_description}\n{goal_description}\n"
-        human_template += "You are observing something and " \
-            "you need to choose the optimal action acoordingly. "
-        human_template += 'Response and interact using the format: {reply_format_description}{format_instructions}\n'
-        human_template += self._read_mem()
-        human_template += "\n\nHere are some history states listed below:\n"
-
-        fewshot_example_prompt = PromptTemplate(
-            input_variables=["question", "answer"],
-            template="Question: \n{question}\n{answer}"
-        )
-        human_message_prompt = FewShotPromptTemplate(
-            examples=self.fewshot_example,
-            example_prompt=fewshot_example_prompt,
-            suffix=human_template,
-            input_variables=[
-                'game_description', 'goal_description',
-                'action_description', 'reply_format_description'],
-            partial_variables={'format_instructions': self.parser.get_format_instructions()}
-        )
-        human_message_prompt = HumanMessagePromptTemplate(prompt=human_message_prompt)
-
-        short_memory_template = HumanMessagePromptTemplate.from_template("{history} Please select an action based on the current game state. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or perform any prohibited actions. Here's the action description below: \n {action_description}\n")
-
-        chat_prompt = ChatPromptTemplate.from_messages(
-            [system_message_prompt, human_message_prompt, short_memory_template])
-
-        if logfile:
-            # logger.remove()
-            if self.first_call:
-                logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' not in x['message'])
-                self.first_call = False
-            handler = FileCallbackHandler(logfile)
-        total_tokens, total_cost = 0, 0
-        max_think_times = 1
-        # TODO: ADD REACT Support
-        # print(str(self.env_history))
-        if self.use_short_mem:
-            my_history = str(self.env_history)
-        else:
-            my_history = ""
-        for i_think in range(max_think_times):
-            chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=False)
-            with get_openai_callback() as cb:
-                response = chain.run(
-                    game_description=game_description,
-                    goal_description=goal_description,
-                    action_description=action_description,
-                    history=self.env_history.get_histories(11),
-                    format_instructions=self.parser.get_format_instructions(),
-                    reply_format_description=reply_format_description,
-                    max_token = 3000
-                )
-
-                total_tokens += cb.total_tokens
-                total_cost += cb.total_cost
-        action = self.parser.parse(response).action
-
-        text_prompt = chat_prompt.format_messages(
-            game_description=game_description,
-            goal_description=goal_description,
-            action_description=action_description,
-            history=self.env_history.get_histories(11),
-            format_instructions=self.parser.get_format_instructions(),
-            reply_format_description=reply_format_description,
-        )
-        texts = ""
-        for text in text_prompt:
-            texts += text.content + "\n"
-
-        self._add_history_after_action(action)
-        logger.info(f'The GPT response is: {response}.')
-        logger.info(f'The optimal action is: {action}.')
-        if self.pre_memory:
-            logger.info(f'The suggestion is: {self.pre_memory[-1]}.')
-        if self.post_memory:
-            logger.info(f'The summary is: {self.post_memory[-1]}.')
-        if env_info.get('history'):
-            logger.info(f'History: {history_to_str(env_info["history"])}')
-
-        return action, texts, response, logger, total_tokens, total_cost
deciders/pal.py DELETED
@@ -1,149 +0,0 @@
-import openai
-from .misc import history_to_str
-from langchain.chat_models import AzureChatOpenAI
-from langchain.prompts.chat import (
-    PromptTemplate,
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-)
-from langchain.prompts.few_shot import FewShotPromptTemplate
-from langchain import LLMChain
-from loguru import logger
-from langchain.callbacks import FileCallbackHandler
-from langchain.callbacks import get_openai_callback
-from .act import NaiveAct
-from .utils import run_chain
-
-def get_last_n_lines(text, n):
-    lines = text.splitlines()
-    return '\n'.join(lines[-n:])
-
-class PAL(NaiveAct):
-    def __init__(self, action_space, args, prompts, distiller, temperature=0.1, max_tokens=None, logger=None):
-        super().__init__(action_space, args, prompts, distiller, temperature, max_tokens, logger)
-
-    def act(
-        self,
-        state_description,
-        action_description,
-        env_info,
-        game_description,
-        goal_description,
-        logfile=None,
-    ):
-        self._add_history_before_action(game_description, goal_description, state_description)
-        chat = AzureChatOpenAI(
-            openai_api_type=openai.api_type,
-            openai_api_version=openai.api_version,
-            openai_api_base=openai.api_base,
-            openai_api_key=openai.api_key,
-            deployment_name=self.args.gpt_version,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens
-        )
-
-        suffix_flag = False
-        reply_format_description = \
-            "Your response should choose an optimal action from a valid action list and terminate with the following format: "
-
-        # System Message
-        human_template = "Now, you are completing a challenging task. You must carefully understand the Program-aided Language method you will use and apply it to the following task.\n"
-
-        # task-irrelevant SystemMessage
-        if self.irr_few_shot_examples:
-            human_template += 'In the following example, I shall present a set of question and answer with the Program-aided Language method. Please adhere to the format and reasoning of the provided response when addressing the subsequent task.\n'
-            for i, examples in enumerate(self.irr_few_shot_examples):
-                human_template += f"\nExample {i+1}:\n"
-                human_template += "Question: \n" + examples['question'] + "\nAnswer: \n" + examples['answer']
-
-        # task-irrelevant few shot if have
-        if self.irr_few_shot_examples:
-            human_template += "\nMoving forward, I will describe the task, the goal, and the actions you may execute. Please pay close attention to comprehend the information presented below.\n"
-
-        if self.fewshot_example:
-            human_template += "I will describe the task, the goal, and the actions you may execute. Please pay close attention to comprehend the information presented below."
-        # print(fewshot_example_prompt.format(**fewshot_examples[0]))
-        human_template += '\nTask Description: {game_description} \n'
-        human_template += 'Goal Description: {goal_description}\n'
-        human_template += 'Actions Description: {action_description}\n'
-
-        if self.fewshot_example:
-            human_template += "Here, I will provide you with some guidance to help you better understand the rules of the task. Next are some examples: "
-            for i, examples in enumerate(self.fewshot_example):
-                human_template += f"\nExample {i+1}:\n"
-                human_template += "Question: \n" + examples['question'] + "\nAnswer: \n" + examples['answer']
-
-        if self.prompt_level in [2, 3, 4]:
-            if self.memory:
-                human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.\n'
-                suffix_flag = True
-                if self.prompt_level == 2:
-                    human_template += 'I have collected a few trajectories from a random policy, and the summaries are listed below.'
-                elif self.prompt_level == 3:
-                    human_template += 'I have collected a few trajectories before, and the summaries are listed below.'
-                elif self.prompt_level == 4:
-                    human_template += 'I have collected a few trajectories from an expert policy, and the summaries are listed below.'
-                human_template += self._read_mem() + "\n"
-
-        if self.use_short_mem:
-            if len(self.env_history) > 1:
-                if not suffix_flag:
-                    human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                human_template += f"\nBelow are the latest {min(self.args.short_mem_num,len(self.env_history)//2)} historical data entries:\n"
-                human_template += f"{self.env_history.get_histories(self.mem_num)}"
-        human_template += '\nNext is the observation that the agent gets:\nCurrent {state_description}\n'
-        human_template += 'Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Here is the action description below:\n{action_description}\n'
-        human_template += 'Please generate Python program as answers to given questions, similar to the provided examples.\n'
-        human_template += 'And You should calculate the final result based on the program ,not just give a code script alone!\n'
-
-        human_message_prompt = PromptTemplate(
-            template=human_template,
-            input_variables=[
-                'state_description', 'goal_description', 'game_description',
-                'action_description'],
-        )
-
-        human_message_prompt = HumanMessagePromptTemplate(prompt=human_message_prompt)
-
-        chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
-
-        if not self.logger:
-            logger.remove()
-            self.logger = logger.add(logfile, colorize=True, enqueue=True)
-        handler = FileCallbackHandler(logfile)
-
-        chain = LLMChain(llm=chat, prompt=chat_prompt, callbacks=[handler], verbose=False)
-
-        with get_openai_callback() as cb:
-            response = run_chain(
-                chain,
-                game_description=game_description,
-                state_description=state_description,
-                goal_description=goal_description,
-                action_description=action_description,
-            )
-            total_tokens = cb.total_tokens
-            total_cost = cb.total_cost
-        _response = get_last_n_lines(response, 2)
-
-
-        action = self.parser.parse(_response).action
-
-        text_prompt = chat_prompt.format_messages(
-            game_description=game_description,
-            state_description=state_description,
-            goal_description=goal_description,
-            action_description=action_description,
-        )
-        texts = ""
-        for text in text_prompt:
-            texts += text.content + "\n"
-
-        self._add_history_after_action(action)
-        self.logger.info(f'The GPT response is: {response}.')
-        self.logger.info(f'The optimal action is: {action}.')
-        if env_info.get('history'):
-            self.logger.info(f'History: {history_to_str(env_info["history"])}')
-
-        return action, texts, response, total_tokens, total_cost
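For reference, the deleted PAL decider asked the model for a Python program and then parsed only the tail of the reply via get_last_n_lines above. A minimal sketch of that extraction step (the sample reply text is invented for illustration):

    def get_last_n_lines(text, n):
        # Keep only the last n lines of the model reply, where PAL
        # expects the computed result to appear.
        return '\n'.join(text.splitlines()[-n:])

    # Hypothetical model reply: a program followed by its computed answer.
    reply = "def solve():\n    return 2\nresult = solve()\nThe final answer is: 2"
    print(get_last_n_lines(reply, 2))  # last two lines, fed to the parser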
deciders/reflexion.py CHANGED
@@ -31,7 +31,7 @@ class Reflexion(NaiveAct):
         traj = self.game_description
         traj += self.goal_description
         one_history_token = self.num_tokens_from_string(self.env_history.get_one_history())
-        history_num = 4000 // one_history_token
+        history_num = self.args.max_query_tokens // one_history_token
         traj += self.env_history.get_histories_with_last(history_num)
         self._update_mem(traj)
 
@@ -109,7 +109,7 @@ class Reflexion(NaiveAct):
             if len(self.env_history) > 1:
                 if not suffix_flag:
                     human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                human_template += f"\nBelow are the latest {self.mem_num} historical data entries:\n"
+                human_template += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
                 human_template += f"{self.env_history.get_histories(self.mem_num)}"
         human_template += '\nNext is the observation that the agent gets:\nCurrent {state_description}\n'
         human_template += 'Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Here is the action description below:\n{action_description}\n'
@@ -150,7 +150,7 @@ class Reflexion(NaiveAct):
                 action_description=action_description,
                 format_instructions=self.parser.get_format_instructions(),
                 reply_format_description=reply_format_description,
-                max_token = 3000
+                max_token = self.max_tokens
             )
 
             total_tokens += cb.total_tokens
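The 4000 -> args.max_query_tokens change above makes the history budget configurable instead of hard-coded. A minimal sketch of the budgeting idea, using tiktoken (pinned in environment.yaml); the sample entry and budget value are invented:

    import tiktoken

    def num_tokens_from_string(s: str, encoding_name: str = "cl100k_base") -> int:
        # Count tokens so the number of history entries can be budgeted
        # against the model's context window.
        return len(tiktoken.get_encoding(encoding_name).encode(s))

    one_history = "Current state: ...\nAction: 1\nReward: 0\n"  # illustrative entry
    max_query_tokens = 5000                                      # assumed CLI value
    history_num = max_query_tokens // max(num_tokens_from_string(one_history), 1)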
deciders/self_consistency.py CHANGED
@@ -64,7 +64,7 @@ class SelfConsistency(NaiveAct):
 
         if self.fewshot_example:
             human_template += "I will describe the task, the goal, and the actions you may execute. Please pay close attention to comprehend the information presented below."
-        # print(fewshot_example_prompt.format(**fewshot_examples[0]))
+
         human_template += '\nTask Description: {game_description} \n'
         human_template += 'Goal Description: {goal_description}\n'
         human_template += 'Actions Description: {action_description}\n'
@@ -91,7 +91,7 @@ class SelfConsistency(NaiveAct):
             if len(self.env_history) > 1:
                 if not suffix_flag:
                     human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                human_template += f"\nBelow are the latest {self.args.short_mem_num} historical data entries:\n"
+                human_template += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
                 human_template += f"{self.env_history.get_histories(self.mem_num)}"
         human_template += '\nNext is the observation that the agent gets:\nCurrent {state_description}\n'
         human_template += 'Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Here is the action description below:\n{action_description}\n'
deciders/selfask.py CHANGED
@@ -88,7 +88,7 @@ class SelfAskAct(NaiveAct):
             if len(self.env_history) > 1:
                 if not suffix_flag:
                     human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                human_template += f"\nBelow are the latest {self.args.short_mem_num} historical data entries:\n"
+                human_template += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
                 human_template += f"{self.env_history.get_histories(self.mem_num)}"
         human_template += '\nNext is the observation that the agent gets:\nCurrent {state_description}\n'
         human_template += 'Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Here is the action description below:\n{action_description}\n'
deciders/spp.py CHANGED
@@ -81,7 +81,7 @@ class SPP(NaiveAct):
             if len(self.env_history) > 1:
                 if not suffix_flag:
                     human_template += '\nSubsequently, I will offer pertinent guidance or information about the task. Please utilize this instruction to accomplish the given task effectively.'
-                human_template += f"\nBelow are the latest {self.args.short_mem_num} historical data entries:\n"
+                human_template += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
                 human_template += f"{self.env_history.get_histories(self.mem_num)}"
         human_template += '\nNext is the observation that the agent gets:\nCurrent {state_description}\n'
         human_template += 'Please select an action based on the current game state and the information you get. You must select the appropriate action from the given action descriptions and cannot refrain from taking action or performing any prohibited actions. Here is the action description below:\n{action_description}\n'
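The same one-line guard lands in reflexion.py, self_consistency.py, selfask.py, and spp.py: the prompt previously claimed a fixed number of history entries even when fewer had been collected. A small illustrative sketch of why the min() matters (the sample values are invented):

    env_history = ["obs/act pair 1", "obs/act pair 2"]  # only 2 entries so far
    mem_num = 10

    # Before: the header promises 10 entries while only 2 follow it.
    header_before = f"Below are the latest {mem_num} historical data entries:"
    # After: the header matches what is actually appended to the prompt.
    header_after = f"Below are the latest {min(mem_num, len(env_history))} historical data entries:"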
deciders/utils.py CHANGED
@@ -1,7 +1,6 @@
 import os
 import sys
 import openai
-from openai import OpenAI
 from tenacity import (
     retry,
     stop_after_attempt,  # type: ignore
@@ -27,10 +26,8 @@
 
 @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
 def get_completion(prompt: str, engine: str = "gpt-35-turbo", temperature: float = 0.0, max_tokens: int = 256, stop_strs: Optional[List[str]] = None) -> str:
-
-    client = OpenAI(api_key=openai.api_key)
-    response = client.chat.completions.create(
-        model=engine,
+    response = openai.Completion.create(
+        engine=engine,
         prompt=prompt,
         temperature=temperature,
         max_tokens=max_tokens,
@@ -42,7 +39,7 @@ def get_completion(prompt: str, engine: str = "gpt-35-turbo", temperature: float
     )
     return response.choices[0].text
 
-# @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
+@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
 def get_chat(prompt: str, model: str = "gpt-35-turbo", engine: str = "gpt-35-turbo", temperature: float = 0.0, max_tokens: int = 256, stop_strs: Optional[List[str]] = None, is_batched: bool = False) -> str:
     assert model != "text-davinci-003"
     messages = [
@@ -51,15 +48,13 @@ def get_chat(prompt: str, model: str = "gpt-35-turbo", engine: str = "gpt-35-tur
             "content": prompt
         }
     ]
-    # import pdb;pdb.set_trace()
-    client = OpenAI(api_key=openai.api_key)
-
-    response = client.chat.completions.create(
+    response = openai.ChatCompletion.create(
         model=model,
+        engine=engine,
         messages=messages,
         max_tokens=max_tokens,
        stop=stop_strs,
         temperature=temperature,
         # request_timeout = 1
     )
-    return response.choices[0].message.content
+    return response.choices[0]["message"]["content"]
distillers/guider.py CHANGED
@@ -9,8 +9,6 @@ class Guidance_Generator():
         self.args = args
         with open("./distillers/guidance_summary_few_shot_examples.txt", 'r') as f:
             self.SUMMARY_FEW_SHOT_EXAMPLES = f.read()
-        # with open("./distillers/exploration_few_shot_examples.txt", 'r') as f:
-        #     self.SUGGEST_FEW_SHOT_EXAMPLES = f.read()
         self.insight = ""
         self.suggestion = ""
         if logfile:
@@ -22,11 +20,17 @@ class Guidance_Generator():
         with open(file_path, 'r') as infile:
             data = json.load(infile)
         for traj in data:
-            traj_text = traj[0]['game_description']
-            traj_text += traj[0]['goal_description']
+            traj_text = traj[0]['game_description']+'\n'
+            traj_text += traj[0]['goal_description']+'\n'
             for transition in traj[-max_step_num:]:
-                traj_text += transition['observation']
-                traj_text += f"Action: {transition['action']}"
+                traj_text += transition['observation']+'\n'
+                if type(eval(transition['action'])) == type([]):
+                    action = float(eval(transition['action'])[0])-1
+                else:
+                    action = transition['action']
+                traj_text += f"Action: {action}\n"
+                traj_text += f"Reward: {transition['reward']}\n"
+                traj_text += f"Your performance is: {transition['cum_reward']}\n"
             summary = self.generate_summary(traj_text, mem)
             mem.append(summary)
         return mem
@@ -44,11 +48,6 @@
         """
         segments = []
 
-        # Summarization memory
-        # if post_memory:
-        #     segments.append('Your summarization memory is as below:')
-        #     segments.extend([f'Episode #{i}: {m}' for i, m in enumerate(post_memory)])
-
         # Trajectory
         segments.append(f"Your new collected trajectory is as below:\n {traj}")
         segments.append(f"The suggestion to guide the trajectory is:\n{self.suggestion}")
@@ -65,19 +64,6 @@
         query = '\n'.join(segments)
         return query
 
-    # def _generate_summary_query(self, traj, post_memory):
-    #     """Allows the Agent to generate exploration guidance."""
-    #     query = ""
-    #     if len(post_memory) > 0:
-    #         query += '\Your summarization memory is as below:\n'
-    #         for i, m in enumerate(post_memory):
-    #             query += f'Episode #{i}: {m}\n'
-    #     query += f"""
-    #     {traj}
-    #     Above is the trajectory of the new experience.
-    #     """
-    #     query += '\n Anwser the following questions.\n 1. What is the performance of this policy and does it improve the performance compared to before? 2. Summarize the main reason that makes the policy improve or reduce the performance; 3. What new information of the task can be inferred compared to the memory?'
-    #     return query
 
     def generate_summary(self, traj, post_memory):
         query = self._generate_summary_query(traj, post_memory)
@@ -93,6 +79,7 @@
             query += f'Episode #{i}: {m}\n'
         query += '\n Identify and summarize the key information that can be exploited to improve performance of the player.'
         insight = get_chat(query,model=self.args.gpt_version, engine=self.args.gpt_version)
+        # import pdb;pdb.set_trace()
         logger.info(f'[Reflexion Memory]The insight prompt is: {query}.')
         logger.info(f'[Reflexion Memory]The insight response is: {insight}.')
         return insight
@@ -105,26 +92,8 @@
         query += f"""You have obtained experience as below """
         for i, m in enumerate(post_memory):
             query += f'Episode #{i}: {m}\n'
-        # if max_num_trials - len(post_memory) == 1:
-        #     query = (f"\n The main goal is to aid the human player in winning the game in the next episode. "
-        #             f"This is his {len(post_memory) + 1} try out of {max(max_num_trials, 1)} episodes. "
-        #             "Your suggestions should be simple, executable with heuristic policy, and suitable for an LLM agent. "
-        #             "Reply in an item list format. Specifically, focus on:"
-        #             "\n1. How to achieve optimal performance (exploitation) using the obtained knowledge?"
-        #             "\nNote: Stress the importance of prioritizing performance without exploration.")
-        #     suggestion = get_chat(query) + "\n Remember, in this attempt, aim solely for high performance without exploration."
-        # else:
-        # if max_num_trials-len(post_memory) == 1:
-        #     query += f"\n The main aim for you is to help the human player win the game in the last episode. The next episode is the last episode. You can give suggestions before each episode. Then what is your suggestion for his next episode? Note that this is the last try and he should not explore which may decrease his performance. The suggestions should be simple to follow, executable with heuristic policy, easy to use for an llm agent,and reply in item list format. The answer should instruct him to exploit all the knowlegde to gain the highest performance (exploitation) in the next episode. "
-        # else:
+
         query += f"\n The main aim for you is to help the human player win the game in the last episode. He has only {max(max_num_trials-len(post_memory), 1)} episodes left to try.You can give suggestions before each episode. Then what is your suggestion for his next episode? Please provide simple, concise answers suitable for a six-year-old child, focusing on the following in item list format: 1. What game-relevant knowledge is critical to determine the optimal policy. Notice that the knowledge should be obtainable by interacting with the environment and helpful for the decisions.\n 2. How should the player conduct exploration in the next episode to acquire this information?\n3. How can the player exploit the information obtained to achieve higher performance in subsequent episodes?\n 4. How should exploration and exploitation be balanced to improve performance in the next episode?\n"
-        # query += (f"\n The primary goal is to assist the human player in winning the game in the final episode. "
-        #         f"This is his {len(post_memory) + 1} try out of {max(max_num_trials, 1)} episodes. "
-        #         "Provide suggestions for the next episode that balance both exploration and exploitation. "
-        #         "The suggestions should be in item list format, easy to follow, aligned with heuristic policy, and usable for an LLM agent. Address:"
-        #         "\n1. Which information the player should gather via exploration and the best ways to explore?"
-        #         "\n2. Strategies to refine the policy for enhanced performance (exploitation)?"
-        #         "\n3. How should exploration and exploitation be weighted in the next episode?")
 
         # TODO: consider the inconsistency between past suggestion and past memory.
         suggestion = get_chat(query,model=self.args.gpt_version, engine=self.args.gpt_version)
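The new trajectory serialization in generate_from_file handles actions that were logged as stringified lists (continuous control) as well as plain discrete actions; the same block is mirrored in self_reflection.py and traj_prompt_summarizer.py below. A standalone sketch of the branch, with invented sample inputs, assuming the -1 undoes the +1 shift applied when the few-shot examples were recorded:

    def action_to_text(raw_action: str):
        # Logged actions are strings: either "[2.5]" (a list, continuous
        # control) or "3" (a discrete action shifted by +1 for the LLM).
        parsed = eval(raw_action)
        if isinstance(parsed, list):
            return float(parsed[0]) - 1  # undo the +1 offset on the first dim
        return raw_action

    print(action_to_text("[2.5]"))  # -> 1.5
    print(action_to_text("3"))      # -> "3"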
distillers/self_reflection.py CHANGED
@@ -18,11 +18,17 @@ class RefletionGenerator():
         with open(file_path, 'r') as infile:
             data = json.load(infile)
         for traj in data:
-            traj_text = traj[0]['game_description']
-            traj_text += traj[0]['goal_description']
+            traj_text = traj[0]['game_description']+'\n'
+            traj_text += traj[0]['goal_description']+'\n'
             for transition in traj[-max_step_num:]:
-                traj_text += transition['observation']
-                traj_text += f"Action: {transition['action']}"
+                traj_text += transition['observation']+'\n'
+                if type(eval(transition['action'])) == type([]):
+                    action = float(eval(transition['action'])[0])-1
+                else:
+                    action = transition['action']
+                traj_text += f"Action: {action}\n"
+                traj_text += f"Reward: {transition['reward']}\n"
+                traj_text += f"Your performance is: {transition['cum_reward']}\n"
             reflection = self.generate(traj_text, mem, max_len_mem=5)
             mem.append(reflection)
         return mem
distillers/traj_prompt_summarizer.py CHANGED
@@ -1,23 +1,35 @@
 import random
 from deciders.utils import get_completion
 import json
+from loguru import logger
+
+
 class TrajPromptSummarizer():
-    def __init__(self,args=None):
+    def __init__(self,args=None,logfile=None):
         self.args = args
         with open("./distillers/traj_summary_few_shot_examples.txt", 'r') as f:
             self.FEW_SHOT_EXAMPLES = f.read()
+
+        if logfile:
+            # logger.remove()
+            logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' in x['message'])
 
     def generate_from_file(self, file_path,max_step_num=200):
         mem = []
         with open(file_path, 'r') as infile:
             data = json.load(infile)
         for traj in data:
-            traj_text = traj[0]['game_description']
-            traj_text += traj[0]['goal_description']
+            traj_text = traj[0]['game_description']+'\n'
+            traj_text += traj[0]['goal_description']+'\n'
             for transition in traj[-max_step_num:]:
-                traj_text += transition['observation']
-                traj_text += f"> {transition['action']}"
-                traj_text += f"Your performance is: {transition['cum_reward']}"
+                traj_text += transition['observation']+'\n'
+                if type(eval(transition['action'])) == type([]):
+                    action = float(eval(transition['action'])[0])-1
+                else:
+                    action = transition['action']
+                traj_text += f"Action: {action}\n"
+                traj_text += f"Reward: {transition['reward']}\n"
+                traj_text += f"Your performance is: {transition['cum_reward']}\n"
             reflection = self.generate(traj_text, mem, max_len_mem=5)
             mem.append(reflection)
         return mem
@@ -43,4 +55,6 @@ class TrajPromptSummarizer():
         else:
             reflection_query = self._generate_summary_query(traj, memory)
         reflection = get_completion(reflection_query, engine=self.args.gpt_version)
+        logger.info(f'[Reflexion Memory]The reflexion prompt is: {reflection_query}.')
+        logger.info(f'[Reflexion Memory]The reflexion response is: {reflection}.')
         return reflection
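The summarizer now mirrors the other distillers by tagging its log records and attaching a filtered loguru sink, so only reflection-memory lines reach the distiller's logfile. A minimal sketch of the pattern (the filename is invented):

    from loguru import logger

    # Only records whose message carries the tag end up in this sink.
    logger.add("reflexion_memory.log",
               filter=lambda record: '[Reflexion Memory]' in record['message'])

    logger.info('[Reflexion Memory]The reflexion prompt is: ...')  # written
    logger.info('ordinary progress message')                       # skipped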
draw_overall_performance.py DELETED
@@ -1,59 +0,0 @@
-import pandas as pd
-import matplotlib.pyplot as plt
-
-# Load the CSV data
-data = pd.read_csv("performance_data.csv")
-
-# Group games by type
-game_types = {
-    "Classic Control": ["Acrobot-v1", "CartPole-v0", "MountainCar-v0"],
-    "Box 2D": ["LunarLander-v2"],
-    "Toy Text": ["Taxi-v3", "CliffWalking-v0", "Blackjack-v1"]
-}
-
-for game_type, games in game_types.items():
-    fig, axs = plt.subplots(1, len(games), figsize=(12 * len(games), 6))
-    fig.suptitle(f"Performance Plot: {game_type}", fontsize=28, fontname="Times New Roman")
-
-    if len(games) == 1:
-        axs = [axs]
-
-    handles, labels = [], []
-
-    for idx, game in enumerate(games):
-        # Filter data to get information for the current game (in the loop)
-        game_data = data[data["game"] == game]
-
-        axs[idx].set_title(game, fontsize=20, fontname="Times New Roman")
-        axs[idx].set_xlabel("Levels", fontsize=16, fontname="Times New Roman")
-        if idx == 0:
-            axs[idx].set_ylabel("Scores", fontsize=16, fontname="Times New Roman")
-
-        for index, row in game_data.iterrows():
-            decider_name = row["decider_name"]
-            levels = ["l1", "l2", "l3", "l4", "l5"]
-            scores = row[levels].values.tolist()
-            lines = axs[idx].plot(levels, scores, "-o", label=decider_name)
-            # Grab the handle and label for creating a global legend
-            handles.append(lines[0])
-            labels.append(decider_name)
-
-    # Eliminate duplicate labels and handles
-    unique_labels = []
-    unique_handles = []
-    for handle, label in zip(handles, labels):
-        if label not in unique_labels:
-            unique_labels.append(label)
-            unique_handles.append(handle)
-
-    # Add a legend at the bottom middle of the figure
-    fig.legend(
-        unique_handles,
-        unique_labels,
-        loc="lower center",
-        ncol=4, prop={'size': 18}
-    )
-
-    # Adjust layout to accommodate the legend and prevent cropping
-
-    plt.savefig("./vis/" + game_type + ".png", dpi=300)
environment.yml → environment.yaml RENAMED
@@ -1,26 +1,21 @@
 name: llm-gym
-channels:
-  - conda-forge
-  - defaults
 dependencies:
   - _libgcc_mutex=0.1=main
   - _openmp_mutex=5.1=1_gnu
-  - aiosignal=1.2.0=pyhd3eb1b0_0
   - asttokens=2.0.5=pyhd3eb1b0_0
   - async-timeout=4.0.2=py38h06a4308_0
-  - attrs=22.1.0=py38h06a4308_0
   - backcall=0.2.0=pyhd3eb1b0_0
   - blas=1.0=mkl
   - brotlipy=0.7.0=py38h27cfd23_1003
-  - ca-certificates=2023.08.22=h06a4308_0
+  - ca-certificates=2023.01.10=h06a4308_0
   - cached-property=1.5.2=py_0
-  - certifi=2023.7.22=py38h06a4308_0
   - cffi=1.15.1=py38h5eee18b_3
   - chardet=4.0.0=py38h06a4308_1003
   - comm=0.1.2=py38h06a4308_0
   - cryptography=39.0.1=py38h9ce1e76_2
   - cudatoolkit=11.3.1=h2bc3f7f_2
   - debugpy=1.5.1=py38h295c915_0
+  - decorator=5.1.1=pyhd8ed1ab_0
   - executing=0.8.3=pyhd3eb1b0_0
   - frozenlist=1.3.3=py38h5eee18b_0
   - hdf5=1.10.6=h3ffc7dd_1
@@ -42,7 +37,6 @@ dependencies:
   - libprotobuf=3.20.3=he621ea3_0
   - libsodium=1.0.18=h7b6447c_0
   - libstdcxx-ng=11.2.0=h1234567_1
-  - loguru=0.7.1=py38h578d9bd_0
   - matplotlib-inline=0.1.6=py38h06a4308_0
   - mkl=2023.1.0=h6d00ec8_46342
   - mkl-service=2.4.0=py38h5eee18b_1
@@ -50,17 +44,16 @@ dependencies:
   - mkl_random=1.2.2=py38h417a72b_1
   - ncurses=6.4=h6a678d5_0
   - nest-asyncio=1.5.6=py38h06a4308_0
+  - numpy=1.24.3=py38hf6e8229_1
   - numpy-base=1.24.3=py38h060ed82_1
   - openssl=3.0.10=h7f8727e_2
-  - packaging=23.0=py38h06a4308_0
   - parso=0.8.3=pyhd3eb1b0_0
   - pcre=8.45=h295c915_0
   - pexpect=4.8.0=pyhd3eb1b0_3
   - pickleshare=0.7.5=pyhd3eb1b0_1003
-  - pip=23.2.1=py38h06a4308_0
+  - pip=23.1.2=py38h06a4308_0
   - platformdirs=2.5.2=py38h06a4308_0
   - prompt-toolkit=3.0.36=py38h06a4308_0
-  - psutil=5.9.0=py38h5eee18b_0
   - ptyprocess=0.7.0=pyhd3eb1b0_2
   - pure_eval=0.2.2=pyhd3eb1b0_0
   - pycparser=2.21=pyhd3eb1b0_0
@@ -70,7 +63,6 @@ dependencies:
   - python=3.8.16=h955ad1f_4
   - python-dateutil=2.8.2=pyhd3eb1b0_0
   - python_abi=3.8=2_cp38
-  - pyyaml=6.0=py38h0a891b7_4
   - pyzmq=25.1.0=py38h6a678d5_0
   - readline=8.2=h5eee18b_0
   - setuptools=67.8.0=py38h06a4308_0
@@ -81,7 +73,6 @@ dependencies:
   - tk=8.6.12=h1ccaba5_0
   - tornado=6.2=py38h5eee18b_0
   - traitlets=5.7.1=py38h06a4308_0
-  - typing_extensions=4.6.3=py38h06a4308_0
   - wcwidth=0.2.5=pyhd3eb1b0_0
   - wheel=0.38.4=py38h06a4308_0
   - xz=5.4.2=h5eee18b_0
@@ -89,105 +80,95 @@ dependencies:
   - zeromq=4.3.4=h2531618_0
   - zlib=1.2.13=h5eee18b_0
   - pip:
-    - absl-py==1.4.0
-    - aiohttp==3.8.4
-    - ale-py==0.8.1
-    - annotated-types==0.5.0
-    - appdirs==1.4.4
-    - beautifulsoup4==4.12.2
-    - box2d-py==2.3.5
-    - cachetools==5.3.1
-    - cchardet==2.1.7
-    - charset-normalizer==3.1.0
-    - click==8.1.3
-    - cloudpickle==2.2.1
-    - contourpy==1.1.0
-    - cycler==0.11.0
-    - cython==3.0.1
-    - dataclasses-json==0.5.14
-    - decorator==4.4.2
-    - docker-pycreds==0.4.0
-    - fasteners==0.18
-    - filelock==3.12.2
-    - fonttools==4.40.0
-    - fsspec==2023.6.0
-    - gitdb==4.0.10
-    - gitpython==3.1.31
-    - glfw==2.6.2
-    - google-auth==2.21.0
-    - google-auth-oauthlib==1.0.0
-    - greenlet==2.0.2
-    - grpcio==1.56.0
-    - gym==0.26.2
-    - gym-notices==0.0.8
-    - h5py==3.9.0
-    - huggingface-hub==0.15.1
-    - imageio==2.31.2
-    - imageio-ffmpeg==0.4.8
-    - importlib-metadata==6.6.0
-    - importlib-resources==5.12.0
-    - iniconfig==2.0.0
-    - kiwisolver==1.4.4
-    - langchain==0.0.284
-    - langsmith==0.0.33
-    - llvmlite==0.40.1
-    - lz4==4.3.2
-    - markdown==3.4.3
-    - markupsafe==2.1.1
-    - marshmallow==3.20.1
-    - matplotlib==3.7.1
-    - moviepy==1.0.3
-    - mujoco==2.2.0
-    - mujoco-py==2.1.2.14
-    - multidict==6.0.4
-    - numba==0.57.1
-    - numexpr==2.8.5
-    - numpy==1.24.4
-    - oauthlib==3.2.2
-    - openai==0.27.8
-    - opencv-python==4.8.0.76
-    - pathtools==0.1.2
-    - pillow==9.5.0
-    - pluggy==1.2.0
-    - proglog==0.1.10
-    - protobuf==3.19.6
-    - py==1.11.0
-    - pyasn1==0.5.0
-    - pyasn1-modules==0.3.0
-    - pydantic==2.3.0
-    - pydantic-core==2.6.3
-    - pygame==2.1.0
-    - pyopengl==3.1.7
-    - pyparsing==3.0.9
-    - pytest==7.0.1
-    - regex==2023.6.3
-    - requests==2.31.0
-    - requests-oauthlib==1.3.1
-    - rsa==4.9
-    - safetensors==0.3.1
-    - sentry-sdk==1.26.0
-    - setproctitle==1.3.2
-    - smmap==5.0.0
-    - soupsieve==2.4.1
-    - sqlalchemy==2.0.20
-    - swig==4.1.1
-    - tenacity==8.2.3
-    - tensorboard==2.14.0
-    - tensorboard-data-server==0.7.1
-    - tianshou==0.4.10
-    - tokenizers==0.13.3
-    # - torch==1.12.0+cu113
-    # - torchaudio==0.12.0+cu113
-    # - torchvision==0.13.0+cu113
-    - tqdm==4.65.0
-    - transformers==4.30.2
-    - typing==3.7.4.3
-    - typing-extensions==4.7.1
-    - typing-inspect==0.9.0
-    - urllib3
-    - v==1
-    - wandb==0.15.4
-    - werkzeug==2.3.6
-    - yarl==1.9.2
-    - zipp==3.15.0
-    - aquarel==0.0.5
+    - absl-py==1.4.0
+    - aiohttp==3.8.4
+    - aiosignal==1.3.1
+    - annotated-types==0.5.0
+    - appdirs==1.4.4
+    - aquarel==0.0.5
+    - attrs==23.1.0
+    - box2d-py==2.3.5
+    - cachetools==5.3.1
+    - certifi==2023.5.7
+    - charset-normalizer==3.1.0
+    - click==8.1.6
+    - cloudpickle==2.2.1
+    - colorama==0.4.6
+    - contourpy==1.1.0
+    - cycler==0.11.0
+    - dataclasses-json==0.5.14
+    - docker-pycreds==0.4.0
+    - filelock==3.12.3
+    - fonttools==4.40.0
+    - fsspec==2023.6.0
+    - gitdb==4.0.10
+    - gitpython==3.1.32
+    - google-auth==2.22.0
+    - google-auth-oauthlib==1.0.0
+    - greenlet==2.0.2
+    - grpcio==1.57.0
+    - gym==0.26.2
+    - gym-notices==0.0.8
+    - h5py==3.9.0
+    - huggingface-hub==0.16.4
+    - importlib-metadata==6.6.0
+    - importlib-resources==5.12.0
+    - kiwisolver==1.4.4
+    - langchain==0.0.270
+    - langsmith==0.0.25
+    - llvmlite==0.40.1
+    - logger==1.4
+    - loguru==0.7.0
+    - markdown==3.4.4
+    - markupsafe==2.1.3
+    - marshmallow==3.20.1
+    - matplotlib==3.7.1
+    - multidict==6.0.4
+    - mypy-extensions==1.0.0
+    - numba==0.57.1
+    - numexpr==2.8.5
+    - oauthlib==3.2.2
+    - openai==0.27.8
+    - packaging==23.1
+    - pandas==2.0.3
+    - pathtools==0.1.2
+    - pillow==9.5.0
+    - protobuf==3.19.6
+    - psutil==5.9.5
+    - pyasn1==0.5.0
+    - pyasn1-modules==0.3.0
+    - pydantic==1.10.11
+    - pydantic-core==2.6.1
+    - pygame==2.1.0
+    - pyparsing==3.0.9
+    - pytz==2023.3.post1
+    - pyyaml==6.0.1
+    - regex==2023.8.8
+    - requests==2.31.0
+    - requests-oauthlib==1.3.1
+    - rsa==4.9
+    - safetensors==0.3.3
+    - seaborn==0.13.0
+    - sentry-sdk==1.28.1
+    - setproctitle==1.3.2
+    - smmap==5.0.0
+    - sqlalchemy==2.0.20
+    - swig==4.1.1
+    - tenacity==8.2.3
+    - tensorboard==2.14.0
+    - tensorboard-data-server==0.7.1
+    - tiktoken==0.4.0
+    - timeout-decorator==0.5.0
+    - tokenizers==0.13.3
+    - tqdm==4.65.0
+    - transformers==4.30.2
+    - typing-extensions==4.7.1
+    - typing-inspect==0.9.0
+    - tzdata==2023.3
+    - urllib3==1.26.16
+    - v==1
+    - wandb==0.15.5
+    - werkzeug==2.3.7
+    - win32-setctime==1.1.0
+    - yarl==1.9.2
+    - zipp==3.15.0
envs/classic_control/acrobot_policies.py CHANGED
@@ -6,7 +6,7 @@ import numpy as np
 def dedicated_1_policy(state, pre_action=1):
     def get_description():
         return "Always select action 1"
-    dedicated_0_policy.description = get_description()
+    dedicated_1_policy.description = get_description()
     return 1
 
 def dedicated_2_policy(state, pre_action=1):
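This corrects a copy-paste bug: dedicated_1_policy tried to set its description on dedicated_0_policy, the wrong function object, so the intended metadata never reached the policy being called. A minimal sketch of the repo's pattern of attaching metadata to the function object itself:

    def dedicated_1_policy(state, pre_action=1):
        # Attach the human-readable description to this function object;
        # referencing another policy's name here was the original bug.
        dedicated_1_policy.description = "Always select action 1"
        return 1

    dedicated_1_policy(None)
    print(dedicated_1_policy.description)  # -> "Always select action 1"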
envs/toy_text/few_shot_examples/cliffwalking_l2.json CHANGED
The diff for this file is too large to render. See raw diff
 
envs/toy_text/few_shot_examples/cliffwalking_l4.json CHANGED
The diff for this file is too large to render. See raw diff
 
gen_examples.sh DELETED
@@ -1,55 +0,0 @@
-# # (Wenhao Li, 2023-09-06, 09:20)
-# # Important !!!
-# # For environment that truncate at 200 steps automatically, you could set the max_episode_len to greater than 200.
-# # Otherwise, you need to set the max_episode_len to 200 manually (for fair comparison).
-
-# # L2
-# ## Cartpole env
-# python gen_few_shots_examples.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider random_actor --max_episode_len 1000 --n_episodes 5
-
-# ## Acrobot-v1 env
-# # Note that we want to use the Acrobot-v0 but it is deprecated in gym 0.26.2.
-# # So we use Acrobot-v1 instead and set the max_episode_len to 200.
-# python gen_few_shots_examples.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider random_actor --max_episode_len 200 --n_episodes 5
-
-# ## MountainCar-v0 env
-# python gen_few_shots_examples.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider random_actor --max_episode_len 1000 --n_episodes 5
-
-# ## LunarLander-v2 env
-# python gen_few_shots_examples.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider random_actor --max_episode_len 1000 --n_episodes 5
-
-# # Blacjack-v1 env
-# # (Wenhao Li, 2023-09-06, 10:00)
-# # random_actor is too weak, so we need to set the n_episodes to a larger number (100).
-# # the n_episodes should be set to a smaller number for other more powerful deciders.
-
-# # (Wenhao Li, 2023-09-07, 20:25)
-# # reset n_episodes to 2 (default value) for fair comparison.
-# python gen_few_shots_examples.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider random_actor --max_episode_len 200 --n_episodes 5
-
-# # Taxi-v3 env
-# python gen_few_shots_examples.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider random_actor --max_episode_len 1000 --n_episodes 5
-
-# # CliffWalking-v0 env
-# python gen_few_shots_examples.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider random_actor --max_episode_len 200 --n_episodes 5
-
-# # FrozenLake-v1 env
-# python gen_few_shots_examples.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider random_actor --max_episode_len 1000 --n_episodes 5
-
-# L4
-## Cartpole env
-python gen_few_shots_examples.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider expert --policy_path RL_based/checkpoints/CartPole-v0/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider expert --policy_path RL_based/checkpoints/LunarLander-v2/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider expert --policy_path RL_based/checkpoints/Acrobot-v1/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider expert --policy_path RL_based/checkpoints/MountainCar-v0/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider expert --policy_path RL_based/checkpoints/Blackjack-v1/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider expert --policy_path RL_based/checkpoints/Taxi-v3/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider expert --policy_path RL_based/checkpoints/CliffWalking-v0/expert/policy.pth --max_episode_len 200 --n_episodes 5
-
-python gen_few_shots_examples.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider expert --policy_path RL_based/checkpoints/FrozenLake-v1/expert/policy.pth --max_episode_len 200 --n_episodes 5
gen_few_shots_examples.py DELETED
@@ -1,269 +0,0 @@
-import argparse
-import envs
-import deciders
-from matplotlib import animation
-import matplotlib.pyplot as plt
-import os
-import numpy as np
-import torch as th
-from envs.translator import InitSummarizer, CurrSummarizer, FutureSummarizer, Translator
-from tianshou.data import Collector, VectorReplayBuffer, ReplayBuffer
-from tianshou.policy import PPOPolicy
-from RL_based.utils import (
-    Net_GRU_Bert_tianshou,
-    Net_Bert_CLS_tianshou,
-    Net_Bert_CNN_tianshou,
-    Net_GRU_nn_emb_tianshou,
-)
-from tianshou.utils.net.common import ActorCritic
-from tianshou.utils.net.discrete import Actor, Critic
-import gym
-import json
-
-ENV_CLASS = {'classic_control': ['CartPole', 'Acrobot', 'MountainCar'],
-             'box2d': ['LunarLander'],
-             'toy_text': ['Blackjack', 'Taxi', 'CliffWalking', 'FrozenLake']}
-
-def get_env_class(env_name):
-    for key, value in ENV_CLASS.items():
-        if env_name in value:
-            return key
-    return None
-
-def get_fewshot_example_path(env, decider):
-    assert decider in ['random_actor', 'expert'], "decider must be random_actor or expert"
-    prompt_level = 2 if decider == 'random_actor' else 4
-    fewshot_example_path = os.path.join(
-        'envs', get_env_class(env.spec.name), 'few_shot_examples',
-        ''.join([env.spec.name.lower(), '_l', str(prompt_level), '.json']))
-    return fewshot_example_path
-
-# https://colab.research.google.com/drive/1DdWsGi10232orUv-reY4wsTmT0VMoHaX?usp=sharing#scrollTo=4OfVmDKk7XvG
-# LLMs bias on 0 so make the actions greater than 1 instead.
-
-def gen_expert_examples(environment, policy, file_path, max_episode_len=120, n_episodes=1):
-    replaybuffer = ReplayBuffer(size=1000)
-    test_collector_1 = Collector(policy, environment, replaybuffer)
-    test_collector_1.reset_env()
-    game_description = environment.get_game_description()
-    goal_description = environment.get_goal_description()
-    action_description = environment.get_action_description()
-    policy.eval()
-    data_lst = []
-
-    for _ in range(n_episodes):
-        test_collector_1.reset_buffer()
-        result = test_collector_1.collect(n_episode=1)
-        sample_result = replaybuffer.sample(0)[0]
-        round = 0
-        utility = 0
-        data = []
-        for transition in sample_result:
-            round += 1
-            if round > max_episode_len:
-                break
-            question = f"{transition.obs} \n {goal_description} \n {action_description} "
-            reward = transition.rew
-            utility += reward
-
-            answer = f"The final answer is: {transition.act + 1}"
-
-            data.append(
-                {
-                    "observation": transition.obs,
-                    "goal_description": goal_description,
-                    "action_description": action_description,
-                    "game_description": game_description,
-                    "action": str(transition.act + 1),
-                    "question": question,
-                    "answer": answer,
-                    "reward": reward,
-                    "cum_reward": utility,
-                }
-            )
-            print(f"Now it is round {round}")
-        data_lst.append(data)
-    # Return the final reward
-    with open(file_path, "w") as outfile:
-        json.dump(data_lst, outfile)
-    return utility
-
-
-def gen_examples(environment, decider, file_path, max_episode_len=200, n_episodes=1):
-    game_description = environment.get_game_description()
-    goal_description = environment.get_goal_description()
-    action_description = environment.get_action_description()
-    frames = []
-    utilities = []
-    data_lst = []
-
-    for _ in range(n_episodes):
-        # Reset the environment
-        round = 0
-        state_description, env_info = environment.reset()
-        utility = 0
-        data = []
-        for _ in range(max_episode_len):
-            # Keep asking ChatGPT for an action until it provides a valid one
-            asking_round = 0
-            action, prompt, answer, _, _, _ = decider.act(
-                state_description,
-                action_description,
-                env_info,
-                game_description,
-                goal_description,
-            )
-            # Perform the action in the environment
-            state_description, reward, terminated, truncated, env_info = environment.step_llm(
-                action
-            )
-            question = f"{state_description} \n {goal_description} \n {action_description} "
-            utility += reward
-            answer += f"The final answer is: {action}"
-
-            data.append(
-                {
-                    "observation": state_description,
-                    "goal_description": goal_description,
-                    "action_description": action_description,
-                    "game_description": game_description,
-                    "action": action,
-                    "question": question,
-                    "answer": answer,
-                    "reward": reward,
-                    "cum_reward": utility,
-                }
-            )
-            print(f"Now it is round {round}")
-            round += 1
-            # If the game is over, break the loop
-            if terminated or truncated:
-                print(f"Terminated!")
-                break
-        utilities.append(utility)
-        data_lst.append(data)
-    # Return the final reward
-    with open(file_path, "w") as outfile:
-        json.dump(data_lst, outfile)
-    return utility
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Generate few shots examples of a gym environment."
-    )
-    parser.add_argument(
-        "--init_summarizer",
-        type=str,
-        required=True,
-        help="The name of the init summarizer to use.",
-    )
-    parser.add_argument(
-        "--curr_summarizer",
-        type=str,
-        required=True,
-        help="The name of the curr summarizer to use.",
-    )
-    parser.add_argument(
-        "--env",
-        type=str,
-        default="base_env",
-        help="The name of the gym environment to use.",
-    )
-    parser.add_argument(
-        "--decider",
-        type=str,
-        default="naive_actor",
-        help="The actor used to select action",
-    )
-    parser.add_argument(
-        "--env_name",
-        type=str,
-        default="CartPole-v0",
-        help="The name of the gym environment to use.",
-    )
-    parser.add_argument(
-        "--max_episode_len",
-        type=int,
-        default=200,
-        help="The maximum number of steps in an episode.",
-    )
-    parser.add_argument(
-        "--num_episodes",
-        type=int,
-        default=1,
-        help="The number of episodes to collect data.",
-    )
-    parser.add_argument(
-        "--max_length",
-        type=int,
-        default=128,
-        help="The token length of the observation",
-    )
-    parser.add_argument(
-        "--trans_model_name",
-        type=str,
-        default="/home/ubuntu/LLM-Decider-Bench/RL_based/transformer_offline_distilbert",
-        help="The name of the pretrained transformer to use.",
-    )
-    parser.add_argument(
-        "--policy_path",
-        type=str,
-        default=None,
-        help="The path to the policy to be evaluated",
-    )
-    parser.add_argument(
-        "--n_episodes",
-        type=int,
-        default=2,
-        help="The number of episodes to collect data (for env where episode is too short).",
-    )
-
-    args = parser.parse_args()
-    # Get the specified translator, environment, and ChatGPT model
-    device = "cuda" if th.cuda.is_available() else "cpu"
-    env_class = envs.REGISTRY[args.env]
-    init_summarizer = InitSummarizer(envs.REGISTRY[args.init_summarizer])
-    curr_summarizer = CurrSummarizer(envs.REGISTRY[args.curr_summarizer])
-    translator = Translator(init_summarizer, curr_summarizer, None, env=None)
-    environment = env_class(gym.make(args.env_name, render_mode=None), translator)
-
-    fewshot_example_path = get_fewshot_example_path(environment, args.decider)
-
-    if args.decider == "expert":
-        net = Net_GRU_nn_emb_tianshou(
-            hidden_sizes=[256, 128],
-            device=device,
-            max_length=args.max_length,
-            trans_model_name=args.trans_model_name,
-        )
-        actor = Actor(net, environment.action_space.n, device=device).to(device)
-        critic = Critic(net, device=device).to(device)
-        actor_critic = ActorCritic(actor, critic)
-        optim = th.optim.Adam(actor_critic.parameters(), lr=0.0003)
-
-        # PPO policy
-        dist = th.distributions.Categorical
-        policy = PPOPolicy(
-            actor,
-            critic,
-            optim,
-            dist,
-            action_space=environment.action_space,
-            deterministic_eval=True,
-        )
-        policy.load_state_dict(th.load(args.policy_path))
-        utility = gen_expert_examples(
-            environment, policy, fewshot_example_path,
-            max_episode_len=args.max_episode_len, n_episodes=args.n_episodes
-        )
-    else:
-        decider_class = deciders.REGISTRY[args.decider]
-        decider = decider_class(environment.env.action_space)
-        # Evaluate the translator
-        utility = gen_examples(
-            environment, decider, fewshot_example_path,
-            max_episode_len=args.max_episode_len,
-            n_episodes=args.n_episodes
-        )
-    print(f"(Avg.) Cummulative reward: {utility}")
main_merge.py DELETED
@@ -1,365 +0,0 @@
1
- import argparse
2
- import envs
3
- import deciders
4
- import distillers
5
- from matplotlib import animation
6
- import matplotlib.pyplot as plt
7
- import prompts as task_prompts
8
- import os
9
- import datetime
10
- import time
11
- from collections import deque
12
- from envs.translator import InitSummarizer, CurrSummarizer, FutureSummarizer, Translator
13
- import gym
14
- import json
15
- import pandas as pd
16
- import random
17
- import numpy as np
18
- import datetime
19
- from loguru import logger
20
-
21
-
22
- def set_seed(seed):
23
- random.seed(seed)
24
-
25
- def save_frames_as_gif(frames, path="./", filename="gym_animation.gif"):
26
- # Mess with this to change frame size
27
- plt.figure(figsize=(frames[0].shape[1] / 72.0, frames[0].shape[0] / 72.0), dpi=72)
28
-
29
- patch = plt.imshow(frames[0])
30
- plt.axis("off")
31
-
32
- def animate(i):
33
- patch.set_data(frames[i])
34
-
35
- anim = animation.FuncAnimation(plt.gcf(), animate, frames=len(frames), interval=50)
36
-
37
- # Ensure the folder exists, if it does not exist, create it
38
- os.makedirs(path, exist_ok=True)
39
- print(f"file name: {filename}")
40
- print(f"path name: {path}")
41
- anim.save(path + filename, writer="imagemagick", fps=60)
42
-
43
-
44
- def evaluate_translator(translator, environment, decider, max_episode_len, logfile, args):
45
- utilities = []
46
- df = pd.read_csv('record_reflexion.csv', sep=',')
47
- filtered_df = df[(df['env'] == args.env_name) & (df['decider'] == 'expert') & (df['level'] == 1)]
48
- expert_score = filtered_df['avg_score'].item()
49
- seeds = [i*100 for i in range(100)][-args.num_trails:]
50
- seeds_index = -1
51
- # prompt_file = "prompt.txt"
52
- # f = open(prompt_file,"w+")
53
- if not "Blackjack" in args.env_name:
54
- curriculums = 1
55
- num_trails = args.num_trails
56
- else:
57
- curriculums = 20
58
- num_trails = args.num_trails // 20
59
- for trail in range(num_trails):
60
- for curriculum in range(curriculums):
61
- seeds_index += 1
62
- if "Blackjack" in args.env_name:
63
- seed = seeds[trail*curriculums+curriculum]
64
- else:
65
- seed = args.seed
66
- utility = _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed)
67
- utilities.append(utility)
68
- # TODO: set env sucess utility threshold
69
- if args.decider in ['reflexion']:
70
- if utility < expert_score:
71
- decider.update_mem()
72
- else:
73
- decider.update_mem()
74
- # wandb.log({'memory': decider.memory})
75
- # with open('./mem.json', 'w') as f:
76
- # json.dump(decider.memory, f) #, cls=NumpyArrayEncoder)
77
- # f.close()
78
- return utilities
79
-
80
- def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
81
- # Reset the environment
82
- if not "Blackjack" in args.env_name:
83
- set_seed(args.seed)
84
- # Reset the environment
85
- state_description, env_info = environment.reset(seed=args.seed)
86
- else:
87
- set_seed(seed)
88
- # Reset the environment
89
- state_description, env_info = environment.reset(seed=seed)
90
- game_description = environment.get_game_description()
91
- goal_description = environment.get_goal_description()
92
- action_description = environment.get_action_description()
93
-
94
- # Initialize the history
95
- if args.past_horizon:
96
- raise NotImplementedError
97
- history = deque(maxlen=args.past_horizon)
98
- env_info['history'] = history
99
-
100
- # Initialize the statistics
101
- frames = []
102
- utility = 0
103
- current_total_tokens = 0
104
- current_total_cost = 0
105
- columns = ["Prompt", "Response", "Action", "Return", "#All Tokens", "All Cost"]
106
- start_time = datetime.datetime.now()
107
- # Run the game for a maximum number of steps
108
- for round in range(max_episode_len):
109
- # If the past horizon is specified, keep track of the past states, actions, and rewards
110
- if args.past_horizon:
111
- previous_tuples = {'state': None, 'action': None, 'reward': None}
112
-
113
- # Keep asking ChatGPT for an action until it provides a valid one
114
- asking_round = 0
115
- error_flag = True
116
- retry_num = 2
117
- for error_i in range(retry_num):
118
- try:
119
- action, prompt, response, tokens, cost = decider.act(
120
- state_description,
121
- action_description,
122
- env_info,
123
- game_description,
124
- goal_description,
125
- logfile
126
- )
127
-
128
- if args.past_horizon:
129
- raise NotImplementedError
130
- previous_tuples['state'] = state_description
131
-
132
- # Perform the action in the environment
133
- if "Continuous" in args.env_name:
134
- action = [action]
135
-
136
-
137
- state_description, reward, termination, truncation, env_info = environment.step_llm(
138
- action
139
- )
140
- utility += reward
141
-
142
- if args.past_horizon:
143
- raise NotImplementedError
144
- previous_tuples['action'] = action
145
- previous_tuples['reward'] = reward
146
- history.append(previous_tuples)
147
- env_info['history'] = history
148
-
149
- # Update the statistics
150
- current_total_tokens += tokens
151
- current_total_cost += cost
152
- error_flag = False
153
- break
154
- except Exception as e:
155
- print(e)
156
- if error_i < retry_num-1:
157
- decider.env_history.remove_invalid_state()
158
- if logger:
159
- logger.debug(f"Error: {e}, Retry! ({error_i+1}/{retry_num})")
160
- continue
161
- # If the action is still invalid after 5 tries, use the default action
162
- # file.write(prompt+"\n"+"======================================\n")
163
- if error_flag:
164
- if "Continuous" in args.env_name:
165
- action = [decider.default_action]
166
- else:
167
- action = decider.default_action
168
- state_description, reward, termination, truncation, env_info = environment.step_llm(
169
- action
170
- )
171
- utility += reward
172
-
173
- if args.past_horizon:
174
- raise NotImplementedError
175
- previous_tuples['action'] = action
176
- previous_tuples['reward'] = reward
177
- history.append(previous_tuples)
178
- env_info['history'] = history
179
-
180
- # Update the statistics
181
- decider.env_history.add('action', decider.default_action)
182
- logger.info(f'The optimal action is: {decider.default_action}.')
183
- logger.info(f"Now it is round {round}.")
184
- else:
185
- current_total_tokens += tokens
186
- current_total_cost += cost
187
- # print(prompt)
188
- logger.info(f"current_total_tokens: {current_total_tokens}")
189
- logger.info(f"current_total_cost: {current_total_cost}")
190
- logger.info(f"Now it is round {round}.")
191
-
192
- frames.append(environment.render())
193
-
194
- # If the game is over, break the loop
195
- if termination or truncation:
196
- if logger:
197
- logger.info(f"Terminated!")
198
- # save_frames_as_gif(
199
- # frames,
200
- # path=f"./images/{environment.env_name}/",
201
- # filename=f"{translator.__class__.__name__}.gif",
202
- # )
203
- break
204
- time.sleep(1)
205
- decider.env_history.add("cummulative_reward", str(utility))
206
- # Record the final reward
207
- if logger:
208
- logger.info(f"Cummulative reward: {utility}.")
209
- end_time = datetime.datetime.now()
210
- time_diff = end_time - start_time
211
- logger.info(f"Time consumer: {time_diff.total_seconds()} s")
212
- return utility
213
-
214
-
215
- if __name__ == "__main__":
216
- parser = argparse.ArgumentParser(
217
- description="Evaluate a translator in a gym environment with a ChatGPT model."
218
- )
219
- parser.add_argument(
220
- "--init_summarizer",
221
- type=str,
222
- required=True,
223
- help="The name of the init summarizer to use.",
224
- )
225
- parser.add_argument(
226
- "--curr_summarizer",
227
- type=str,
228
- required=True,
229
- help="The name of the curr summarizer to use.",
230
- )
231
- parser.add_argument(
232
- "--future_summarizer",
233
- type=str,
234
- help="The name of the future summarizer to use.",
235
- )
236
- parser.add_argument(
237
- "--env",
238
- type=str,
239
- default="base_env",
240
- help="The name of the gym environment to use.",
241
- )
242
- parser.add_argument(
243
- "--env_name",
244
- type=str,
245
- default="CartPole-v0",
246
- help="The name of the gym environment to use.",
247
- )
248
- parser.add_argument(
249
- "--decider",
250
- type=str,
251
- default="spp_actor",
252
- help="The actor used to select action",
253
- )
254
- parser.add_argument(
255
- "--gpt_version", type=str, default="gpt-35-turbo", help="The version of GPT to use"
256
- )
257
- parser.add_argument(
258
- "--render", type=str, default="rgb_array", help="The render mode"
259
- )
260
- parser.add_argument(
261
- "--max_episode_len",
262
- type=int,
263
- default=200,
264
- help="The maximum number of steps in an episode",
265
- )
266
- parser.add_argument(
267
- "--past_horizon", type=int, help="The horizon of looking back"
268
- )
269
- parser.add_argument(
270
- "--future_horizon", type=int, help="The horizon of looking to the future"
271
- )
272
- parser.add_argument(
273
- "--distiller",
274
- type=str,
275
- default="traj_distiller",
276
- help="The distiller used to generate a few shot examples from traj",
277
- )
278
- parser.add_argument(
279
- "--prompt_path",
280
- type=str,
281
- default="envs/classic_control/few_shot_examples/cartpole",
282
- help="The path of prompts",
283
- )
284
- parser.add_argument(
285
- "--prompt_level",
286
- type=int,
287
- default=1,
288
- help="The level of prompts",
289
- )
290
- parser.add_argument(
291
- "--num_trails",
292
- type=int,
293
- default=5,
294
- help="The number of trials",
295
- )
296
- parser.add_argument(
297
- "--use_short_mem",
298
- type=int,
299
- default=1,
300
- help="Whether use short mem",
301
- )
302
- parser.add_argument(
303
- "--seed",
304
- type=int,
305
- default=100,
306
- help="set seed",
307
- )
308
- parser.add_argument(
309
- "--short_mem_num",
310
- type=int,
311
- default=10,
312
- help="Set numbers of short memories used in actor, if use_short_mem = 1"
313
- )
314
- args = parser.parse_args()
315
-
316
- # Get the specified translator, environment, and ChatGPT model
317
- env_class = envs.REGISTRY[args.env]
318
- init_summarizer = InitSummarizer(envs.REGISTRY[args.init_summarizer])
319
- curr_summarizer = CurrSummarizer(envs.REGISTRY[args.curr_summarizer])
320
-
321
- if args.future_summarizer:
322
- future_summarizer = FutureSummarizer(
323
- envs.REGISTRY[args.future_summarizer],
324
- envs.REGISTRY["cart_policies"],
325
- future_horizon=args.future_horizon,
326
- )
327
- else:
328
- future_summarizer = None
329
-
330
- decider_class = deciders.REGISTRY[args.decider]
331
- distiller_class = distillers.REGISTRY[args.distiller](args=args)
332
- sampling_env = envs.REGISTRY["sampling_wrapper"](gym.make(args.env_name))
333
- if args.prompt_level == 5:
334
- prompts_class = task_prompts.REGISTRY[(args.env_name,args.decider)]()
335
- else:
336
- prompts_class = task_prompts.REGISTRY[(args.decider)]()
337
- translator = Translator(
338
- init_summarizer, curr_summarizer, future_summarizer, env=sampling_env
339
- )
340
- environment = env_class(
341
- gym.make(args.env_name, render_mode=args.render), translator
342
- )
343
-
344
- logfile = (
345
- f"llm.log/output-{args.env_name}-{args.decider}-{args.gpt_version}-l{args.prompt_level}"
346
- f"-{datetime.datetime.now().timestamp()}.log"
347
- )
348
- if "reflexion" in args.decider or "jarvis" in args.decider:
349
- logfile_reflexion = (
350
- f"llm.log/memory-{args.env_name}-{args.decider}-{args.gpt_version}-l{args.prompt_level}"
351
- f"-{datetime.datetime.now().timestamp()}.log"
352
- )
353
- my_distiller = distiller_class(logfile_reflexion)
354
- else:
355
- my_distiller = distiller_class()
356
- args.game_description = environment.game_description
357
- args.goal_description = environment.goal_description
358
- args.action_description = environment.action_description
359
-
360
- logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' not in x['message'])
361
-
362
- decider = decider_class(environment.env.action_space, args, prompts_class, my_distiller, temperature=0.0, logger=logger)
363
-
364
- # Evaluate the translator
365
- evaluate_translator(translator, environment, decider, args.max_episode_len, logfile, args)
main_merge.sh DELETED
@@ -1,123 +0,0 @@
1
- # L1: --prompt_level 1; L2: --prompt_level 2 --distiller traj_distiller; L4: --prompt_level 4 --distiller traj_distiller; L5: --prompt_level 5
2
- # Use History: --use_short_mem 1 or --use_short_mem 0 (default)
3
- # prompt_level default: 1
4
-
5
- # CartPole-v0
6
- # L1
7
- # Naive Actor
8
- python main_merge.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --seed 0
9
- # PAL
10
- python main_merge.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider pal_actor --seed 0
11
- # COT
12
- python main_merge.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --seed 0
13
- # self consistency
14
- python main_merge.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --seed 0
15
- # self-ask
16
- python main_merge.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --seed 0
17
- # SPP
18
- python main_merge.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --seed 0
19
-
20
- # LunarLander-v2
21
- # L1
22
- # Naive Actor
23
- python main_merge.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider naive_actor --seed 0
24
- # PAL
25
- python main_merge.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider pal_actor --seed 0
26
- # COT
27
- python main_merge.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider cot_actor --seed 0
28
- # self consistency
29
- python main_merge.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider self_consistency_actor --seed 0
30
- # self-ask
31
- python main_merge.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider selfask_actor --seed 0
32
- # SPP
33
- python main_merge.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider spp_actor --prompt_level 1 --seed 0
34
-
35
- # Acrobot-v1
36
- # L1
37
- # Naive Actor
38
- # python main_merge.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 1
39
- # # PAL
40
- # python main_merge.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 1
41
- # # COT
42
- # python main_merge.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 1
43
- # # self consistency
44
- # python main_merge.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 1
45
- # # self-ask
46
- # python main_merge.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 1
47
- # # SPP
48
- # python main_merge.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 1
49
-
50
- # MountainCar-v0
51
- # L1
52
- # Naive Actor
53
- # python main_merge.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider naive_actor --prompt_level 1
54
- # # PAL
55
- # python main_merge.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider pal_actor --prompt_level 1
56
- # # COT
57
- # python main_merge.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider cot_actor --prompt_level 1
58
- # # self consistency
59
- # python main_merge.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider self_consistency_actor --prompt_level 1
60
- # # self-ask
61
- # python main_merge.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider selfask_actor --prompt_level 1
62
- # # SPP
63
- # python main_merge.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider spp_actor --prompt_level 1
64
-
65
- # Blackjack-v1
66
- # L1
67
- # Naive Actor
68
- python main_merge.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 1 --seed 0
69
- # PAL
70
- python main_merge.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider pal_actor --prompt_level 1 --seed 0
71
- # COT
72
- python main_merge.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 1 --seed 0
73
- # self consistency
74
- python main_merge.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 1 --seed 0
75
- # self-ask
76
- python main_merge.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 1 --seed 0
77
- # SPP
78
- python main_merge.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 1 --seed 0
79
-
80
- # Taxi-v3
81
- # L1
82
- # Naive Actor
83
- # python main_merge.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider naive_actor --prompt_level 1
84
- # # PAL
85
- # python main_merge.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider pal_actor --prompt_level 1
86
- # # COT
87
- # python main_merge.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider cot_actor --prompt_level 1
88
- # # self consistency
89
- # python main_merge.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider self_consistency_actor --prompt_level 1
90
- # # self-ask
91
- # python main_merge.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider selfask_actor --prompt_level 1
92
- # # SPP
93
- # python main_merge.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider spp_actor --prompt_level 1
94
-
95
- # CliffWalking-v0
96
- # L1
97
- # Naive Actor
98
- # python main_merge.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 1
99
- # # PAL
100
- # python main_merge.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 1
101
- # # COT
102
- # python main_merge.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 1
103
- # # self consistency
104
- # python main_merge.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 1
105
- # # self-ask
106
- # python main_merge.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 1
107
- # # SPP
108
- # python main_merge.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 1
109
-
110
- # FrozenLake-v1
111
- # L1
112
- # Naive Actor
113
- python main_merge.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 1 --seed 0
114
- # PAL
115
- python main_merge.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 1 --seed 0
116
- # COT
117
- python main_merge.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 1 --seed 0
118
- # self consistency
119
- python main_merge.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 1 --seed 0
120
- # self-ask
121
- python main_merge.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 1 --seed 0
122
- # SPP
123
- python main_merge.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 1 --seed 0
main_reflexion.py CHANGED
@@ -70,10 +70,6 @@ def evaluate_translator(translator, environment, decider, max_episode_len, logfile, args):
         else:
             decider.update_mem()
         decider.clear_mem()
-    # wandb.log({'memory': decider.memory})
-    # with open('./mem.json', 'w') as f:
-    #     json.dump(decider.memory, f) #, cls=NumpyArrayEncoder)
-    # f.close()
     return utilities
 
 def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
@@ -91,27 +87,15 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
     goal_description = environment.get_goal_description()
     action_description = environment.get_action_description()
 
-    # Initialize the history
-    if args.past_horizon:
-        raise NotImplementedError
-        history = deque(maxlen=args.past_horizon)
-        env_info['history'] = history
-
     # Initialize the statistics
     frames = []
     utility = 0
     current_total_tokens = 0
     current_total_cost = 0
-    columns = ["Prompt", "Response", "Action", "Return", "#All Tokens", "All Cost"]
     start_time = datetime.datetime.now()
     # Run the game for a maximum number of steps
     for round in range(max_episode_len):
-        # If the past horizon is specified, keep track of the past states, actions, and rewards
-        if args.past_horizon:
-            previous_tuples = {'state': None, 'action': None, 'reward': None}
-
         # Keep asking ChatGPT for an action until it provides a valid one
-        asking_round = 0
         error_flag = True
         retry_num = 1
         for error_i in range(retry_num):
@@ -125,29 +109,19 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
                     logfile
                 )
 
-                if args.past_horizon:
-                    raise NotImplementedError
-                    previous_tuples['state'] = state_description
-
-                # Perform the action in the environment
                 if "Continuous" in args.env_name:
                     action = [action]
 
-
                 state_description, reward, termination, truncation, env_info = environment.step_llm(
                     action
                 )
                 if "Cliff" in args.env_name or "Frozen" in args.env_name:
                     decider.env_history.add('reward', env_info['potential_state'] + environment.reward_desc_dict[reward])
+                else:
+                    decider.env_history.add('reward', f"The player get rewards {reward}.")
+
                 utility += reward
 
-                if args.past_horizon:
-                    raise NotImplementedError
-                    previous_tuples['action'] = action
-                    previous_tuples['reward'] = reward
-                    history.append(previous_tuples)
-                    env_info['history'] = history
-
                 # Update the statistics
                 current_total_tokens += tokens
                 current_total_cost += cost
@@ -162,8 +136,6 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
                 if logger:
                     logger.debug(f"Error: {e}, Retry! ({error_i+1}/{retry_num})")
                 continue
-        # If the action is still invalid after 5 tries, use the default action
-        # file.write(prompt+"\n"+"======================================\n")
        if error_flag:
            if "Continuous" in args.env_name:
                action = [decider.default_action]
@@ -180,14 +152,6 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
                decider.env_history.add('reward', env_info['potential_state'] + environment.reward_desc_dict[reward])
            utility += reward
 
-            if args.past_horizon:
-                raise NotImplementedError
-                previous_tuples['action'] = action
-                previous_tuples['reward'] = reward
-                history.append(previous_tuples)
-                env_info['history'] = history
-
-            # Update the statistics
 
            logger.info(f"Seed: {seed}")
            logger.info(f'The optimal action is: {decider.default_action}.')
@@ -195,23 +159,15 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail, seed):
        else:
            current_total_tokens += tokens
            current_total_cost += cost
-            # print(prompt)
            logger.info(f"Seed: {seed}")
            logger.info(f"current_total_tokens: {current_total_tokens}")
            logger.info(f"current_total_cost: {current_total_cost}")
            logger.info(f"Now it is round {round}.")
 
        frames.append(environment.render())
-
-        # If the game is over, break the loop
        if termination or truncation:
            if logger:
                logger.info(f"Terminated!")
-            # save_frames_as_gif(
-            #     frames,
-            #     path=f"./images/{environment.env_name}/",
-            #     filename=f"{translator.__class__.__name__}.gif",
-            # )
            break
        time.sleep(1)
    decider.env_history.add('terminate_state', environment.get_terminate_state(round+1, max_episode_len))
@@ -277,10 +233,16 @@ if __name__ == "__main__":
         help="The maximum number of steps in an episode",
     )
     parser.add_argument(
-        "--past_horizon", type=int, help="The horizon of looking back"
+        "--max_query_tokens",
+        type=int,
+        default=5000,
+        help="The maximum number of tokens when querying",
     )
     parser.add_argument(
-        "--future_horizon", type=int, help="The horizon of looking to the future"
+        "--max_tokens",
+        type=int,
+        default=2000,
+        help="The maximum number of tokens when responding",
     )
     parser.add_argument(
         "--distiller",
@@ -306,12 +268,6 @@ if __name__ == "__main__":
         default=5,
         help="The number of trials",
     )
-    parser.add_argument(
-        "--trajectories_num",
-        type=int,
-        default=20,
-        help="The number of trials",
-    )
     parser.add_argument(
         "--use_short_mem",
         type=int,
@@ -327,7 +283,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--short_mem_num",
         type=int,
-        default=20,
+        default=10,
         help="Set numbers of short memories used in actor, if use_short_mem = 1"
     )
     parser.add_argument(
@@ -370,14 +326,13 @@ if __name__ == "__main__":
         f"llm.log/output-{args.env_name}-{args.decider}-{args.gpt_version}-l{args.prompt_level}"
         f"-{datetime.datetime.now().timestamp()}.log"
     )
-    if "reflexion" in args.decider or "jarvis" in args.decider:
-        logfile_reflexion = (
+
+    logfile_reflexion = (
         f"llm.log/memory-{args.env_name}-{args.decider}-{args.gpt_version}-l{args.prompt_level}"
         f"-{datetime.datetime.now().timestamp()}.log"
     )
-        my_distiller = distiller_class(logfile_reflexion,args=args)
-    else:
-        my_distiller = distiller_class(args=args)
+    my_distiller = distiller_class(logfile=logfile_reflexion,args=args)
+
     args.game_description = environment.game_description
     args.goal_description = environment.goal_description
     args.action_description = environment.action_description
@@ -386,11 +341,6 @@ if __name__ == "__main__":
 
     logger.add(logfile, colorize=True, enqueue=True, filter=lambda x: '[Reflexion Memory]' not in x['message'])
 
-    fixed_suggestion = None
-    fixed_insight = None
-    if "jarvis" in args.decider:
-        decider = decider_class(environment.env.action_space, args, prompts_class, my_distiller, temperature=0.0, logger=logger, fixed_suggestion=fixed_suggestion, fixed_insight=fixed_insight)
-    else:
-        decider = decider_class(environment.env.action_space, args, prompts_class, my_distiller, temperature=0.0, logger=logger)
+    decider = decider_class(environment.env.action_space, args, prompts_class, my_distiller, temperature=0.0, logger=logger, max_tokens=args.max_tokens)
     # Evaluate the translator
     evaluate_translator(translator, environment, decider, args.max_episode_len, logfile, args)
memory/env_history.py CHANGED
@@ -17,7 +17,7 @@ class EnvironmentHistory:
 
     def __str__(self) -> str:
         s = ''
-        for i, item in enumerate(self._history[-150:]):
+        for i, item in enumerate(self._history):
            if item['label'] == 'action':
                s += f'He takes action: {item["value"]}'
            elif item['label'] == 'observation':
@@ -137,4 +137,5 @@ class EnvironmentHistory:
        self._history = self._history[:-1]
 
    def __len__(self) -> int:
-        return len(self._history)
+        action = [item for item in self._history if item['label'] == 'action' ]
+        return len(action)
prompts/__init__.py CHANGED
@@ -18,10 +18,7 @@ REGISTRY[('self_consistency_actor')] = prompts.CONSISTENCY
 REGISTRY[('selfask_actor')] = prompts.SELFASK
 REGISTRY[('spp_actor')] = prompts.SPP
 REGISTRY[('reflexion_actor')] = prompts.REFLEXION
-REGISTRY[('jarvis_actor')] = prompts.JARVIS
-REGISTRY[('jarvis_actor_woi')] = prompts.JARVIS
-REGISTRY[('jarvis_actor_wosug')] = prompts.JARVIS
-REGISTRY[('jarvis_actor_wosh')] = prompts.JARVIS
+REGISTRY[('exe_actor')] = prompts.EXE
 
 # CartPole-v0
 REGISTRY[("CartPole-v0","naive_actor")] = cartpole.ACT
@@ -31,10 +28,7 @@ REGISTRY[("CartPole-v0",'self_consistency_actor')] = cartpole.CONSISTENCY
 REGISTRY[("CartPole-v0",'selfask_actor')] = cartpole.SELFASK
 REGISTRY[("CartPole-v0",'spp_actor')] = cartpole.SPP
 REGISTRY[("CartPole-v0",'reflexion_actor')] = cartpole.REFLEXION
-REGISTRY[("CartPole-v0",'jarvis_actor')] = cartpole.EGG
-REGISTRY[("CartPole-v0",'jarvis_actor_woi')] = cartpole.EGGWithoutInsights
-REGISTRY[("CartPole-v0",'jarvis_actor_wosug')] = cartpole.EGGWithoutSuggestions
-REGISTRY[("CartPole-v0",'jarvis_actor_wosh')] = cartpole.EGG
+REGISTRY[("CartPole-v0",'exe_actor')] = cartpole.EXE
 
 # LunarLander-v2
 REGISTRY[("LunarLander-v2","naive_actor")] = LunarLander.ACT
@@ -44,10 +38,7 @@ REGISTRY[("LunarLander-v2",'self_consistency_actor')] = LunarLander.CONSISTENCY
 REGISTRY[("LunarLander-v2",'selfask_actor')] = LunarLander.SELFASK
 REGISTRY[("LunarLander-v2",'spp_actor')] = LunarLander.SPP
 REGISTRY[("LunarLander-v2",'reflexion_actor')] = LunarLander.REFLEXION
-REGISTRY[("LunarLander-v2",'jarvis_actor')] = LunarLander.EGG
-REGISTRY[("LunarLander-v2",'jarvis_actor_woi')] = LunarLander.EGGWithoutInsights
-REGISTRY[("LunarLander-v2",'jarvis_actor_wosug')] = LunarLander.EGGWithoutSuggestions
-REGISTRY[("LunarLander-v2",'jarvis_actor_wosh')] = LunarLander.EGG
+REGISTRY[("LunarLander-v2",'exe_actor')] = LunarLander.EXE
 
 
 # Acrobot-v1
@@ -58,10 +49,7 @@ REGISTRY[("Acrobot-v1",'self_consistency_actor')] = acrobot.CONSISTENCY
 REGISTRY[("Acrobot-v1",'selfask_actor')] = acrobot.SELFASK
 REGISTRY[("Acrobot-v1",'spp_actor')] = acrobot.SPP
 REGISTRY[("Acrobot-v1",'reflexion_actor')] = acrobot.REFLEXION
-REGISTRY[("Acrobot-v1",'jarvis_actor')] = acrobot.EGG
-REGISTRY[("Acrobot-v1",'jarvis_actor_woi')] = acrobot.EGGWithoutInsights
-REGISTRY[("Acrobot-v1",'jarvis_actor_wosug')] = acrobot.EGGWithoutSuggestions
-REGISTRY[("Acrobot-v1",'jarvis_actor_wosh')] = acrobot.EGG
+REGISTRY[("Acrobot-v1",'exe_actor')] = acrobot.EXE
 
 # MountainCar-v0
 REGISTRY[("MountainCar-v0","naive_actor")] = mountaincar.ACT
@@ -71,10 +59,7 @@ REGISTRY[("MountainCar-v0",'self_consistency_actor')] = mountaincar.CONSISTENCY
 REGISTRY[("MountainCar-v0",'selfask_actor')] = mountaincar.SELFASK
 REGISTRY[("MountainCar-v0",'spp_actor')] = mountaincar.SPP
 REGISTRY[("MountainCar-v0",'reflexion_actor')] = mountaincar.REFLEXION
-REGISTRY[("MountainCar-v0",'jarvis_actor')] = mountaincar.EGG
-REGISTRY[("MountainCar-v0",'jarvis_actor_woi')] = mountaincar.EGGWithoutInsights
-REGISTRY[("MountainCar-v0",'jarvis_actor_wosug')] = mountaincar.EGGWithoutSuggestions
-REGISTRY[("MountainCar-v0",'jarvis_actor_wosh')] = mountaincar.EGG
+REGISTRY[("MountainCar-v0",'exe_actor')] = mountaincar.EXE
 
 # Blackjack-v1
 REGISTRY[("Blackjack-v1","naive_actor")] = blackjack.ACT
@@ -84,10 +69,7 @@ REGISTRY[("Blackjack-v1",'self_consistency_actor')] = blackjack.CONSISTENCY
 REGISTRY[("Blackjack-v1",'selfask_actor')] = blackjack.SELFASK
 REGISTRY[("Blackjack-v1",'spp_actor')] = blackjack.SPP
 REGISTRY[("Blackjack-v1",'reflexion_actor')] = blackjack.REFLEXION
-REGISTRY[("Blackjack-v1",'jarvis_actor')] = blackjack.EGG
-REGISTRY[("Blackjack-v1",'jarvis_actor_woi')] = blackjack.EGGWithoutInsights
-REGISTRY[("Blackjack-v1",'jarvis_actor_wosug')] = blackjack.EGGWithoutSuggestions
-REGISTRY[("Blackjack-v1",'jarvis_actor_wosh')] = blackjack.EGG
+REGISTRY[("Blackjack-v1",'exe_actor')] = blackjack.EXE
 
 # Taxi-v3
 REGISTRY[("Taxi-v3","naive_actor")] = taxi.ACT
@@ -97,11 +79,7 @@ REGISTRY[("Taxi-v3",'self_consistency_actor')] = taxi.CONSISTENCY
 REGISTRY[("Taxi-v3",'selfask_actor')] = taxi.SELFASK
 REGISTRY[("Taxi-v3",'spp_actor')] = taxi.SPP
 REGISTRY[("Taxi-v3",'reflexion_actor')] = taxi.REFLEXION
-REGISTRY[("Taxi-v3",'jarvis_actor')] = taxi.EGG
-REGISTRY[("Taxi-v3",'jarvis_actor_woi')] = taxi.EGGWithoutInsights
-REGISTRY[("Taxi-v3",'jarvis_actor_wosug')] = taxi.EGGWithoutSuggestions
-REGISTRY[("Taxi-v3",'jarvis_actor_wosh')] = taxi.EGG
-
+REGISTRY[("Taxi-v3",'exe_actor')] = taxi.EXE
 # CliffWalking-v0
 REGISTRY[("CliffWalking-v0","naive_actor")] = cliffwalking.ACT
 REGISTRY[("CliffWalking-v0","cot_actor")] = cliffwalking.COT
@@ -110,10 +88,7 @@ REGISTRY[("CliffWalking-v0",'self_consistency_actor')] = cliffwalking.CONSISTENCY
 REGISTRY[("CliffWalking-v0",'selfask_actor')] = cliffwalking.SELFASK
 REGISTRY[("CliffWalking-v0",'spp_actor')] = cliffwalking.SPP
 REGISTRY[("CliffWalking-v0",'reflexion_actor')] = cliffwalking.REFLEXION
-REGISTRY[("CliffWalking-v0",'jarvis_actor')] = cliffwalking.EGG
-REGISTRY[("CliffWalking-v0",'jarvis_actor_woi')] = cliffwalking.EGGWithoutInsights
-REGISTRY[("CliffWalking-v0",'jarvis_actor_wosug')] = cliffwalking.EGGWithoutSuggestions
-REGISTRY[("CliffWalking-v0",'jarvis_actor_wosh')] = cliffwalking.EGG
+REGISTRY[("CliffWalking-v0",'exe_actor')] = cliffwalking.EXE
 
 # FrozenLake-v1
 REGISTRY[("FrozenLake-v1","naive_actor")] = frozenlake.ACT
@@ -123,10 +98,7 @@ REGISTRY[("FrozenLake-v1",'self_consistency_actor')] = frozenlake.CONSISTENCY
 REGISTRY[("FrozenLake-v1",'selfask_actor')] = frozenlake.SELFASK
 REGISTRY[("FrozenLake-v1",'spp_actor')] = frozenlake.SPP
 REGISTRY[("FrozenLake-v1",'reflexion_actor')] = frozenlake.REFLEXION
-REGISTRY[("FrozenLake-v1",'jarvis_actor')] = frozenlake.EGG
-REGISTRY[("FrozenLake-v1",'jarvis_actor_woi')] = frozenlake.EGGWithoutInsights
-REGISTRY[("FrozenLake-v1",'jarvis_actor_wosug')] = frozenlake.EGGWithoutSuggestions
-REGISTRY[("FrozenLake-v1",'jarvis_actor_wosh')] = frozenlake.EGG
+REGISTRY[("FrozenLake-v1",'exe_actor')] = frozenlake.EXE
 
 # MountainCarContinuous-v0
 REGISTRY[("MountainCarContinuous-v0","naive_actor")] = mountaincarContinuous.ACT
@@ -136,7 +108,4 @@ REGISTRY[("MountainCarContinuous-v0",'self_consistency_actor')] = mountaincarContinuous.CONSISTENCY
 REGISTRY[("MountainCarContinuous-v0",'selfask_actor')] = mountaincarContinuous.SELFASK
 REGISTRY[("MountainCarContinuous-v0",'spp_actor')] = mountaincarContinuous.SPP
 REGISTRY[("MountainCarContinuous-v0",'reflexion_actor')] = mountaincarContinuous.REFLEXION
-REGISTRY[("MountainCarContinuous-v0",'jarvis_actor')] = mountaincarContinuous.EGG
-REGISTRY[("MountainCarContinuous-v0",'jarvis_actor_woi')] = mountaincarContinuous.EGGWithoutInsights
-REGISTRY[("MountainCarContinuous-v0",'jarvis_actor_wosug')] = mountaincarContinuous.EGGWithoutSuggestions
-REGISTRY[("MountainCarContinuous-v0",'jarvis_actor_wosh')] = mountaincarContinuous.EGG
+REGISTRY[("MountainCarContinuous-v0",'exe_actor')] = mountaincarContinuous.EXE
prompts/task_irrelevant/prompts.py CHANGED
@@ -2,7 +2,7 @@ class ACT:
     def __init__(self):
         self.TASK_IRRELEVANT_PROMPTS = []
 
-class JARVIS:
+class EXE:
     def __init__(self):
         self.TASK_IRRELEVANT_PROMPTS = []
 
prompts/task_relevant/box2d/LunarLander.py CHANGED
@@ -464,7 +464,7 @@ class REFLEXION:
             },
         ]
 
-class EGG:
+class EXE:
     def __init__(self):
         self.PERCEPTRON_BASIC_FS_EXAMPLES = [
             {
@@ -510,79 +510,3 @@ class EXE:
                 """
             },
         ]
-
-class EGGWithoutInsights:
-    def __init__(self):
-        self.PERCEPTRON_BASIC_FS_EXAMPLES = [
-            {
-                "question":
-                """
-                State description: The lander is at position (-0.01, 1.39), the horizontal speed of movement is -0.65, the vertical velocity speed of movement is -0.41. The angle is 0.01 radians, and it's rotating at 0.13 radians per second. The left leg is not in contact with ground. The right leg is not in contact with ground.
-                Goal description: The goal is to successfully land the lander on the landing pad which is at position (0, 0) with a vertical velocity close to 0, and make sure all two legs are up and the lander is balanced.
-                Action description: Please choose an action. Type '1' to do noting, '2' to fire left engine and make lander move to right, '3' to fire main engine and make lander move to up, or '4' to fire right engine and make lander move to left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
-                The suggestions are listed below:
-                1. For exploration in the next episode, the player should focus on gathering more information about the lander's movement and behavior during descent. This can be done by varying the thrust and angle adjustments to see how the lander responds, and observing any patterns or trends in its movement. The player can also try different starting positions and initial forces to see how they affect the lander's trajectory.
-                2. To improve the policy for higher performance in the next episode, the player should focus on fine-tuning the thrust and angle adjustments to optimize the lander's descent and landing. This can be done by analyzing the data gathered from exploration and adjusting the policy accordingly. The player should also pay attention to the lander's rotation and angle to ensure a gentle landing on the pad.
-                3. The player should weigh exploration and exploitation equally in the next episode, as both are important for improving the policy and achieving a successful landing. The player should continue to gather information through exploration while also using that information to make informed decisions during exploitation.
-                """,
-                "answer":
-                """
-                Based on the current game state and the suggestions, the optimal action for the player to take would be to fire main engine (action 3) to reduce the descent velocity of the spacecraft. Therefore, the optimal action to take now is to fire main engine (action 3).
-                """
-            },
-            {
-                "question":
-                """
-                State description: The lander is at position (0.31, 0.04), the horizontal speed of movement is -0.21, the vertical velocity speed of movement is -0.09. The angle is 0.24 radians, and it's rotating at 0.17 radians per second. The left leg is not in contact with ground. The right leg is in contact with ground.
-                Goal description: The goal is to successfully land the lander on the landing pad which is at position (0, 0) with a vertical velocity close to 0, and make sure all two legs are up and the lander is balanced.
-                Action description: Please choose an action. Type '1' to do noting, '2' to fire left engine and make lander move to right, '3' to fire main engine and make lander move to up, or '4' to fire right engine and make lander move to left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
-                The suggestions are listed below:
-                1. Exploration:
-                - The player should explore the effect of adjusting the thrust and angle of the lander during descent to ensure a gentle landing on the pad.
-                - To make the exploration, the player can try different combinations of thrust and angle adjustments during descent and observe the effect on the lander's movement and performance score.
-                2. Exploitation:
-                - The player should improve the policy by taking into account the contact with the ground and adjusting the thrust accordingly to ensure a gentle landing.
-                - Additionally, the policy should also take into account the angle and rotation of the lander to ensure that it lands upright on the pad.
-                - To improve the policy, the player can use the information obtained from the previous episodes and adjust the thrust and angle accordingly during descent.
-                3. Trade-off:
-                - The player should focus more on exploitation in the next episode as they have already explored different thrust and angle adjustments in the previous episodes.
-                - However, the player should still allocate some time for exploration to fine-tune the policy and ensure a successful landing on the pad.
-                - A good trade-off would be to allocate seventy percent of the time for exploitation and thirty percent of the time for exploration.
-                """,
-                "answer":
-                """
-                Based on the current game state and the suggestions, the optimal action for the player to take would be to fire main engine (action 3) to reduce the descent velocity of the spacecraft. Therefore, the optimal action to take now is to fire main engine (action 3).
-                """
-            },
-        ]
-
-class EGGWithoutSuggestions:
-    def __init__(self):
-        self.PERCEPTRON_BASIC_FS_EXAMPLES = [
-            {
-                "question":
-                """
-                State description: The lander is at position (-0.01, 1.39), the horizontal speed of movement is -0.65, the vertical velocity speed of movement is -0.41. The angle is 0.01 radians, and it's rotating at 0.13 radians per second. The left leg is not in contact with ground. The right leg is not in contact with ground.
-                Goal description: The goal is to successfully land the lander on the landing pad which is at position (0, 0) with a vertical velocity close to 0, and make sure all two legs are up and the lander is balanced.
-                Action description: Please choose an action. Type '1' to do noting, '2' to fire left engine and make lander move to right, '3' to fire main engine and make lander move to up, or '4' to fire right engine and make lander move to left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
-                The insights of the game are listed below: The key information that can be exploited to improve the player's performance includes taking into account the lander's movement and adjusting the thrust accordingly to stabilize its descent, considering the angle and rotation of the lander to ensure a gentle landing, and fine-tuning the policy to optimize the thrust and angle adjustments for a smoother landing. Additionally, the player should avoid constantly applying thrust to the lander as it may not be effective in successfully landing the lander on the pad.
-                """,
-                "answer":
-                """
-                Based on the current game state and the insights, the optimal action for the player to take would be to fire main engine (action 3) to reduce the descent velocity of the spacecraft. Therefore, the optimal action to take now is to fire main engine (action 3).
-                """
-            },
-            {
-                "question":
-                """
-                State description: The lander is at position (0.31, 0.04), the horizontal speed of movement is -0.21, the vertical velocity speed of movement is -0.09. The angle is 0.24 radians, and it's rotating at 0.17 radians per second. The left leg is not in contact with ground. The right leg is in contact with ground.
-                Goal description: The goal is to successfully land the lander on the landing pad which is at position (0, 0) with a vertical velocity close to 0, and make sure all two legs are up and the lander is balanced.
-                Action description: Please choose an action. Type '1' to do noting, '2' to fire left engine and make lander move to right, '3' to fire main engine and make lander move to up, or '4' to fire right engine and make lander move to left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
-                The insights of the game are listed below: The key information that can be exploited to improve the player's performance includes taking into account the lander's movement and adjusting the thrust accordingly to stabilize its descent, considering the angle and rotation of the lander to ensure a gentle landing, and adjusting the thrust to ensure a gentle landing when the lander makes contact with the ground. Additionally, fine-tuning the policy to optimize the thrust and angle adjustments for a smoother landing can further improve performance.
-                """,
-                "answer":
-                """
-                Based on the current game state and the insights, the optimal action for the player to take would be to fire main engine (action 3) to reduce the descent velocity of the spacecraft. Therefore, the optimal action to take now is to fire main engine (action 3).
-                """
-            },
-        ]
prompts/task_relevant/classic_control/acrobot.py CHANGED
@@ -362,7 +362,7 @@ class REFLEXION:
             },
         ]
 
-class EGG:
+class EXE:
     def __init__(self):
         self.PERCEPTRON_BASIC_FS_EXAMPLES = [
             {
@@ -420,92 +420,4 @@ class EXE:
                 Based on the current game state, the insights and the suggestions, the optimal action for the player to take would be to apply 1 torque (Action 3). Therefore, the optimal action to take now is to apply 1 torque (Action 3).
                 """
             },
-        ]
-
-class EGGWithoutInsights:
-    def __init__(self):
-        self.PERCEPTRON_BASIC_FS_EXAMPLES = [
-            {
-                "question":
-                """
-                State description: Current Game State: Link1: angle theta1 -0.04 radians, rotating 0.02 radians per second clockwise. Link2: angle theta2 0.02 radians relative to Link1, rotating 0.07 radians per second counterclockwise.
-                Goal description: The goal is to apply torque on the actuator to swing the free end of the linear chain above the target height, which is constructed as: -cos(theta1) - cos(theta2 + theta1) > 1.0.
-                Action description: Your Next Move: \n Please choose an action. Type '1' to apply -1 torque, '2' to apply 0 torque, or '3' to apply 1 torque. Ensure you provide the action number from the valid action list, i.e., [1, 2, 3].
-                The suggestions are listed below:
-                1. Exploration:
-                - The player should explore different torque values and directions to find the optimal policy.
-                - To make the exploration, the player can randomly choose torque values and directions for a certain number of steps, then evaluate the performance and adjust the policy accordingly.
-
-                2. Exploitation:
-                - The player should adjust the torque based on the current state of the system, such as the angles and angular velocities of the links.
-                - The player should also try to apply torque in a way that helps swing the free end of the linear chain above the target height.
-                - To improve the policy, the player can use techniques such as reinforcement learning or model-based control.
-
-                3. Weighting:
-                - The player should focus more on exploration in the beginning of the episode to find the optimal policy.
-                - As the episode progresses, the player should shift the focus towards exploitation to gain a higher performance.
-                - The player should also adjust the weighting based on the performance and adjust the policy accordingly.
-                """,
-                "answer":
-                """
-                Based on the current game state and the suggestions, the optimal action for the player to take would be to apply -1 torque (Action 1). Therefore, the optimal action to take now is to apply -1 torque (Action 1).
-                """
-            },{
-                "question":
-                """
-                State description: Current Game State: Link1: angle theta1 0.31 radians, rotating 0.76 radians per second clockwise. Link2: angle theta2 -0.73 radians relative to Link1, rotating 0.98 radians per second counterclockwise.
-                Goal description: The goal is to apply torque on the actuator to swing the free end of the linear chain above the target height, which is constructed as: -cos(theta1) - cos(theta2 + theta1) > 1.0.
-                Action description: Your Next Move: \n Please choose an action. Type '1' to apply -1 torque, '2' to apply 0 torque, or '3' to apply 1 torque. Ensure you provide the action number from the valid action list, i.e., [1, 2, 3].
-                The suggestions are listed below:
-                1. Exploration:
-                - The player should explore different torque values and directions, rather than sticking to a consistent pattern.
-                - The player should also adjust the torque based on the current state of the system, such as the angles and angular velocities of the links.
-                - To make the exploration, the player can randomly choose torque values and directions, or try to systematically vary the torque values and directions to cover a wider range of possibilities.
-
-                2. Exploitation:
-                - The player should use the information obtained from exploration to inform a more adaptive and effective policy.
-                - The player can use techniques such as reinforcement learning or model-based control to improve the policy.
-                - The policy should take into account the current state of the system, such as the angles and angular velocities of the links, to adjust the torque values and directions accordingly.
-
-                3. Weighting for exploration and exploitation:
-                - The player should balance exploration and exploitation to find the optimal policy.
-                - In the beginning of the episode, the player should focus more on exploration to gather information about the system and find a wider range of possible solutions.
-                - As the episode progresses, the player should shift the focus towards exploitation to improve the policy and achieve a higher performance.
-                - The weighting can be adjusted
-                """,
-                "answer":
-                """
-                Based on the current game state and the suggestions, the optimal action for the player to take would be to apply 1 torque (Action 3). Therefore, the optimal action to take now is to apply 1 torque (Action 3).
479
- """
480
- },
481
- ]
482
-
483
- class EGGWithoutSuggestions:
484
- def __init__(self):
485
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
486
- {
487
- "question":
488
- """
489
- State description: Current Game State: Link1: angle theta1 -0.04 radians, rotating 0.02 radians per second clockwise. Link2: angle theta2 0.02 radians relative to Link1, rotating 0.07 radians per second counterclockwise.
490
- Goal description: The goal is to apply torque on the actuator to swing the free end of the linear chain above the target height, which is constructed as: -cos(theta1) - cos(theta2 + theta1) > 1.0.
491
- Action description: Your Next Move: \n Please choose an action. Type '1' to apply -1 torque, '2' to apply 0 torque, or '3' to apply 1 torque. Ensure you provide the action number from the valid action list, i.e., [1, 2, 3].
492
- The insights of the game are listed below: The key information that can be exploited to improve the player's performance is that the current policy of consistently applying torque in one direction is not effective in achieving the goal of swinging the free end of the linear chain above the target height. The player may benefit from exploring different torque values and directions and adjusting its torque based on the current state of the system, such as the angles and angular velocities of the links. This information can be used to inform a more adaptive and effective policy, potentially using techniques such as reinforcement learning or model-based control.
493
- """,
494
- "answer":
495
- """
496
- Based on the current game state and the insights, the optimal action for the player to take would be to apply -1 torque (Action 1). Therefore, the optimal action to take now is to apply -1 torque (Action 1).
497
- """
498
- },{
499
- "question":
500
- """
501
- State description: Current Game State: Link1: angle theta1 0.31 radians, rotating 0.76 radians per second clockwise. Link2: angle theta2 -0.73 radians relative to Link1, rotating 0.98 radians per second counterclockwise.
502
- Goal description: The goal is to apply torque on the actuator to swing the free end of the linear chain above the target height, which is constructed as: -cos(theta1) - cos(theta2 + theta1) > 1.0.
503
- Action description: Your Next Move: \n Please choose an action. Type '1' to apply -1 torque, '2' to apply 0 torque, or '3' to apply 1 torque. Ensure you provide the action number from the valid action list, i.e., [1, 2, 3].
504
- The insights of the game are listed below: The key information that can be exploited to improve the performance of the player is that the current policy of consistently applying torque in one direction is not effective in achieving the goal of swinging the free end of the linear chain above the target height. The player may benefit from exploring different torque values and directions and adjusting its torque based on the current state of the system, such as the angles and angular velocities of the links. This information can be used to inform a more adaptive and effective policy, potentially using techniques such as reinforcement learning or model-based control.
505
- """,
506
- "answer":
507
- """
508
- Based on the current game state and the insights, the optimal action for the player to take would be to apply 1 torque (Action 3). Therefore, the optimal action to take now is to apply 1 torque (Action 3).
509
- """
510
- },
511
  ]
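
The deleted acrobot suggestions above advise adjusting the torque from the links' angles and angular velocities. One common concrete rule of that kind is energy pumping: push in the direction the second link is already rotating. A small illustrative sketch (not code from this commit):

    def acrobot_heuristic(theta1, theta1_dot, theta2, theta2_dot):
        # Prompt numbering: 1 = apply -1 torque, 2 = apply 0 torque,
        # 3 = apply +1 torque. Pushing with the second link's current
        # rotation feeds energy into the swing.
        if theta2_dot > 0:
            return 3
        if theta2_dot < 0:
            return 1
        return 2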
prompts/task_relevant/classic_control/cartpole.py CHANGED
@@ -364,7 +364,7 @@ class REFLEXION:
364
  },
365
  ]
366
 
367
- class EGG:
+ class EXE:
368
  def __init__(self):
369
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
370
  {
@@ -401,69 +401,3 @@ class EGG:
401
  """
402
  },
403
  ]
404
-
405
- class EGGWithoutInsights:
406
- def __init__(self):
407
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
408
- {
409
- "question":
410
- """
411
- State description: The current state is: The cart is positioned at -0.053, with a velocity of 0.01 towards the right. The pole is tilted at 0.05 radians, rotating at 0.11 radians per second towards the right.
412
- Goal description: The goal is to keep the pole balanced upright for as long as possible.
413
- Action description: Valid action list: [1, 2]. Action 1 means to push the cart to the left, and Action 2 means push the cart to the right.
414
- The suggestions are listed below:Suggestion for the next episode:
415
- 1. Exploration: The player should explore different strategies for recovering from large tilts of the pole. This can be done by trying out different actions when the pole is tilted beyond a certain angle, such as moving the cart in the opposite direction or applying a stronger force. The player can also try to observe the behavior of the pole and cart in different situations to gain a better understanding of the dynamics of the game.
416
- 2. Exploitation: To improve performance, the player can adjust the policy to take into account the increasing velocity of the cart towards the end of the game. This can be done by increasing the sensitivity of the policy to changes in the velocity of the cart, or by using a different algorithm that is better suited to handling non-stationary environments. Additionally, the player can incorporate a mechanism for learning from past mistakes, such as experience replay, to help the policy recover from large tilts of the pole.
417
- 3. Weighting: The player should focus more on exploitation than exploration in the next episode, as they have already explored different strategies in the previous episodes. However, they should still allocate some time for exploration to gain a better understanding of the dynamics of the game and to try out new strategies for recovering from large til
418
- """,
419
- "answer":
420
- """
421
- Based on the current game state and the suggestions, the optimal action for the player to take would be to push the cart to the right (action 2) to maintain balance of the pole. Therefore, the optimal action to take now is to push the cart to the right (action 2).
422
- """
423
- },{
424
- "question":
425
- """
426
- State description: The current state is: The cart is positioned at 0.033, with a velocity of 0.05 towards the left. The pole is tilted at 0.02 radians, rotating at 0.03 radians per second towards the right.
427
- Goal description: The goal is to keep the pole balanced upright for as long as possible.
428
- Action description: Valid action list: [1, 2]. Action 1 means to push the cart to the left, and Action 2 means push the cart to the right.
429
- The suggestions are listed below:
430
- 1. For exploration, the player should try to experiment with different action sequences to see if there are any patterns that lead to longer pole balancing times. One way to do this is to randomly select actions for a certain number of steps before returning to the current policy.
431
- 2. To improve policy performance, the player can try incorporating a mechanism for learning from past mistakes, such as experience replay. Additionally, the policy can be adjusted to take into account the increasing velocity of the cart by adding a penalty for large changes in cart velocity.
432
- 3. The player should focus more on exploitation than exploration in the next episode, as they have already tried multiple exploration strategies in previous episodes. However, they should still allocate a small portion of their actions to exploration to avoid getting stuck in a suboptimal policy. A good ratio to start with could be 80% exploitation and 20% exploration.
433
- """,
434
- "answer":
435
- """
436
- Based on the current game state and the suggestions, the optimal action for the player to take would be to push the cart to the right (action 2) to maintain balance of the pole. Therefore, the optimal action to take now is to push the cart to the right (action 2).
437
- """
438
- },
439
- ]
440
-
441
- class EGGWithoutSuggestions:
442
- def __init__(self):
443
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
444
- {
445
- "question":
446
- """
447
- State description: The current state is: The cart is positioned at -0.053, with a velocity of 0.01 towards the right. The pole is tilted at 0.05 radians, rotating at 0.11 radians per second towards the right.
448
- Goal description: The goal is to keep the pole balanced upright for as long as possible.
449
- Action description: Valid action list: [1, 2]. Action 1 means to push the cart to the left, and Action 2 means push the cart to the right.
450
- The insights of the game are listed below: The key information that can be exploited to improve performance of the player includes the fact that the policy alternates between moving the cart left and right based on the pole's tilt, but struggles to recover from large tilts of the pole. Additionally, the velocity of the cart increases rapidly towards the end of the game, indicating that the policy may not be able to keep up with the increasing difficulty of the game. To improve performance, the policy can be adjusted to take into account the increasing velocity of the cart or a different algorithm can be used. Incorporating a mechanism for learning from past mistakes, such as experience replay, may also be beneficial.
451
- """,
452
- "answer":
453
- """
454
- Based on the current game state and the insights, the optimal action for the player to take would be to push the cart to the right (action 2) to maintain balance of the pole. Therefore, the optimal action to take now is to push the cart to the right (action 2).
455
- """
456
- },{
457
- "question":
458
- """
459
- State description: The current state is: The cart is positioned at 0.033, with a velocity of 0.05 towards the left. The pole is tilted at 0.02 radians, rotating at 0.03 radians per second towards the right.
460
- Goal description: The goal is to keep the pole balanced upright for as long as possible.
461
- Action description: Valid action list: [1, 2]. Action 1 means to push the cart to the left, and Action 2 means push the cart to the right.
462
- The insights of the game are listed below: The key information that can be exploited to improve performance of the player includes the fact that the policy alternates between moving the cart left and right based on the pole's tilt, but struggles to recover from large tilts of the pole. Additionally, the velocity of the cart increases rapidly towards the end of the game, which may indicate that the policy is not able to keep up with the increasing difficulty of the game. To improve performance, the policy can be adjusted to take into account the increasing velocity of the cart or a different algorithm can be used. Incorporating a mechanism for learning from past mistakes, such as experience replay, may also be beneficial.
463
- """,
464
- "answer":
465
- """
466
- Based on the current game state and the insights, the optimal action for the player to take would be to push the cart to the right (action 2) to maintain balance of the pole. Therefore, the optimal action to take now is to push the cart to the right (action 2).
467
- """
468
- },
469
- ]
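
Several of the deleted cartpole suggestions recommend "a mechanism for learning from past mistakes, such as experience replay". For reference, a minimal replay buffer of the kind they allude to (an illustrative sketch, not code from this commit):

    import random
    from collections import deque

    class ReplayBuffer:
        def __init__(self, capacity=10_000):
            self.buffer = deque(maxlen=capacity)  # oldest transitions evicted

        def push(self, state, action, reward, next_state, done):
            self.buffer.append((state, action, reward, next_state, done))

        def sample(self, batch_size):
            # Uniform sampling decorrelates consecutive updates.
            return random.sample(self.buffer, min(batch_size, len(self.buffer)))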
prompts/task_relevant/classic_control/mountaincar.py CHANGED
@@ -336,7 +336,7 @@ class REFLEXION:
336
  },
337
  ]
338
 
339
- class EGG:
+ class EXE:
340
  def __init__(self):
341
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
342
  {
@@ -378,74 +378,3 @@ class EGG:
378
  """
379
  },
380
  ]
381
-
382
- class EGGWithoutInsights:
383
- def __init__(self):
384
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
385
- {
386
- "question":
387
- """
388
- State description: Current Game State: The car is positioned at 0.472, with a velocity of 0.049 towards the right.
389
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
390
- Action description: Your Next Move:Please choose an action. Type '1' to accelerate to the left, '2' to not accelerate, or '3' to accelerate to the right.Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3].
391
- The suggestions are listed below:Suggestion for the next episode:
392
- 1. Exploration: In the next episode, the player should focus on exploring the environment more thoroughly by trying out different actions. Specifically, the player should try accelerating in both directions (left and right) to see if there are any hidden paths that can lead to the goal state more quickly. To encourage exploration, the player can add some randomness to the policy by choosing actions randomly with a certain probability (e.g., 10% of the time).
393
- 2. Exploitation: To improve performance, the player should continue to use the policy that has been successful in the past episodes, which is to constantly accelerate in the direction of the goal, with occasional changes in direction to avoid getting stuck in local minima. However, the player should also try to optimize the policy by adjusting the frequency of changes in direction based on the car's velocity and position. For example, if the car is moving slowly and is far from the goal, the player should consider changing direction more frequently to explore more of the environment.
394
- 3. Exploration vs. Exploitation: The player should balance exploration and exploitation by adjusting the probability of choosing actions randomly. In the beginning of the episode, the player should focus more on exploration by choosing actions randomly with a higher probability
395
- """,
396
- "answer":
397
- """
398
- Based on the current game state and the suggestions, the optimal action for the player to take would be to accelerate to the right (action 3). Therefore, the optimal action to take now is to push the cart to accelerate to the right (action 3).
399
- """
400
- },{
401
- "question":
402
- """
403
- State description: Current Game State: The car is positioned at -0.218, with a velocity of 0.002 towards the left.
404
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
405
- Action description: Your Next Move:Please choose an action. Type '1' to accelerate to the left, '2' to not accelerate, or '3' to accelerate to the right.Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3].
406
- The suggestions are listed below:
407
- 1. The player should explore different acceleration strategies, such as accelerating in the opposite direction of the goal or accelerating randomly, to gather information on the environment and avoid getting stuck in local minima. This can be done by adding some randomness to the policy or using a different algorithm that balances exploration and exploitation.
408
- 2. To improve policy performance, the player can use the information gathered from exploration to update the value function and adjust the policy accordingly. Additionally, the player can consider using a more sophisticated memory system that takes into account the car's velocity and position, rather than just its position, to better capture the dynamics of the environment.
409
- 3. The player should balance exploration and exploitation by allocating a certain percentage of actions to exploration and the remaining percentage to exploitation. This can be done by setting a threshold for the amount of exploration and adjusting it based on the performance of the policy. The player should also monitor the performance of the policy and adjust the balance between exploration and exploitation accordingly.
410
- """,
411
- "answer":
412
- """
413
- Based on the current game state and the suggestions, the optimal action for the player to take would be to accelerate to the left (action 1) to obtain potential energy of rightward motion. Therefore, the optimal action to take now is to accelerate to the left (action 1).
414
- """
415
- },
416
- ]
417
-
418
- class EGGWithoutSuggestions:
419
- def __init__(self):
420
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
421
- {
422
- "question":
423
- """
424
- State description: Current Game State: The car is positioned at 0.472, with a velocity of 0.049 towards the right.
425
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
426
- Action description: Your Next Move:Please choose an action. Type '1' to accelerate to the left, '2' to not accelerate, or '3' to accelerate to the right.Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3].
427
- The insights of the game are listed below: The key information that can be exploited to improve performance of the player includes the fact that the car often gets stuck in local minima, the policy of constantly accelerating in the direction of the goal with occasional changes in direction, and the use of exploration techniques to encourage the car to try out different actions and explore the environment more thoroughly. Additionally, a more sophisticated memory system that takes into account the car's velocity and position, rather than just its position, and a different algorithm that balances exploration and exploitation more effectively can also improve performance.
428
- """,
429
- "answer":
430
- """
431
- Based on the current game state and the insights, the optimal action for the player to take would be to accelerate to the right (action 3). Therefore, the optimal action to take now is to push the cart to accelerate to the right (action 3).
432
- """
433
- },{
434
- "question":
435
- """
436
- State description: Current Game State: The car is positioned at -0.218, with a velocity of 0.002 towards the left.
437
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
438
- Action description: Your Next Move:Please choose an action. Type '1' to accelerate to the left, '2' to not accelerate, or '3' to accelerate to the right.Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3].
439
- The insights of the game are listed below:
440
- 1. The policy of constantly accelerating towards the goal with occasional changes in direction to avoid local minima is not performing well.
441
- 2. Exploration techniques can be used to encourage the car to try out different actions and explore the environment more thoroughly.
442
- 3. A more sophisticated memory system that takes into account the car's velocity and position can improve the accuracy of the value function.
443
- 4. A different algorithm that balances exploration and exploitation more effectively can also be considered.
444
- 5. The car is able to gain enough momentum to climb the hill towards the goal, but still gets stuck in local minima at times.
445
- """,
446
- "answer":
447
- """
448
- Based on the current game state and the insights, the optimal action for the player to take would be to accelerate to the left (action 1) to obtain potential energy of rightward motion. Therefore, the optimal action to take now is to accelerate to the left (action 1).
449
- """
450
- },
451
- ]
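
The deleted mountaincar answers explain accelerating left "to obtain potential energy of rightward motion". The standard way to code that idea is to push in the direction of the current velocity, pumping energy into the swing (a sketch only; the rule and names are not from this repo):

    def mountain_car_heuristic(position, velocity):
        # Prompt numbering: 1 = accelerate left, 2 = no acceleration,
        # 3 = accelerate right.
        if velocity < 0:
            return 1   # moving left: climb the left slope to store energy
        return 3       # moving right (or at rest): push toward the flag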
prompts/task_relevant/classic_control/mountaincarContinuous.py CHANGED
@@ -355,7 +355,7 @@ class REFLEXION:
355
  },
356
  ]
357
 
358
- class EGG:
+ class EXE:
359
  def __init__(self):
360
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
361
  {
@@ -397,74 +397,3 @@ class EGG:
397
  """
398
  },
399
  ]
400
-
401
- class EGGWithoutInsights:
402
- def __init__(self):
403
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
404
- {
405
- "question":
406
- """
407
- State description: Current Game State: The car is positioned at 0.472, with a velocity of 0.049 towards the right.
408
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
409
- Action description: Your Next Move: Please select a numerical value within the range of [-1,1], which represents the directional force being applied to the car. The action will be limited to the range of [-1,1], and then multiplied by a power of 0.0015..
410
- The suggestions are listed below:Suggestion for the next episode:
411
- 1. Exploration: In the next episode, the player should focus on exploring the environment more thoroughly by trying out different actions. Specifically, the player should try accelerating in both directions (left and right) to see if there are any hidden paths that can lead to the goal state more quickly. To encourage exploration, the player can add some randomness to the policy by choosing actions randomly with a certain probability (e.g., 10% of the time).
412
- 2. Exploitation: To improve performance, the player should continue to use the policy that has been successful in the past episodes, which is to constantly accelerate in the direction of the goal, with occasional changes in direction to avoid getting stuck in local minima. However, the player should also try to optimize the policy by adjusting the frequency of changes in direction based on the car's velocity and position. For example, if the car is moving slowly and is far from the goal, the player should consider changing direction more frequently to explore more of the environment.
413
- 3. Exploration vs. Exploitation: The player should balance exploration and exploitation by adjusting the probability of choosing actions randomly. In the beginning of the episode, the player should focus more on exploration by choosing actions randomly with a higher probability
414
- """,
415
- "answer":
416
- """
417
- Based on the current game state and the suggestions, the optimal action for the player to take would be to accelerate to the right (Action 1.0). Therefore, the optimal action to take now is to push the cart to accelerate to the right (Action 1.0).
418
- """
419
- },{
420
- "question":
421
- """
422
- State description: Current Game State: The car is positioned at -0.218, with a velocity of 0.002 towards the left.
423
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
424
- Action description: Your Next Move: Please select a numerical value within the range of [-1,1], which represents the directional force being applied to the car. The action will be limited to the range of [-1,1], and then multiplied by a power of 0.0015..
425
- The suggestions are listed below:
426
- 1. The player should explore different acceleration strategies, such as accelerating in the opposite direction of the goal or accelerating randomly, to gather information on the environment and avoid getting stuck in local minima. This can be done by adding some randomness to the policy or using a different algorithm that balances exploration and exploitation.
427
- 2. To improve policy performance, the player can use the information gathered from exploration to update the value function and adjust the policy accordingly. Additionally, the player can consider using a more sophisticated memory system that takes into account the car's velocity and position, rather than just its position, to better capture the dynamics of the environment.
428
- 3. The player should balance exploration and exploitation by allocating a certain percentage of actions to exploration and the remaining percentage to exploitation. This can be done by setting a threshold for the amount of exploration and adjusting it based on the performance of the policy. The player should also monitor the performance of the policy and adjust the balance between exploration and exploitation accordingly.
429
- """,
430
- "answer":
431
- """
432
- Based on the current game state and the suggestions, the optimal action for the player to take would be to accelerate to the left (Action -1.0) to obtain potential energy of rightward motion. Therefore, the optimal action to take now is to accelerate to the left (Action -1.0).
433
- """
434
- },
435
- ]
436
-
437
- class EGGWithoutSuggestions:
438
- def __init__(self):
439
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
440
- {
441
- "question":
442
- """
443
- State description: Current Game State: The car is positioned at 0.472, with a velocity of 0.049 towards the right.
444
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
445
- Action description: Your Next Move: Please select a numerical value within the range of [-1,1], which represents the directional force being applied to the car. The action will be limited to the range of [-1,1], and then multiplied by a power of 0.0015..
446
- The insights of the game are listed below: The key information that can be exploited to improve performance of the player includes the fact that the car often gets stuck in local minima, the policy of constantly accelerating in the direction of the goal with occasional changes in direction, and the use of exploration techniques to encourage the car to try out different actions and explore the environment more thoroughly. Additionally, a more sophisticated memory system that takes into account the car's velocity and position, rather than just its position, and a different algorithm that balances exploration and exploitation more effectively can also improve performance.
447
- """,
448
- "answer":
449
- """
450
- Based on the current game state and the insights, the optimal action for the player to take would be to accelerate to the right (Action 1.0). Therefore, the optimal action to take now is to push the cart to accelerate to the right (Action 1.0).
451
- """
452
- },{
453
- "question":
454
- """
455
- State description: Current Game State: The car is positioned at -0.218, with a velocity of 0.002 towards the left.
456
- Goal description: The goal is to reach the flag placed on top of the right hill as quickly as possible.
457
- Action description: Your Next Move: Please select a numerical value within the range of [-1,1], which represents the directional force being applied to the car. The action will be limited to the range of [-1,1], and then multiplied by a power of 0.0015..
458
- The insights of the game are listed below:
459
- 1. The policy of constantly accelerating towards the goal with occasional changes in direction to avoid local minima is not performing well.
460
- 2. Exploration techniques can be used to encourage the car to try out different actions and explore the environment more thoroughly.
461
- 3. A more sophisticated memory system that takes into account the car's velocity and position can improve the accuracy of the value function.
462
- 4. A different algorithm that balances exploration and exploitation more effectively can also be considered.
463
- 5. The car is able to gain enough momentum to climb the hill towards the goal, but still gets stuck in local minima at times.
464
- """,
465
- "answer":
466
- """
467
- Based on the current game state and the insights, the optimal action for the player to take would be to accelerate to the left (Action -1.0) to obtain potential energy of rightward motion. Therefore, the optimal action to take now is to accelerate to the left (Action -1.0).
468
- """
469
- },
470
- ]
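
The continuous variant's deleted examples make the same point with actions in [-1, 1]; the bang-bang analogue of the heuristic above would be (again illustrative only):

    def mountain_car_continuous_heuristic(position, velocity):
        # Full thrust along the current direction of motion.
        return 1.0 if velocity >= 0 else -1.0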
prompts/task_relevant/toy_text/blackjack.py CHANGED
@@ -374,7 +374,7 @@ class REFLEXION:
374
  }
375
  ]
376
 
377
- class EGG:
+ class EXE:
378
  def __init__(self):
379
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
380
  {
@@ -411,69 +411,3 @@ class EGG:
411
  """Based on the current game state, the insights and the suggestions, The optimal action is: 1."""
412
  }
413
  ]
414
-
415
- class EGGWithoutInsights:
416
- def __init__(self):
417
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
418
- {
419
- "question":
420
- """
421
- State description: Current Game State: The player's current sum is 6, the dealer is showing 7, and the player has a usable ace: no.
422
- Goal description: The goal is to beat the dealer by obtaining cards that sum to closer to 21, without going over 21.
423
- Action description: Your Next Move: \n Please choose an action. Type '1' to stick (stop receiving cards) or '2' to hit (add a card). Ensure you only provide the action number from the valid action list, i.e., [1, 2].
424
- The suggestions are listed below:
425
- 1. For exploration in the next episode, the player should try to observe the dealer's face-up card and use that information to inform their decision-making process. They can do this by hitting until they have a hand value of at least 12, and then observing the dealer's card before deciding whether to hit or stick. This will help the player make more informed decisions and potentially increase their chances of winning.
426
- 2. To improve the policy and gain a higher performance in the next episode, the player should focus on using their usable ace to their advantage. They should aim to use the ace as 11 when their hand value is low (e.g. less than 12) and as 1 when their hand value is high (e.g. more than 17). This will help the player avoid busting and increase their chances of getting closer to 21.
427
- 3. The player should weight exploration and exploitation equally in the next episode. While it is important to explore and gather new information, it is also important to use that information to improve performance. By balancing exploration and exploitation, the player can make more informed decisions and potentially increase their chances of winning.
428
- """,
429
- "answer":
430
- """
431
- Based on the current game state and the suggestions, the player should hit (add a card) as their current sum is very low and they have no usable ace to use to their advantage.
432
- The goal is to get closer to 21 without going over, and hitting will increase the player's chances of achieving this goal.
433
- Therefore, the optimal action for the player is to choose action number 2 to hit.
434
- """
435
- },{
436
- "question":
437
- """
438
- State description: Current Game State: The player's current sum is 20, the dealer is showing 4, and the player has a usable ace: no.
439
- Goal description: The goal is to beat the dealer by obtaining cards that sum to closer to 21, without going over 21.
440
- Action description: Your Next Move: \n Please choose an action. Type '1' to stick (stop receiving cards) or '2' to hit (add a card). Ensure you only provide the action number from the valid action list, i.e., [1, 2].
441
- The suggestions are listed below:
442
- 1. Exploration: The player should explore the value of their usable ace in different scenarios. They can try using the ace as 1 or 11 and observe the outcome. To make the exploration, the player can randomly choose to use the ace as 1 or 11 in different game states and record the results.
443
- 2. Exploitation: The player should improve their policy by considering the dealer's face-up card and the likelihood of the dealer having a higher hand. They can also use the information from the previous trajectories to make more informed decisions. For example, if the player has a low hand value and the dealer has a high face-up card, the player may choose to hit to improve their hand. On the other hand, if the player has a high hand value and the dealer has a low face-up card, the player may choose to stick to avoid going over 21.
444
- 3. Weighting: The player should balance exploration and exploitation by allocating more time to exploitation. Since the player only has two episodes left, they should focus on exploiting the information they have gathered so far to increase their chances of winning. However, they should also continue to explore the value of their usable ace in different scenarios to potentially improve their policy in future games.
445
- """,
446
- "answer":
447
- """Based on the current game state and the suggestions, The optimal action is: 1."""
448
- }
449
- ]
450
-
451
- class EGGWithoutSuggestions:
452
- def __init__(self):
453
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
454
- {
455
- "question":
456
- """
457
- State description: Current Game State: The player's current sum is 6, the dealer is showing 7, and the player has a usable ace: no.
458
- Goal description: The goal is to beat the dealer by obtaining cards that sum to closer to 21, without going over 21.
459
- Action description: Your Next Move: \n Please choose an action. Type '1' to stick (stop receiving cards) or '2' to hit (add a card). Ensure you only provide the action number from the valid action list, i.e., [1, 2].
460
- The insights of the game are listed below: The key information that can be exploited to improve the performance of the player is the presence of a usable ace in their hand. This information can be used to inform the player's decision-making process and potentially improve their performance by allowing them to make more informed decisions about whether to hit or stick based on the value of their hand. By taking into account the value of the ace, the player can potentially increase their chances of winning the hand.
461
- """,
462
- "answer":
463
- """
464
- Based on the current game state and the insights, the player should hit (add a card) as their current sum is very low and they have no usable ace to use to their advantage.
465
- The goal is to get closer to 21 without going over, and hitting will increase the player's chances of achieving this goal.
466
- Therefore, the optimal action for the player is to choose action number 2 to hit.
467
- """
468
- },{
469
- "question":
470
- """
471
- State description: Current Game State: The player's current sum is 20, the dealer is showing 4, and the player has a usable ace: no.
472
- Goal description: The goal is to beat the dealer by obtaining cards that sum to closer to 21, without going over 21.
473
- Action description: Your Next Move: \n Please choose an action. Type '1' to stick (stop receiving cards) or '2' to hit (add a card). Ensure you only provide the action number from the valid action list, i.e., [1, 2].
474
- The insights of the game are listed below: The key information that can be exploited to improve the performance of the player includes the presence of a usable ace, the player's current hand sum, the dealer's face-up card, and the likelihood of the dealer having a higher hand. By taking into account these factors, the player can make more informed decisions on whether to hit or stick, potentially increasing their chances of winning.
475
- """,
476
- "answer":
477
- """Based on the current game state and the insights, The optimal action is: 1."""
478
- }
479
- ]
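
The deleted blackjack examples hit on a sum of 6 and stick on 20, i.e. a threshold rule on the player's sum. A sketch of that baseline (the threshold of 17 is an illustrative choice, not something specified in this repo):

    def blackjack_policy(player_sum, dealer_card, usable_ace, threshold=17):
        # Prompt numbering: 1 = stick, 2 = hit. dealer_card and usable_ace
        # are ignored by this toy rule; the deleted insights suggest a real
        # policy should condition on both.
        return 1 if player_sum >= threshold else 2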
prompts/task_relevant/toy_text/cliffwalking.py CHANGED
@@ -365,7 +365,7 @@ class REFLEXION:
365
  },
366
  ]
367
 
368
- class EGG:
+ class EXE:
369
  def __init__(self):
370
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
371
  {
@@ -408,77 +408,3 @@ class EGG:
408
  },
409
  ]
410
 
411
- class EGGWithoutInsights:
412
- def __init__(self):
413
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
414
- {
415
- "question":
416
- """
417
- State description: Current Game State: The player is at location [2, 11] in the grid world.
418
- Goal description: The goal is to navigate from the starting point to an target which locate at (3,11), avoiding the cliff, in as few steps as possible.
419
- Action description: Your Next Move:\nPlease choose an action. Type '1' to move up, '2' to move right, '3' to move down, or '4' to move left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
420
- The suggestions are listed below:Suggestion for the next episode:
421
- 1. Exploration: The player should explore different paths towards the goal location, rather than always moving in a straight line. This can be done by randomly selecting actions with a certain probability, such as 10%, to encourage the player to try different paths. The player should also try to gather information about the rewards and penalties associated with different actions, especially those near the cliff.
422
- 2. Exploitation: The player should prioritize avoiding the cliff over reaching the goal quickly. This can be done by updating the policy to include a penalty for moving towards the cliff or a reward for moving away from it. The player should also try to incorporate a more cautious approach that considers the potential consequences of each action.
423
- 3. Weighting: The player should balance exploration and exploitation by assigning a higher weight to exploitation. This is because the player has already tried two episodes and needs to focus on improving performance. However, some level of exploration is still necessary to avoid getting stuck in a suboptimal policy. A reasonable weighting could be 70% exploitation and 30% exploration.
424
- """,
425
- "answer":
426
- """
427
- Based on the current game state and the suggestions, the optimal action for the player to take would be to move down (action 3) to reach the goal location at [3, 11]. Therefore, the optimal action to take now is to move down (Action 3).
428
- """
429
- },{
430
- "question":
431
- """
432
- State description: Current Game State: The player is at location [3,0] in the grid world.
433
- Goal description: The goal is to navigate from the starting point to an target which locate at (3,11), avoiding the cliff, in as few steps as possible.
434
- Action description: Your Next Move:\nPlease choose an action. Type '1' to move up, '2' to move right, '3' to move down, or '4' to move left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
435
- The suggestions are listed below:
436
- 1. The player should explore different paths towards the goal location, especially those that are not in a straight line or zigzag pattern. This can be done by randomly selecting actions or by using a more exploratory policy, such as epsilon-greedy or softmax. The player should also pay attention to the rewards and penalties associated with each action, especially those related to cliff avoidance.
437
- 2. To improve performance, the policy should prioritize both reaching the goal location and avoiding the cliff. This can be done by incorporating a penalty for moving towards the cliff or a reward for moving away from it. The policy should also encourage diagonal movements towards the goal location, which would reduce the number of steps needed to reach the goal. Additionally, the policy should include a penalty for falling off the cliff, which would further incentivize the player to avoid it.
438
- 3. The player should balance exploration and exploitation by using a more exploratory policy, such as epsilon-greedy or softmax, with a low exploration rate. This would allow the player to explore different paths towards the goal location while still prioritizing cliff avoidance and efficient movements. The player should also periodically evaluate the policy and adjust the exploration rate accordingly.
439
-
440
- """,
441
- "answer":
442
- """
443
- Based on the current game state and the suggestions, it seems like the player is stuck in a loop and not making any progress towards the goal location.
444
- To improve performance, the player should prioritize both reaching the goal location and avoiding the cliff.
445
- It is recommended to incorporate a penalty for moving towards the cliff or a reward for moving away from it.
446
- The policy should also encourage diagonal movements towards the goal location, which would reduce the number of steps needed to reach the goal.
447
- For the next action, I suggest the player to move up (action 1) to explore a different path towards the goal location.
448
- """
449
- },
450
- ]
451
-
452
- class EGGWithoutSuggestions:
453
- def __init__(self):
454
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
455
- {
456
- "question":
457
- """
458
- State description: Current Game State: The player is at location [2, 11] in the grid world.
459
- Goal description: The goal is to navigate from the starting point to an target which locate at (3,11), avoiding the cliff, in as few steps as possible.
460
- Action description: Your Next Move:\nPlease choose an action. Type '1' to move up, '2' to move right, '3' to move down, or '4' to move left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
461
- The insights of the game are listed below: The key information that can be exploited to improve the player's performance is that they tend to move in a straight line towards the goal location, even if it means getting closer to the cliff. This behavior can be addressed by incorporating a more cautious approach that prioritizes avoiding the cliff over reaching the goal quickly. Additionally, the policy could be updated to encourage exploration of different paths towards the goal location, rather than always moving in a straight line. The negative consequences of not prioritizing cliff avoidance are highlighted in the player's poor performance and multiple penalties.
462
- """,
463
- "answer":
464
- """
465
- Based on the current game state and the insights, the optimal action for the player to take would be to move down (action 3) to reach the goal location at [3, 11]. Therefore, the optimal action to take now is to move down (Action 3).
466
- """
467
- },{
468
- "question":
469
- """
470
- State description: Current Game State: The player is at location [3,0] in the grid world.
471
- Goal description: The goal is to navigate from the starting point to an target which locate at (3,11), avoiding the cliff, in as few steps as possible.
472
- Action description: Your Next Move:\nPlease choose an action. Type '1' to move up, '2' to move right, '3' to move down, or '4' to move left. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
473
- The insights of the game are listed below: The key information that can be exploited to improve performance of the player is that they tend to move in a straight line or zigzag pattern towards the goal location, even if it means getting closer to the cliff. This behavior can be improved by incorporating a more cautious approach that prioritizes avoiding the cliff over reaching the goal quickly, and by encouraging exploration of different paths towards the goal location. Additionally, the policy could be updated to include a penalty for falling off the cliff, which would further incentivize the player to avoid it.
474
- """,
475
- "answer":
476
- """
477
- Based on the current game state and the insights, it seems like the player is stuck in a loop and not making any progress towards the goal location.
478
- To improve performance, the player should prioritize both reaching the goal location and avoiding the cliff.
479
- It is recommended to incorporate a penalty for moving towards the cliff or a reward for moving away from it.
480
- The policy should also encourage diagonal movements towards the goal location, which would reduce the number of steps needed to reach the goal.
481
- For the next action, I suggest the player to move up (action 1) to explore a different path towards the goal location.
482
- """
483
- },
484
- ]
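
The deleted cliffwalking suggestions repeatedly propose "a penalty for moving towards the cliff or a reward for moving away from it", i.e. reward shaping. A minimal sketch of such a shaped reward (the penalty sizes and layout assumptions are illustrative, not from this repo):

    def shaped_reward(env_reward, row, next_row, cliff_row=3):
        # The cliff occupies row 3 between the start (3, 0) and the goal
        # (3, 11), per the prompt. Penalize steps toward that row and
        # slightly reward steps away from it.
        if next_row > row:
            return env_reward - 0.1   # moved toward the cliff row
        if next_row < row:
            return env_reward + 0.05  # moved away from it
        return env_reward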
prompts/task_relevant/toy_text/frozenlake.py CHANGED
@@ -446,7 +446,7 @@ class REFLEXION:
446
  },
447
  ]
448
 
449
- class EGG:
+ class EXE:
450
  def __init__(self):
451
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
452
  {
@@ -472,52 +472,3 @@ class EGG:
472
  """
473
  },
474
  ]
475
-
476
- class EGGWithoutInsights:
477
- def __init__(self):
478
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
479
- {
480
- "question":
481
- """
482
- State description: Current Game State: The current position of the player is at row 2, column 0.
483
- Goal description: The goal is to navigate across the frozen lake and reach the goal position located at (3,3) without falling into any holes, which are located at (1,1), (1,3), (2,3) and (0,3).
484
- Action description: Your Next Move:
485
- Please choose an action. The possible actions are:
486
- '1': Move left (Decrease the horizontal coordinate by 1)
487
- '2': Move down (Increase the vertical coordinate by 1)
488
- '3': Move right (Increase the horizontal coordinate by 1)
489
- '4': Move up (Decrease the vertical coordinate by 1). Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
490
- The suggestions are listed below:Suggestion for the next episode:
491
- 1. Exploration: The player should explore the environment to understand the slippery nature of the frozen lake. The player can do this by taking random actions and observing the resulting movements. This will help the player to develop a better understanding of the environment and adjust its actions accordingly.
492
- 2. Exploitation: The player should use a Q-learning algorithm to improve its policy. The Q-learning algorithm will help the player to learn from its experiences and develop a better policy over time. The player should also consider the slippery nature of the frozen lake when updating its Q-values.
493
- 3. Weighting: The player should balance exploration and exploitation by using an epsilon-greedy policy. The player should set a high value of epsilon initially to encourage exploration and gradually decrease it over time to encourage exploitation. This will help the player to explore the environment initially and then exploit its knowledge to gain a higher performance.
494
- """,
495
- "answer":
496
- """
497
- Based on the current game state and the suggestions, the optimal action for the player to take would be to move right (action 3) to navigate around the holes and reach the goal position. Therefore, the optimal action to take now is to move right (action 3).
498
- """
499
- },
500
- ]
501
-
502
- class EGGWithoutSuggestions:
503
- def __init__(self):
504
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
505
- {
506
- "question":
507
- """
508
- State description: Current Game State: The current position of the player is at row 2, column 0.
509
- Goal description: The goal is to navigate across the frozen lake and reach the goal position located at (3,3) without falling into any holes, which are located at (1,1), (1,3), (2,3) and (0,3).
510
- Action description: Your Next Move:
511
- Please choose an action. The possible actions are:
512
- '1': Move left (Decrease the horizontal coordinate by 1)
513
- '2': Move down (Increase the vertical coordinate by 1)
514
- '3': Move right (Increase the horizontal coordinate by 1)
515
- '4': Move up (Decrease the vertical coordinate by 1). Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4].
516
- The insights of the game are listed below: The key information that can be exploited to improve the performance of the player includes the fact that the current policy is random and ineffective, the player is not learning from its experiences, and the slippery nature of the frozen lake is causing unintended movements. To improve performance, the player needs to incorporate a learning algorithm to improve its policy over time and develop a strategy that takes into account the slippery nature of the ice.
517
- """,
518
- "answer":
519
- """
520
- Based on the current game state and the insights, the optimal action for the player to take would be to move right (action 3) to navigate around the holes and reach the goal position. Therefore, the optimal action to take now is to move right (action 3).
521
- """
522
- },
523
- ]
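
The deleted frozenlake suggestions describe an epsilon-greedy schedule: set epsilon high initially to encourage exploration and gradually decrease it over time. A minimal sketch of that schedule (illustrative; the decay constants are invented):

    import random

    def epsilon_greedy(q_values, epsilon):
        # With probability epsilon explore uniformly, otherwise act greedily.
        # (Indices here are 0-based; the prompt's action list is 1-based.)
        if random.random() < epsilon:
            return random.randrange(len(q_values))
        return max(range(len(q_values)), key=lambda a: q_values[a])

    epsilon, eps_min, eps_decay = 1.0, 0.05, 0.995
    for episode in range(1000):
        # ... run one episode, picking actions with epsilon_greedy(...) ...
        epsilon = max(eps_min, epsilon * eps_decay)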
prompts/task_relevant/toy_text/taxi.py CHANGED
@@ -481,7 +481,7 @@ class REFLEXION:
481
  },
482
  ]
483
 
484
- class EGG:
+ class EXE:
485
  def __init__(self):
486
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
487
  {
@@ -528,81 +528,4 @@ class EGG:
528
  Based on the current game state, the insights and the suggestions, the optimal action for the player to take would be to move left (action 4) to pick up the passenager. Therefore, the optimal action to take now is to move left (action 4).
529
  """
530
  },
531
- ]
532
-
533
- class EGGWithoutInsights:
534
- def __init__(self):
535
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
536
- {
537
- "question":
538
- """
539
- State description: Current Game State: Taxi is at Row 0, Col 1. The passenger is at the Green location. The passenger wants to go to the Blue location.
540
- Goal description: The goal is to navigate the taxi to the passenger, pick them up, and drop them off at their destination as efficiently as possible.
541
- Action description: Your Next Move: Please choose an action. Type '1' to move south (down), '2' to move north (up), '3' to move east (right), '4' to move west (left), '5' to pick up the passenger or '6' to drop off the passenger. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4, 5, 6].
542
- The suggestions are listed below:
543
- 1. The player should explore the environment by trying out different routes to reach the pick-up and drop-off locations. They should also try to identify any patterns in the locations of the pick-up and drop-off points, and use this information to plan their route. The exploration can be done by randomly selecting actions and observing the rewards obtained.
544
- 2. To improve the policy, the player should use a Q-learning algorithm to learn the optimal policy. They should use the information obtained through exploration to update the Q-values for each state-action pair. The player should also incorporate a penalty for repeating the same action multiple times in a row and a heuristic that encourages the taxi to move towards the passenger or destination.
545
- 3. The player should weight the exploration and exploitation differently by using an epsilon-greedy policy. They should set a high value of epsilon initially to encourage exploration and gradually decrease it over time to encourage exploitation. The player should also monitor the performance of the policy and adjust the exploration-exploitation trade-off accordingly.
546
- """,
547
- "answer":
548
- """
549
- Based on the current game state and the suggestions, the optimal action for the player to take would be to move down (action 1) to navigate around the obstacles and pick up the passenager. Therefore, the optimal action to take now is to move down (Action 1).
550
- """
551
- },{
552
- "question":
553
- """
554
- State description: Current Game State: Taxi is at Row 4, Col 4. The passenger is at the Red location. The passenger wants to go to the Green location.
555
- Goal description: The goal is to navigate the taxi to the passenger, pick them up, and drop them off at their destination as efficiently as possible.
556
- Action description: Your Next Move: Please choose an action. Type '1' to move south (down), '2' to move north (up), '3' to move east (right), '4' to move west (left), '5' to pick up the passenger or '6' to drop off the passenger. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4, 5, 6].
557
- The suggestions are listed below:Suggestion for the next episode:
558
- 1. Exploration: The player should explore the shortest path to reach the pick-up and drop-off points. This can be done by using a heuristic algorithm such as A* search to find the shortest path. The player should also explore the penalty for repeating the same action multiple times in a row to avoid getting stuck in a loop.
559
- 2. Exploitation: The player should use a Q-learning algorithm to improve the policy. The Q-learning algorithm should take into account the shortest path to reach the pick-up and drop-off points, as well as the penalty for repeating the same action multiple times in a row. The player should also incorporate a heuristic that encourages the taxi to move towards the passenger or destination.
560
- 3. Weighting: The player should focus more on exploitation than exploration in this episode, as they have already explored the random policy in the previous episodes. The player should use the information obtained from the exploration to improve the policy and gain a higher performance. However, the player should still explore the penalty for repeating the same action multiple times in a row to avoid getting stuck in a loop.
561
- """,
562
- "answer":
563
- """
564
- Based on the current game state and the suggestions, the optimal action for the player to take would be to move left (action 4) to pick up the passenger. Therefore, the optimal action to take now is to move left (action 4).
565
- """
566
- },
567
- ]
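The suggestions in these removed examples repeatedly invoke epsilon-greedy Q-learning with a decaying epsilon. For reference, a minimal sketch of that loop on Taxi-v3 follows; it is not code from this repository, and the hyperparameters and decay schedule are illustrative assumptions.

import random
import gym  # gym 0.26-style API: reset returns (obs, info), step returns 5 values

env = gym.make("Taxi-v3")
# Tabular Q-values: one row per discrete state, one column per action.
q = [[0.0] * env.action_space.n for _ in range(env.observation_space.n)]
alpha, gamma = 0.1, 0.99                         # assumed learning rate and discount
epsilon, eps_min, eps_decay = 1.0, 0.05, 0.995   # explore early, exploit late

for episode in range(500):
    state, _ = env.reset()
    done = False
    while not done:
        if random.random() < epsilon:            # exploration branch
            action = env.action_space.sample()
        else:                                    # exploitation branch
            action = max(range(env.action_space.n), key=lambda a: q[state][a])
        next_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        # One-step Q-learning update toward the bootstrapped target.
        q[state][action] += alpha * (reward + gamma * max(q[next_state]) - q[state][action])
        state = next_state
    epsilon = max(eps_min, epsilon * eps_decay)  # anneal toward exploitation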
568
-
569
- class EGGWithoutSuggestions:
570
- def __init__(self):
571
- self.PERCEPTRON_BASIC_FS_EXAMPLES = [
572
- {
573
- "question":
574
- """
575
- State description: Current Game State: Taxi is at Row 0, Col 1. The passenger is at the Green location. The passenger wants to go to the Blue location.
576
- Goal description: The goal is to navigate the taxi to the passenger, pick them up, and drop them off at their destination as efficiently as possible.
577
- Action description: Your Next Move: Please choose an action. Type '1' to move south (down), '2' to move north (up), '3' to move east (right), '4' to move west (left), '5' to pick up the passenger or '6' to drop off the passenger. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4, 5, 6].
578
- The insights of the game are listed below:
579
- 1. The current policy is random and not using any information about the locations of the pick-up and drop-off points or the shortest path to reach them.
580
- 2. The taxi is often stuck in a loop of repeatedly taking the same action, which could be addressed by implementing a penalty for repeating the same action multiple times in a row.
581
- 3. The taxi often moves in the opposite direction of the passenger or destination, which could be addressed by incorporating a heuristic that encourages the taxi to move towards the passenger or destination.
582
- 4. The performance of the current policy is very poor, as indicated by the negative rewards received in each episode.
583
- 5. A more informed policy, such as a Q-learning algorithm, could be used to improve performance.
584
- """,
585
- "answer":
586
- """
587
- Based on the current game state and the insights, the optimal action for the player to take would be to move down (action 1) to navigate around the obstacles and pick up the passenger. Therefore, the optimal action to take now is to move down (action 1).
588
- """
589
- },{
590
- "question":
591
- """
592
- State description: Current Game State: Taxi is at Row 4, Col 4. The passenger is at the Red location. The passenger wants to go to the Green location.
593
- Goal description: The goal is to navigate the taxi to the passenger, pick them up, and drop them off at their destination as efficiently as possible.
594
- Action description: Your Next Move: Please choose an action. Type '1' to move south (down), '2' to move north (up), '3' to move east (right), '4' to move west (left), '5' to pick up the passenger or '6' to drop off the passenger. Ensure you only provide the action number from the valid action list, i.e., [1, 2, 3, 4, 5, 6].
595
- The insights of the game are listed below:
596
- 1. The current policy is random and not using any information about the locations of the pick-up and drop-off points or the shortest path to reach them.
597
- 2. The taxi is often stuck in a loop of repeating the same action multiple times in a row.
598
- 3. The taxi often moves in the opposite direction of the passenger or destination.
599
- 4. A more informed policy, such as a Q-learning algorithm, could be used to improve performance.
600
- 5. Implementing a penalty for repeating the same action multiple times in a row could address the issue of being stuck in a loop.
601
- 6. Incorporating a heuristic that encourages the taxi to move towards the passenger or destination could address the issue of moving in the opposite direction.
602
- """,
603
- "answer":
604
- """
605
- Based on the current game state and the insights, the optimal action for the player to take would be to move left (action 4) to pick up the passenger. Therefore, the optimal action to take now is to move left (action 4).
606
- """
607
- },
608
  ]
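Two of the insights above (the action-loop problem and the repeat penalty that would address it) can be expressed as a small reward wrapper. The sketch below is a hedged illustration under the gym 0.26 API, not a helper that exists in this repository, and the penalty magnitude is an assumption.

import gym

class RepeatPenaltyWrapper(gym.Wrapper):
    """Subtract a fixed penalty whenever the agent repeats its last action."""
    def __init__(self, env, penalty=1.0):  # penalty value is illustrative
        super().__init__(env)
        self.penalty = penalty
        self.last_action = None

    def reset(self, **kwargs):
        self.last_action = None
        return self.env.reset(**kwargs)

    def step(self, action):
        obs, reward, terminated, truncated, info = self.env.step(action)
        if action == self.last_action:
            reward -= self.penalty  # discourage getting stuck in a loop
        self.last_action = action
        return obs, reward, terminated, truncated, info

# Usage sketch: env = RepeatPenaltyWrapper(gym.make("Taxi-v3"))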
 
481
  },
482
  ]
483
 
484
+ class EXE:
485
  def __init__(self):
486
  self.PERCEPTRON_BASIC_FS_EXAMPLES = [
487
  {
 
528
  Based on the current game state, the insights and the suggestions, the optimal action for the player to take would be to move left (action 4) to pick up the passenger. Therefore, the optimal action to take now is to move left (action 4).
529
  """
530
  },
531
  ]
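Every answer in these few-shot examples ends with a sentence naming the chosen move as "(action N)", so a decider consuming them has to pull N back out of free text. A minimal extraction sketch follows; the parsers actually used by this repository's deciders may differ, and the function name is hypothetical.

import re
from typing import Optional

def extract_action(answer: str) -> Optional[int]:
    # Take the last "(action N)" mention so the final decision wins.
    matches = re.findall(r"\(action\s*(\d+)\)", answer, flags=re.IGNORECASE)
    return int(matches[-1]) if matches else None

assert extract_action(
    "Therefore, the optimal action to take now is to move left (action 4)."
) == 4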
record_reflexion.csv ADDED
@@ -0,0 +1,11 @@
1
+ env,level,decider,avg_score
2
+ CartPole-v0,1,expert,200.0
3
+ LunarLander-v2,1,expert,200.0
4
+ Acrobot-v1,1,expert,200.0
5
+ MountainCar-v0,1,expert,200.0
6
+ Blackjack-v1,1,expert,200.0
7
+ Taxi-v3,1,expert,200.0
8
+ CliffWalking-v0,1,expert,200.0
9
+ FrozenLake-v1,1,expert,200.0
10
+ MountainCarContinuous-v0,1,expert,200.0
11
+
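record_reflexion.csv is a flat lookup table keyed by (env, level, decider) with a single avg_score column. A sketch of loading it with the standard library (hypothetical usage, not code from this repository):

import csv

with open("record_reflexion.csv", newline="") as f:
    rows = list(csv.DictReader(f))  # blank trailing lines are skipped

scores = {(r["env"], int(r["level"]), r["decider"]): float(r["avg_score"])
          for r in rows}
print(scores[("Taxi-v3", 1, "expert")])  # -> 200.0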
requirements.txt DELETED
@@ -1,98 +0,0 @@
1
- absl-py==1.4.0
2
- aiohttp==3.8.4
3
- ale-py==0.8.1
4
- annotated-types==0.5.0
5
- appdirs==1.4.4
6
- beautifulsoup4==4.12.2
7
- box2d-py==2.3.5
8
- cachetools==5.3.1
9
- cchardet==2.1.7
10
- charset-normalizer==3.1.0
11
- click==8.1.3
12
- cloudpickle==2.2.1
13
- contourpy==1.1.0
14
- cycler==0.11.0
15
- cython==3.0.1
16
- dataclasses-json==0.5.14
17
- decorator==4.4.2
18
- docker-pycreds==0.4.0
19
- fasteners==0.18
20
- filelock==3.12.2
21
- fonttools==4.40.0
22
- fsspec==2023.6.0
23
- gitdb==4.0.10
24
- gitpython==3.1.31
25
- glfw==2.6.2
26
- google-auth==2.21.0
27
- google-auth-oauthlib==1.0.0
28
- greenlet==2.0.2
29
- grpcio==1.56.0
30
- gym[box2d]==0.26.2
31
- gym-notices==0.0.8
32
- h5py==3.9.0
33
- huggingface-hub==0.15.1
34
- imageio==2.31.2
35
- imageio-ffmpeg==0.4.8
36
- importlib-metadata==6.6.0
37
- importlib-resources==5.12.0
38
- iniconfig==2.0.0
39
- kiwisolver==1.4.4
40
- langchain==0.0.284
41
- langsmith==0.0.33
42
- llvmlite==0.40.1
43
- lz4==4.3.2
44
- markdown==3.4.3
45
- markupsafe==2.1.1
46
- marshmallow==3.20.1
47
- matplotlib==3.7.1
48
- moviepy==1.0.3
49
- mujoco==2.2.0
50
- mujoco-py==2.1.2.14
51
- multidict==6.0.4
52
- numba==0.57.1
53
- numexpr==2.8.5
54
- numpy==1.24.4
55
- oauthlib==3.2.2
56
- openai==0.27.8
57
- opencv-python==4.8.0.76
58
- pathtools==0.1.2
59
- pillow==9.5.0
60
- pluggy==1.2.0
61
- proglog==0.1.10
62
- protobuf==3.19.6
63
- py==1.11.0
64
- pyasn1==0.5.0
65
- pyasn1-modules==0.3.0
66
- pydantic==2.3.0
67
- pydantic-core==2.6.3
68
- pyopengl==3.1.7
69
- pyparsing==3.0.9
70
- pytest==7.0.1
71
- regex==2023.6.3
72
- requests==2.31.0
73
- requests-oauthlib==1.3.1
74
- rsa==4.9
75
- safetensors==0.3.1
76
- sentry-sdk==1.26.0
77
- setproctitle==1.3.2
78
- smmap==5.0.0
79
- soupsieve==2.4.1
80
- sqlalchemy==2.0.20
81
- swig==4.1.1
82
- tenacity==8.2.3
83
- tensorboard==2.14.0
84
- tensorboard-data-server==0.7.1
85
- tianshou==0.4.10
86
- tokenizers==0.13.3
87
- tqdm==4.65.0
88
- transformers==4.30.2
89
- typing==3.7.4.3
90
- typing-extensions==4.7.1
91
- typing-inspect==0.9.0
92
- urllib3
93
- v==1
94
- wandb==0.15.4
95
- werkzeug==2.3.6
96
- yarl==1.9.2
97
- zipp==3.15.0
98
- aquarel==0.0.5
shell/test_acrobot.sh CHANGED
@@ -3,42 +3,42 @@
3
  # Naive Actor
4
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
6
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
7
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
8
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
  # PAL
11
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
13
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
14
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
15
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 5 --num_trails 1
16
 
17
  # COT
18
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
19
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
20
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
21
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
22
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
23
 
24
  # self consistency
25
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
26
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
27
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
28
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
29
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
30
 
31
  # self-ask
32
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
33
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
34
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
35
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
36
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
37
 
38
  # SPP
39
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
40
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
41
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
42
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
43
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
44
 
@@ -49,9 +49,9 @@ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_tr
49
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
50
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
51
 
52
- # Jarvis
53
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
54
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
55
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
56
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
57
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
 
3
  # Naive Actor
4
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
6
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
8
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
  # PAL
11
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
13
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
14
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
15
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider pal_actor --prompt_level 5 --num_trails 1
16
 
17
  # COT
18
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
19
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
20
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
21
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
22
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
23
 
24
  # self consistency
25
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
26
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
27
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
28
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
29
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
30
 
31
  # self-ask
32
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
33
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
34
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
35
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
36
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
37
 
38
  # SPP
39
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
40
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
41
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
42
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
43
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
44
 
 
49
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/acrobot"
50
  python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
51
 
52
+ # exe
53
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
54
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
55
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
56
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
57
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
shell/test_blackjack.sh CHANGED
@@ -3,34 +3,34 @@
3
  # Naive Actor
4
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
6
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --use_short_mem 1 --distiller traj_distiller
7
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
8
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
  # COT
11
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
13
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --use_short_mem 1 --distiller traj_distiller
14
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
15
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
16
 
17
  # self consistency
18
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
19
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
20
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --use_short_mem 1 --distiller traj_distiller
21
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
22
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
23
 
24
  # self-ask
25
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
26
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
27
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --use_short_mem 1 --distiller traj_distiller
28
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
29
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
30
  # SPP
31
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
32
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
33
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --use_short_mem 1 --distiller traj_distiller
34
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
35
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
36
 
@@ -41,9 +41,9 @@ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_ini
41
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
42
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
43
 
44
- # Jarvis
45
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
46
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
47
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
48
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
49
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
 
3
  # Naive Actor
4
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
6
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
8
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
  # COT
11
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
13
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
14
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
15
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
16
 
17
  # self consistency
18
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
19
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
20
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
21
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
22
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
23
 
24
  # self-ask
25
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
26
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
27
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
28
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
29
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
30
  # SPP
31
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
32
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
33
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
34
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
35
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
36
 
 
41
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/toy_text/few_shot_examples/blackjack"
42
  python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
43
 
44
+ # exe
45
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
46
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
47
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
48
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
49
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
shell/test_cartpole.sh CHANGED
@@ -3,42 +3,35 @@
3
  # Naive Actor
4
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
6
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
7
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
8
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
- # PAL
11
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider pal_actor --prompt_level 1 --num_trails 1
12
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider pal_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
13
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
14
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider pal_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
15
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider pal_actor --prompt_level 5 --num_trails 1
16
-
17
  # COT
18
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
19
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
20
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
21
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
22
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
23
 
24
  # self consistency
25
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
26
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
27
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
28
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
29
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
30
 
31
  # self-ask
32
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
33
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
34
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
35
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
36
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
37
 
38
  # SPP
39
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
40
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
41
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
42
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
43
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
44
 
@@ -49,9 +42,9 @@ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_tran
49
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
50
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
51
 
52
- # Jarvis
53
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
54
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
55
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
56
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
57
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
 
3
  # Naive Actor
4
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
6
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
8
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
 
 
 
 
 
 
 
10
  # COT
11
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
13
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
14
  python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
15
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1

 # self consistency
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1

 # self-ask
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1

 # SPP
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1


 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/classic_control/few_shot_examples/cartpole"
 python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller

+ # exe
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
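The five levels above vary only --prompt_level, the trail count, and whether a few-shot --prompt_path is supplied. A minimal sweep sketch for the exe decider, assuming main_reflexion.py accepts these flags exactly as used above (the loop itself is illustrative, not a file in the repository):

#!/usr/bin/env bash
# Sweep all five prompt levels for the exe decider on CartPole-v0.
# Mirrors the commands above: level 3 runs 5 trails, and levels 2 and 4
# additionally pass the few-shot prompt path.
for level in 1 2 3 4 5; do
  trails=1
  [ "$level" -eq 3 ] && trails=5
  extra=()
  if [ "$level" -eq 2 ] || [ "$level" -eq 4 ]; then
    extra=(--prompt_path "envs/classic_control/few_shot_examples/cartpole")
  fi
  python main_reflexion.py --env_name CartPole-v0 \
    --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator \
    --decider exe_actor --prompt_level "$level" --num_trails "$trails" \
    --distiller guide_generator "${extra[@]}"
done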
shell/test_cliffwalking.sh CHANGED
@@ -3,42 +3,42 @@
 # Naive Actor
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1

 # PAL
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider pal_actor --prompt_level 5 --num_trails 1

 # COT
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1

 # self consistency
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1

 # self-ask
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1

 # SPP
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1

@@ -49,9 +49,9 @@ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalki
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
 python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller

- # Jarvis
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
+ # exe
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
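The whole CliffWalking sweep can also be reproduced by running the updated script directly. A usage sketch, assuming invocation from the repository root so the relative --prompt_path values resolve; the log filename is illustrative:

# Run the updated sweep and keep a copy of the output.
bash shell/test_cliffwalking.sh 2>&1 | tee cliffwalking_sweep.log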
shell/{test_jarvis.sh → test_exe.sh} RENAMED
@@ -1,55 +1,55 @@
 # Acrobot-v1
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
+ python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

 # Blackjack-v1
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 50 --distiller guide_generator
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 1 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 2 --num_trails 5 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 4 --num_trails 5 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
+ python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider exe_actor --prompt_level 5 --num_trails 5 --distiller guide_generator

 # CartPole-v0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole"
+ python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

 # CliffWalking-v0
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
+ python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

 # LunarLander-v2
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
+ python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander"
+ python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander"
+ python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

 # MountainCar-v0
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
+ python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
+ python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

 # MountainCarContinuous-v0
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
+ python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
+ python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator

 # Taxi-v3
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
+ python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
+ python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
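Each block of the renamed script pairs an environment with a translator prefix. A hypothetical helper, with the env-to-prefix map read off the commands above, that runs the exe decider at one prompt level across all eight environments; trail counts in the script itself vary per environment (Blackjack, for example, uses 5):

#!/usr/bin/env bash
# Sketch: run the exe decider at a single prompt level on every environment.
# The mapping of env name to translator prefix mirrors the commands above;
# the loop itself is illustrative and not part of the repository.
declare -A prefix=(
  [Acrobot-v1]=acrobot [Blackjack-v1]=blackjack [CartPole-v0]=cart
  [CliffWalking-v0]=cliffwalking [LunarLander-v2]=lunarLander
  [MountainCar-v0]=mountaincar [MountainCarContinuous-v0]=mountaincarContinuous
  [Taxi-v3]=taxi
)
for env in "${!prefix[@]}"; do
  python main_reflexion.py --env_name "$env" \
    --init_summarizer "${prefix[$env]}_init_translator" \
    --curr_summarizer "${prefix[$env]}_basic_translator" \
    --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
done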
shell/test_frozenlake.sh CHANGED
@@ -3,42 +3,42 @@
3
  # Naive Actor
4
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
6
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
7
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
8
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
  # PAL
11
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
13
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
14
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
15
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 5 --num_trails 1
16
 
17
  # COT
18
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
19
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
20
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
21
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
22
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
23
 
24
  # self consistency
25
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
26
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
27
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
28
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
29
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
30
 
31
  # self-ask
32
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
33
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
34
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
35
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
36
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
37
 
38
  # SPP
39
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
40
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
41
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller --use_short_mem 1
42
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
43
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
44
 
@@ -49,9 +49,9 @@ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_i
49
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
50
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
51
 
52
- # Jarvis
53
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider jarvis_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
54
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider jarvis_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
55
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider jarvis_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
56
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider jarvis_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
57
- python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider jarvis_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
 
3
  # Naive Actor
4
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1
5
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
6
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
8
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider naive_actor --prompt_level 5 --num_trails 1
9
 
10
  # PAL
11
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 1 --num_trails 1
12
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
13
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
14
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
15
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider pal_actor --prompt_level 5 --num_trails 1
 
  # COT
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider cot_actor --prompt_level 5 --num_trails 1
 
  # self consistency
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 1 --num_trails 1
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider self_consistency_actor --prompt_level 5 --num_trails 1
 
  # self-ask
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 1 --num_trails 1
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider selfask_actor --prompt_level 5 --num_trails 1
 
  # SPP
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 2 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 4 --num_trails 1 --distiller traj_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider spp_actor --prompt_level 5 --num_trails 1
 
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider reflexion_actor --prompt_level 4 --num_trails 1 --distiller reflect_distiller --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
  python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
 
+ # exe
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider exe_actor --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider exe_actor --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/frozenlake"
+ python main_reflexion.py --env_name FrozenLake-v1 --init_summarizer frozenlake_init_translator --curr_summarizer frozenlake_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
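The five exe_actor commands above differ only in --prompt_level, in --num_trails (5 at level 3, otherwise 1), and in whether --prompt_path points at the few-shot examples (levels 2 and 4). A minimal sketch of the same sweep as a loop, not part of the commit; every flag name and value is taken verbatim from the commands above, only the loop itself is illustrative:

# Sketch only: sweep exe_actor prompt levels 1-5 on FrozenLake-v1.
for level in 1 2 3 4 5; do
    trails=1; extra=""
    [ "$level" -eq 3 ] && trails=5                     # level 3 runs 5 trails above
    if [ "$level" -eq 2 ] || [ "$level" -eq 4 ]; then  # levels 2 and 4 add few-shot examples
        extra='--prompt_path envs/toy_text/few_shot_examples/frozenlake'
    fi
    python main_reflexion.py --env_name FrozenLake-v1 \
        --init_summarizer frozenlake_init_translator \
        --curr_summarizer frozenlake_basic_translator \
        --decider exe_actor --prompt_level "$level" \
        --num_trails "$trails" --distiller guide_generator $extra
done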
shell/test_jarvis_woi.sh DELETED
@@ -1,55 +0,0 @@
- # Acrobot-v1
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # Blackjack-v1
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 50 --distiller guide_generator
-
- # CartPole-v0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
-
- # CliffWalking-v0
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # LunarLander-v2
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
-
- # MountainCar-v0
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # MountainCarContinuous-v0
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # Taxi-v3
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_woi --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_woi --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_woi --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_woi --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_woi --prompt_level 5 --num_trails 1 --distiller guide_generator
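The deleted shell/test_jarvis_woi.sh above and the shell/test_jarvis_wosh.sh and shell/test_jarvis_wosug.sh files below are identical apart from the value passed to --decider. A sketch of their shared template, shown for the Acrobot-v1 section and parameterized over the decider name; note the jarvis_actor_* deciders are removed by this commit, so this documents the structure of the old scripts rather than a runnable configuration against the new code:

# Sketch only: shared Acrobot-v1 section of the three deleted ablation scripts.
run_acrobot_sweep() {
    decider="$1"  # jarvis_actor_woi, jarvis_actor_wosh, or jarvis_actor_wosug
    for level in 1 2 3 4 5; do
        trails=1; extra=""
        [ "$level" -eq 3 ] && trails=3                     # level 3 ran 3 trails
        if [ "$level" -eq 2 ] || [ "$level" -eq 4 ]; then  # levels 2 and 4 used few-shot examples
            extra='--prompt_path envs/classic_control/few_shot_examples/acrobot'
        fi
        python main_reflexion.py --env_name Acrobot-v1 \
            --init_summarizer acrobot_init_translator \
            --curr_summarizer acrobot_basic_translator \
            --decider "$decider" --prompt_level "$level" \
            --num_trails "$trails" --distiller guide_generator $extra
    done
}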
shell/test_jarvis_wosh.sh DELETED
@@ -1,55 +0,0 @@
- # Acrobot-v1
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # Blackjack-v1
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 50 --distiller guide_generator
-
- # CartPole-v0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
-
- # CliffWalking-v0
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # LunarLander-v2
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
-
- # MountainCar-v0
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # MountainCarContinuous-v0
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # Taxi-v3
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosh --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosh --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosh --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosh --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosh --prompt_level 5 --num_trails 1 --distiller guide_generator
shell/test_jarvis_wosug.sh DELETED
@@ -1,55 +0,0 @@
- # Acrobot-v1
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/acrobot"
- python main_reflexion.py --env_name Acrobot-v1 --init_summarizer acrobot_init_translator --curr_summarizer acrobot_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # Blackjack-v1
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 50 --distiller guide_generator
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 50 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/blackjack"
- python main_reflexion.py --env_name Blackjack-v1 --init_summarizer blackjack_init_translator --curr_summarizer blackjack_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 50 --distiller guide_generator
-
- # CartPole-v0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/cartpole" --seed 0
- python main_reflexion.py --env_name CartPole-v0 --init_summarizer cart_init_translator --curr_summarizer cart_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
-
- # CliffWalking-v0
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/cliffwalking"
- python main_reflexion.py --env_name CliffWalking-v0 --init_summarizer cliffwalking_init_translator --curr_summarizer cliffwalking_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # LunarLander-v2
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/box2d/few_shot_examples/lunarlander" --seed 0
- python main_reflexion.py --env_name LunarLander-v2 --init_summarizer lunarLander_init_translator --curr_summarizer lunarLander_basic_translator --seed 0 --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator --seed 0
-
- # MountainCar-v0
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincar"
- python main_reflexion.py --env_name MountainCar-v0 --init_summarizer mountaincar_init_translator --curr_summarizer mountaincar_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # MountainCarContinuous-v0
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/classic_control/few_shot_examples/mountaincarContinuous"
- python main_reflexion.py --env_name MountainCarContinuous-v0 --init_summarizer mountaincarContinuous_init_translator --curr_summarizer mountaincarContinuous_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator
-
- # Taxi-v3
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosug --prompt_level 1 --num_trails 1 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosug --prompt_level 2 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosug --prompt_level 3 --num_trails 3 --distiller guide_generator
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosug --prompt_level 4 --num_trails 1 --distiller guide_generator --prompt_path "envs/toy_text/few_shot_examples/taxi"
- python main_reflexion.py --env_name Taxi-v3 --init_summarizer taxi_init_translator --curr_summarizer taxi_basic_translator --decider jarvis_actor_wosug --prompt_level 5 --num_trails 1 --distiller guide_generator
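With the run_acrobot_sweep sketch above, the Acrobot-v1 sections of the three deleted scripts reduce to one call each (again illustrative only, since these decider names no longer exist after this commit):

run_acrobot_sweep jarvis_actor_woi    # shell/test_jarvis_woi.sh
run_acrobot_sweep jarvis_actor_wosh   # shell/test_jarvis_wosh.sh
run_acrobot_sweep jarvis_actor_wosug  # shell/test_jarvis_wosug.sh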