diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -1,50 +1,586 @@ -[2024-12-18 23:21:58,688][00179] Saving configuration to /content/train_dir/default_experiment/config.json... -[2024-12-18 23:21:58,692][00179] Rollout worker 0 uses device cpu -[2024-12-18 23:21:58,695][00179] Rollout worker 1 uses device cpu -[2024-12-18 23:21:58,697][00179] Rollout worker 2 uses device cpu -[2024-12-18 23:21:58,699][00179] Rollout worker 3 uses device cpu -[2024-12-18 23:21:58,702][00179] Rollout worker 4 uses device cpu -[2024-12-18 23:21:58,703][00179] Rollout worker 5 uses device cpu -[2024-12-18 23:21:58,704][00179] Rollout worker 6 uses device cpu -[2024-12-18 23:21:58,706][00179] Rollout worker 7 uses device cpu -[2024-12-18 23:21:58,902][00179] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-12-18 23:21:58,904][00179] InferenceWorker_p0-w0: min num requests: 2 -[2024-12-18 23:21:58,950][00179] Starting all processes... -[2024-12-18 23:21:58,952][00179] Starting process learner_proc0 -[2024-12-18 23:21:59,027][00179] Starting all processes... -[2024-12-18 23:21:59,080][00179] Starting process inference_proc0-0 -[2024-12-18 23:21:59,085][00179] Starting process rollout_proc0 -[2024-12-18 23:21:59,092][00179] Starting process rollout_proc1 -[2024-12-18 23:21:59,092][00179] Starting process rollout_proc2 -[2024-12-18 23:21:59,092][00179] Starting process rollout_proc3 -[2024-12-18 23:21:59,092][00179] Starting process rollout_proc4 -[2024-12-18 23:21:59,092][00179] Starting process rollout_proc5 -[2024-12-18 23:21:59,092][00179] Starting process rollout_proc6 -[2024-12-18 23:21:59,092][00179] Starting process rollout_proc7 -[2024-12-18 23:22:17,081][02163] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-12-18 23:22:17,087][02163] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-12-18 23:22:17,149][02163] Num visible devices: 1 -[2024-12-18 23:22:17,173][02182] Worker 4 uses CPU cores [0] -[2024-12-18 23:22:17,188][02163] Starting seed is not provided -[2024-12-18 23:22:17,188][02163] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-12-18 23:22:17,188][02163] Initializing actor-critic model on device cuda:0 -[2024-12-18 23:22:17,189][02163] RunningMeanStd input shape: (3, 72, 128) -[2024-12-18 23:22:17,192][02163] RunningMeanStd input shape: (1,) -[2024-12-18 23:22:17,276][02178] Worker 1 uses CPU cores [1] -[2024-12-18 23:22:17,276][02163] ConvEncoder: input_channels=3 -[2024-12-18 23:22:17,327][02176] Worker 0 uses CPU cores [0] -[2024-12-18 23:22:17,360][02183] Worker 6 uses CPU cores [0] -[2024-12-18 23:22:17,386][02184] Worker 7 uses CPU cores [1] -[2024-12-18 23:22:17,442][02179] Worker 2 uses CPU cores [0] -[2024-12-18 23:22:17,483][02180] Worker 3 uses CPU cores [1] -[2024-12-18 23:22:17,566][02177] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-12-18 23:22:17,566][02177] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-12-18 23:22:17,602][02177] Num visible devices: 1 -[2024-12-18 23:22:17,636][02181] Worker 5 uses CPU cores [1] -[2024-12-18 23:22:17,687][02163] Conv encoder output size: 512 -[2024-12-18 23:22:17,687][02163] Policy head output size: 512 -[2024-12-18 23:22:17,751][02163] Created Actor Critic model with architecture: -[2024-12-18 23:22:17,751][02163] ActorCriticSharedWeights( +[2024-12-19 09:48:11,879][00337] Saving configuration to /content/train_dir/default_experiment/config.json... 
+[2024-12-19 09:48:11,882][00337] Rollout worker 0 uses device cpu +[2024-12-19 09:48:11,885][00337] Rollout worker 1 uses device cpu +[2024-12-19 09:48:11,888][00337] Rollout worker 2 uses device cpu +[2024-12-19 09:48:11,891][00337] Rollout worker 3 uses device cpu +[2024-12-19 09:48:11,893][00337] Rollout worker 4 uses device cpu +[2024-12-19 09:48:11,894][00337] Rollout worker 5 uses device cpu +[2024-12-19 09:48:11,896][00337] Rollout worker 6 uses device cpu +[2024-12-19 09:48:11,897][00337] Rollout worker 7 uses device cpu +[2024-12-19 09:48:12,083][00337] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-19 09:48:12,085][00337] InferenceWorker_p0-w0: min num requests: 2 +[2024-12-19 09:48:12,119][00337] Starting all processes... +[2024-12-19 09:48:12,121][00337] Starting process learner_proc0 +[2024-12-19 09:48:12,126][00337] EvtLoop [Runner_EvtLoop, process=main process 337] unhandled exception in slot='_on_start' connected to emitter=Emitter(object_id='Runner_EvtLoop', signal_name='start'), args=() +Traceback (most recent call last): + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal + slot_callable(*args) + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner_parallel.py", line 49, in _on_start + self._start_processes() + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner_parallel.py", line 56, in _start_processes + p.start() + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 515, in start + self._process.start() + File "/usr/lib/python3.10/multiprocessing/process.py", line 121, in start + self._popen = self._Popen(self) + File "/usr/lib/python3.10/multiprocessing/context.py", line 288, in _Popen + return Popen(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 32, in __init__ + super().__init__(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_fork.py", line 19, in __init__ + self._launch(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 47, in _launch + reduction.dump(process_obj, fp) + File "/usr/lib/python3.10/multiprocessing/reduction.py", line 60, in dump + ForkingPickler(file, protocol).dump(obj) +TypeError: cannot pickle 'TLSBuffer' object +[2024-12-19 09:48:12,130][00337] Unhandled exception cannot pickle 'TLSBuffer' object in evt loop Runner_EvtLoop +[2024-12-19 09:48:12,134][00337] Uncaught exception in Runner evt loop +Traceback (most recent call last): + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner.py", line 770, in run + evt_loop_status = self.event_loop.exec() + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 403, in exec + raise exc + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 399, in exec + while self._loop_iteration(): + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 383, in _loop_iteration + self._process_signal(s) + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 358, in _process_signal + raise exc + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal + slot_callable(*args) + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner_parallel.py", line 49, in _on_start + self._start_processes() + File 
"/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner_parallel.py", line 56, in _start_processes + p.start() + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 515, in start + self._process.start() + File "/usr/lib/python3.10/multiprocessing/process.py", line 121, in start + self._popen = self._Popen(self) + File "/usr/lib/python3.10/multiprocessing/context.py", line 288, in _Popen + return Popen(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 32, in __init__ + super().__init__(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_fork.py", line 19, in __init__ + self._launch(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 47, in _launch + reduction.dump(process_obj, fp) + File "/usr/lib/python3.10/multiprocessing/reduction.py", line 60, in dump + ForkingPickler(file, protocol).dump(obj) +TypeError: cannot pickle 'TLSBuffer' object +[2024-12-19 09:48:12,135][00337] Runner profile tree view: +main_loop: 0.0165 +[2024-12-19 09:48:12,139][00337] Collected {}, FPS: 0.0 +[2024-12-19 09:56:20,083][00337] Environment doom_basic already registered, overwriting... +[2024-12-19 09:56:20,086][00337] Environment doom_two_colors_easy already registered, overwriting... +[2024-12-19 09:56:20,087][00337] Environment doom_two_colors_hard already registered, overwriting... +[2024-12-19 09:56:20,089][00337] Environment doom_dm already registered, overwriting... +[2024-12-19 09:56:20,091][00337] Environment doom_dwango5 already registered, overwriting... +[2024-12-19 09:56:20,091][00337] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2024-12-19 09:56:20,094][00337] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2024-12-19 09:56:20,095][00337] Environment doom_my_way_home already registered, overwriting... +[2024-12-19 09:56:20,096][00337] Environment doom_deadly_corridor already registered, overwriting... +[2024-12-19 09:56:20,097][00337] Environment doom_defend_the_center already registered, overwriting... +[2024-12-19 09:56:20,098][00337] Environment doom_defend_the_line already registered, overwriting... +[2024-12-19 09:56:20,099][00337] Environment doom_health_gathering already registered, overwriting... +[2024-12-19 09:56:20,100][00337] Environment doom_health_gathering_supreme already registered, overwriting... +[2024-12-19 09:56:20,102][00337] Environment doom_battle already registered, overwriting... +[2024-12-19 09:56:20,103][00337] Environment doom_battle2 already registered, overwriting... +[2024-12-19 09:56:20,104][00337] Environment doom_duel_bots already registered, overwriting... +[2024-12-19 09:56:20,105][00337] Environment doom_deathmatch_bots already registered, overwriting... +[2024-12-19 09:56:20,106][00337] Environment doom_duel already registered, overwriting... +[2024-12-19 09:56:20,107][00337] Environment doom_deathmatch_full already registered, overwriting... +[2024-12-19 09:56:20,108][00337] Environment doom_benchmark already registered, overwriting... +[2024-12-19 09:56:20,110][00337] register_encoder_factory: +[2024-12-19 09:56:20,135][00337] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-12-19 09:56:20,141][00337] Experiment dir /content/train_dir/default_experiment already exists! +[2024-12-19 09:56:20,143][00337] Resuming existing experiment from /content/train_dir/default_experiment... 
+[2024-12-19 09:56:20,145][00337] Weights and Biases integration disabled +[2024-12-19 09:56:20,149][00337] Environment var CUDA_VISIBLE_DEVICES is 0 + +[2024-12-19 09:56:22,335][00337] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/content/train_dir +restart_behavior=resume +device=gpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=10000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None +wandb_job_type=SF +wandb_tags=[] +with_pbt=False +pbt_mix_policies_in_one_env=True +pbt_period_env_steps=5000000 +pbt_start_mutation=20000000 +pbt_replace_fraction=0.3 +pbt_mutation_rate=0.15 +pbt_replace_reward_gap=0.1 +pbt_replace_reward_gap_absolute=1e-06 +pbt_optimize_gamma=False +pbt_target_objective=true_objective +pbt_perturb_min=1.1 +pbt_perturb_max=1.5 +num_agents=-1 +num_humans=0 +num_bots=-1 +start_bot_difficulty=None +timelimit=None +res_w=128 +res_h=72 +wide_aspect_ratio=False +eval_env_frameskip=1 +fps=35 +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=10000000 +cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 10000000} +git_hash=unknown +git_repo_name=not a git repository +[2024-12-19 09:56:22,337][00337] Saving configuration to /content/train_dir/default_experiment/config.json... 
+[2024-12-19 09:56:22,340][00337] Rollout worker 0 uses device cpu +[2024-12-19 09:56:22,341][00337] Rollout worker 1 uses device cpu +[2024-12-19 09:56:22,343][00337] Rollout worker 2 uses device cpu +[2024-12-19 09:56:22,345][00337] Rollout worker 3 uses device cpu +[2024-12-19 09:56:22,346][00337] Rollout worker 4 uses device cpu +[2024-12-19 09:56:22,347][00337] Rollout worker 5 uses device cpu +[2024-12-19 09:56:22,348][00337] Rollout worker 6 uses device cpu +[2024-12-19 09:56:22,349][00337] Rollout worker 7 uses device cpu +[2024-12-19 09:56:22,446][00337] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-19 09:56:22,447][00337] InferenceWorker_p0-w0: min num requests: 2 +[2024-12-19 09:56:22,486][00337] Starting all processes... +[2024-12-19 09:56:22,487][00337] Starting process learner_proc0 +[2024-12-19 09:56:22,492][00337] EvtLoop [Runner_EvtLoop, process=main process 337] unhandled exception in slot='_on_start' connected to emitter=Emitter(object_id='Runner_EvtLoop', signal_name='start'), args=() +Traceback (most recent call last): + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal + slot_callable(*args) + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner_parallel.py", line 49, in _on_start + self._start_processes() + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner_parallel.py", line 56, in _start_processes + p.start() + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 515, in start + self._process.start() + File "/usr/lib/python3.10/multiprocessing/process.py", line 121, in start + self._popen = self._Popen(self) + File "/usr/lib/python3.10/multiprocessing/context.py", line 288, in _Popen + return Popen(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 32, in __init__ + super().__init__(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_fork.py", line 19, in __init__ + self._launch(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 47, in _launch + reduction.dump(process_obj, fp) + File "/usr/lib/python3.10/multiprocessing/reduction.py", line 60, in dump + ForkingPickler(file, protocol).dump(obj) +TypeError: cannot pickle 'TLSBuffer' object +[2024-12-19 09:56:22,494][00337] Unhandled exception cannot pickle 'TLSBuffer' object in evt loop Runner_EvtLoop +[2024-12-19 09:56:22,496][00337] Uncaught exception in Runner evt loop +Traceback (most recent call last): + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner.py", line 770, in run + evt_loop_status = self.event_loop.exec() + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 403, in exec + raise exc + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 399, in exec + while self._loop_iteration(): + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 383, in _loop_iteration + self._process_signal(s) + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 358, in _process_signal + raise exc + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal + slot_callable(*args) + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner_parallel.py", line 49, in _on_start + self._start_processes() + File 
"/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner_parallel.py", line 56, in _start_processes + p.start() + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 515, in start + self._process.start() + File "/usr/lib/python3.10/multiprocessing/process.py", line 121, in start + self._popen = self._Popen(self) + File "/usr/lib/python3.10/multiprocessing/context.py", line 288, in _Popen + return Popen(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 32, in __init__ + super().__init__(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_fork.py", line 19, in __init__ + self._launch(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 47, in _launch + reduction.dump(process_obj, fp) + File "/usr/lib/python3.10/multiprocessing/reduction.py", line 60, in dump + ForkingPickler(file, protocol).dump(obj) +TypeError: cannot pickle 'TLSBuffer' object +[2024-12-19 09:56:22,499][00337] Runner profile tree view: +main_loop: 0.0129 +[2024-12-19 09:56:22,500][00337] Collected {}, FPS: 0.0 +[2024-12-19 09:56:39,683][00337] Environment doom_basic already registered, overwriting... +[2024-12-19 09:56:39,685][00337] Environment doom_two_colors_easy already registered, overwriting... +[2024-12-19 09:56:39,687][00337] Environment doom_two_colors_hard already registered, overwriting... +[2024-12-19 09:56:39,689][00337] Environment doom_dm already registered, overwriting... +[2024-12-19 09:56:39,691][00337] Environment doom_dwango5 already registered, overwriting... +[2024-12-19 09:56:39,692][00337] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2024-12-19 09:56:39,694][00337] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2024-12-19 09:56:39,695][00337] Environment doom_my_way_home already registered, overwriting... +[2024-12-19 09:56:39,698][00337] Environment doom_deadly_corridor already registered, overwriting... +[2024-12-19 09:56:39,699][00337] Environment doom_defend_the_center already registered, overwriting... +[2024-12-19 09:56:39,702][00337] Environment doom_defend_the_line already registered, overwriting... +[2024-12-19 09:56:39,703][00337] Environment doom_health_gathering already registered, overwriting... +[2024-12-19 09:56:39,704][00337] Environment doom_health_gathering_supreme already registered, overwriting... +[2024-12-19 09:56:39,707][00337] Environment doom_battle already registered, overwriting... +[2024-12-19 09:56:39,709][00337] Environment doom_battle2 already registered, overwriting... +[2024-12-19 09:56:39,709][00337] Environment doom_duel_bots already registered, overwriting... +[2024-12-19 09:56:39,710][00337] Environment doom_deathmatch_bots already registered, overwriting... +[2024-12-19 09:56:39,713][00337] Environment doom_duel already registered, overwriting... +[2024-12-19 09:56:39,714][00337] Environment doom_deathmatch_full already registered, overwriting... +[2024-12-19 09:56:39,716][00337] Environment doom_benchmark already registered, overwriting... +[2024-12-19 09:56:39,717][00337] register_encoder_factory: +[2024-12-19 09:56:39,733][00337] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-12-19 09:56:39,736][00337] Overriding arg 'train_for_env_steps' with value 4000000 passed from command line +[2024-12-19 09:56:39,742][00337] Experiment dir /content/train_dir/default_experiment already exists! 
+[2024-12-19 09:56:39,743][00337] Resuming existing experiment from /content/train_dir/default_experiment... +[2024-12-19 09:56:39,745][00337] Weights and Biases integration disabled +[2024-12-19 09:56:39,748][00337] Environment var CUDA_VISIBLE_DEVICES is 0 + +[2024-12-19 09:56:41,915][00337] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/content/train_dir +restart_behavior=resume +device=gpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=4000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None +wandb_job_type=SF +wandb_tags=[] +with_pbt=False +pbt_mix_policies_in_one_env=True +pbt_period_env_steps=5000000 +pbt_start_mutation=20000000 +pbt_replace_fraction=0.3 +pbt_mutation_rate=0.15 +pbt_replace_reward_gap=0.1 +pbt_replace_reward_gap_absolute=1e-06 +pbt_optimize_gamma=False +pbt_target_objective=true_objective +pbt_perturb_min=1.1 +pbt_perturb_max=1.5 +num_agents=-1 +num_humans=0 +num_bots=-1 +start_bot_difficulty=None +timelimit=None +res_w=128 +res_h=72 +wide_aspect_ratio=False +eval_env_frameskip=1 +fps=35 +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=10000000 +cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 10000000} +git_hash=unknown +git_repo_name=not a git repository +[2024-12-19 09:56:41,916][00337] Saving configuration to 
/content/train_dir/default_experiment/config.json... +[2024-12-19 09:56:41,920][00337] Rollout worker 0 uses device cpu +[2024-12-19 09:56:41,921][00337] Rollout worker 1 uses device cpu +[2024-12-19 09:56:41,922][00337] Rollout worker 2 uses device cpu +[2024-12-19 09:56:41,924][00337] Rollout worker 3 uses device cpu +[2024-12-19 09:56:41,925][00337] Rollout worker 4 uses device cpu +[2024-12-19 09:56:41,926][00337] Rollout worker 5 uses device cpu +[2024-12-19 09:56:41,928][00337] Rollout worker 6 uses device cpu +[2024-12-19 09:56:41,929][00337] Rollout worker 7 uses device cpu +[2024-12-19 09:56:42,028][00337] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-19 09:56:42,030][00337] InferenceWorker_p0-w0: min num requests: 2 +[2024-12-19 09:56:42,064][00337] Starting all processes... +[2024-12-19 09:56:42,066][00337] Starting process learner_proc0 +[2024-12-19 09:56:42,071][00337] EvtLoop [Runner_EvtLoop, process=main process 337] unhandled exception in slot='_on_start' connected to emitter=Emitter(object_id='Runner_EvtLoop', signal_name='start'), args=() +Traceback (most recent call last): + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal + slot_callable(*args) + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner_parallel.py", line 49, in _on_start + self._start_processes() + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner_parallel.py", line 56, in _start_processes + p.start() + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 515, in start + self._process.start() + File "/usr/lib/python3.10/multiprocessing/process.py", line 121, in start + self._popen = self._Popen(self) + File "/usr/lib/python3.10/multiprocessing/context.py", line 288, in _Popen + return Popen(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 32, in __init__ + super().__init__(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_fork.py", line 19, in __init__ + self._launch(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 47, in _launch + reduction.dump(process_obj, fp) + File "/usr/lib/python3.10/multiprocessing/reduction.py", line 60, in dump + ForkingPickler(file, protocol).dump(obj) +TypeError: cannot pickle 'TLSBuffer' object +[2024-12-19 09:56:42,072][00337] Unhandled exception cannot pickle 'TLSBuffer' object in evt loop Runner_EvtLoop +[2024-12-19 09:56:42,074][00337] Uncaught exception in Runner evt loop +Traceback (most recent call last): + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner.py", line 770, in run + evt_loop_status = self.event_loop.exec() + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 403, in exec + raise exc + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 399, in exec + while self._loop_iteration(): + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 383, in _loop_iteration + self._process_signal(s) + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 358, in _process_signal + raise exc + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal + slot_callable(*args) + File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner_parallel.py", line 49, in _on_start + self._start_processes() + File 
"/usr/local/lib/python3.10/dist-packages/sample_factory/algo/runners/runner_parallel.py", line 56, in _start_processes + p.start() + File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 515, in start + self._process.start() + File "/usr/lib/python3.10/multiprocessing/process.py", line 121, in start + self._popen = self._Popen(self) + File "/usr/lib/python3.10/multiprocessing/context.py", line 288, in _Popen + return Popen(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 32, in __init__ + super().__init__(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_fork.py", line 19, in __init__ + self._launch(process_obj) + File "/usr/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 47, in _launch + reduction.dump(process_obj, fp) + File "/usr/lib/python3.10/multiprocessing/reduction.py", line 60, in dump + ForkingPickler(file, protocol).dump(obj) +TypeError: cannot pickle 'TLSBuffer' object +[2024-12-19 09:56:42,076][00337] Runner profile tree view: +main_loop: 0.0125 +[2024-12-19 09:56:42,078][00337] Collected {}, FPS: 0.0 +[2024-12-19 09:57:56,175][07135] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-12-19 09:57:56,182][07135] Rollout worker 0 uses device cpu +[2024-12-19 09:57:56,183][07135] Rollout worker 1 uses device cpu +[2024-12-19 09:57:56,188][07135] Rollout worker 2 uses device cpu +[2024-12-19 09:57:56,189][07135] Rollout worker 3 uses device cpu +[2024-12-19 09:57:56,191][07135] Rollout worker 4 uses device cpu +[2024-12-19 09:57:56,194][07135] Rollout worker 5 uses device cpu +[2024-12-19 09:57:56,203][07135] Rollout worker 6 uses device cpu +[2024-12-19 09:57:56,205][07135] Rollout worker 7 uses device cpu +[2024-12-19 09:57:56,382][07135] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-19 09:57:56,385][07135] InferenceWorker_p0-w0: min num requests: 2 +[2024-12-19 09:57:56,479][07135] Starting all processes... +[2024-12-19 09:57:56,484][07135] Starting process learner_proc0 +[2024-12-19 09:57:56,620][07135] Starting all processes... 
+[2024-12-19 09:57:56,651][07135] Starting process inference_proc0-0 +[2024-12-19 09:57:56,654][07135] Starting process rollout_proc0 +[2024-12-19 09:57:56,654][07135] Starting process rollout_proc1 +[2024-12-19 09:57:56,654][07135] Starting process rollout_proc2 +[2024-12-19 09:57:56,654][07135] Starting process rollout_proc3 +[2024-12-19 09:57:56,654][07135] Starting process rollout_proc4 +[2024-12-19 09:57:56,654][07135] Starting process rollout_proc5 +[2024-12-19 09:57:56,654][07135] Starting process rollout_proc6 +[2024-12-19 09:57:56,654][07135] Starting process rollout_proc7 +[2024-12-19 09:58:12,921][07461] Worker 0 uses CPU cores [0] +[2024-12-19 09:58:13,085][07469] Worker 3 uses CPU cores [1] +[2024-12-19 09:58:13,097][07462] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-19 09:58:13,097][07462] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-12-19 09:58:13,175][07465] Worker 4 uses CPU cores [0] +[2024-12-19 09:58:13,177][07462] Num visible devices: 1 +[2024-12-19 09:58:13,178][07464] Worker 2 uses CPU cores [0] +[2024-12-19 09:58:13,257][07468] Worker 6 uses CPU cores [0] +[2024-12-19 09:58:13,285][07448] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-19 09:58:13,285][07448] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-12-19 09:58:13,286][07466] Worker 7 uses CPU cores [1] +[2024-12-19 09:58:13,315][07467] Worker 5 uses CPU cores [1] +[2024-12-19 09:58:13,320][07448] Num visible devices: 1 +[2024-12-19 09:58:13,341][07448] Starting seed is not provided +[2024-12-19 09:58:13,341][07448] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-19 09:58:13,341][07448] Initializing actor-critic model on device cuda:0 +[2024-12-19 09:58:13,342][07448] RunningMeanStd input shape: (3, 72, 128) +[2024-12-19 09:58:13,343][07463] Worker 1 uses CPU cores [1] +[2024-12-19 09:58:13,345][07448] RunningMeanStd input shape: (1,) +[2024-12-19 09:58:13,360][07448] ConvEncoder: input_channels=3 +[2024-12-19 09:58:13,690][07448] Conv encoder output size: 512 +[2024-12-19 09:58:13,690][07448] Policy head output size: 512 +[2024-12-19 09:58:13,756][07448] Created Actor Critic model with architecture: +[2024-12-19 09:58:13,756][07448] ActorCriticSharedWeights( (obs_normalizer): ObservationNormalizer( (running_mean_std): RunningMeanStdDictInPlace( (running_mean_std): ModuleDict( @@ -85,1613 +621,2537 @@ (distribution_linear): Linear(in_features=512, out_features=5, bias=True) ) ) -[2024-12-18 23:22:18,041][02163] Using optimizer -[2024-12-18 23:22:18,900][00179] Heartbeat connected on Batcher_0 -[2024-12-18 23:22:18,905][00179] Heartbeat connected on InferenceWorker_p0-w0 -[2024-12-18 23:22:18,914][00179] Heartbeat connected on RolloutWorker_w0 -[2024-12-18 23:22:18,918][00179] Heartbeat connected on RolloutWorker_w1 -[2024-12-18 23:22:18,923][00179] Heartbeat connected on RolloutWorker_w2 -[2024-12-18 23:22:18,928][00179] Heartbeat connected on RolloutWorker_w3 -[2024-12-18 23:22:18,934][00179] Heartbeat connected on RolloutWorker_w4 -[2024-12-18 23:22:18,944][00179] Heartbeat connected on RolloutWorker_w5 -[2024-12-18 23:22:18,946][00179] Heartbeat connected on RolloutWorker_w6 -[2024-12-18 23:22:18,951][00179] Heartbeat connected on RolloutWorker_w7 -[2024-12-18 23:22:21,352][02163] No checkpoints found -[2024-12-18 23:22:21,352][02163] Did not load from checkpoint, starting from scratch! 
-[2024-12-18 23:22:21,352][02163] Initialized policy 0 weights for model version 0 -[2024-12-18 23:22:21,356][02163] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-12-18 23:22:21,362][02163] LearnerWorker_p0 finished initialization! -[2024-12-18 23:22:21,365][00179] Heartbeat connected on LearnerWorker_p0 -[2024-12-18 23:22:21,450][02177] RunningMeanStd input shape: (3, 72, 128) -[2024-12-18 23:22:21,452][02177] RunningMeanStd input shape: (1,) -[2024-12-18 23:22:21,464][02177] ConvEncoder: input_channels=3 -[2024-12-18 23:22:21,572][02177] Conv encoder output size: 512 -[2024-12-18 23:22:21,572][02177] Policy head output size: 512 -[2024-12-18 23:22:21,623][00179] Inference worker 0-0 is ready! -[2024-12-18 23:22:21,625][00179] All inference workers are ready! Signal rollout workers to start! -[2024-12-18 23:22:21,827][02184] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-12-18 23:22:21,826][02181] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-12-18 23:22:21,830][02178] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-12-18 23:22:21,832][02180] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-12-18 23:22:21,832][02176] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-12-18 23:22:21,830][02183] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-12-18 23:22:21,836][02182] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-12-18 23:22:21,827][02179] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-12-18 23:22:22,866][02179] Decorrelating experience for 0 frames... -[2024-12-18 23:22:22,864][02176] Decorrelating experience for 0 frames... -[2024-12-18 23:22:23,223][02180] Decorrelating experience for 0 frames... -[2024-12-18 23:22:23,214][02184] Decorrelating experience for 0 frames... -[2024-12-18 23:22:23,237][02181] Decorrelating experience for 0 frames... -[2024-12-18 23:22:23,787][00179] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-12-18 23:22:23,909][02179] Decorrelating experience for 32 frames... -[2024-12-18 23:22:23,994][02183] Decorrelating experience for 0 frames... -[2024-12-18 23:22:24,613][02182] Decorrelating experience for 0 frames... -[2024-12-18 23:22:25,450][02180] Decorrelating experience for 32 frames... -[2024-12-18 23:22:25,448][02178] Decorrelating experience for 0 frames... -[2024-12-18 23:22:25,586][02183] Decorrelating experience for 32 frames... -[2024-12-18 23:22:25,904][02179] Decorrelating experience for 64 frames... -[2024-12-18 23:22:26,190][02184] Decorrelating experience for 32 frames... -[2024-12-18 23:22:26,353][02181] Decorrelating experience for 32 frames... -[2024-12-18 23:22:27,755][02178] Decorrelating experience for 32 frames... -[2024-12-18 23:22:28,162][02180] Decorrelating experience for 64 frames... -[2024-12-18 23:22:28,458][02184] Decorrelating experience for 64 frames... -[2024-12-18 23:22:28,515][02179] Decorrelating experience for 96 frames... -[2024-12-18 23:22:28,521][02183] Decorrelating experience for 64 frames... -[2024-12-18 23:22:28,553][02176] Decorrelating experience for 32 frames... -[2024-12-18 23:22:28,788][00179] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-12-18 23:22:28,904][02182] Decorrelating experience for 32 frames... 
-[2024-12-18 23:22:30,374][02181] Decorrelating experience for 64 frames... -[2024-12-18 23:22:30,430][02180] Decorrelating experience for 96 frames... -[2024-12-18 23:22:30,610][02183] Decorrelating experience for 96 frames... -[2024-12-18 23:22:30,889][02176] Decorrelating experience for 64 frames... -[2024-12-18 23:22:31,011][02178] Decorrelating experience for 64 frames... -[2024-12-18 23:22:31,060][02182] Decorrelating experience for 64 frames... -[2024-12-18 23:22:32,032][02184] Decorrelating experience for 96 frames... -[2024-12-18 23:22:32,170][02181] Decorrelating experience for 96 frames... -[2024-12-18 23:22:33,047][02178] Decorrelating experience for 96 frames... -[2024-12-18 23:22:33,589][02176] Decorrelating experience for 96 frames... -[2024-12-18 23:22:33,788][00179] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 153.4. Samples: 1534. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-12-18 23:22:33,791][00179] Avg episode reward: [(0, '2.489')] -[2024-12-18 23:22:33,937][02182] Decorrelating experience for 96 frames... -[2024-12-18 23:22:35,124][02163] Signal inference workers to stop experience collection... -[2024-12-18 23:22:35,137][02177] InferenceWorker_p0-w0: stopping experience collection -[2024-12-18 23:22:37,746][02163] Signal inference workers to resume experience collection... -[2024-12-18 23:22:37,747][02177] InferenceWorker_p0-w0: resuming experience collection -[2024-12-18 23:22:38,788][00179] Fps is (10 sec: 819.2, 60 sec: 546.1, 300 sec: 546.1). Total num frames: 8192. Throughput: 0: 169.1. Samples: 2536. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-12-18 23:22:38,793][00179] Avg episode reward: [(0, '2.707')] -[2024-12-18 23:22:43,788][00179] Fps is (10 sec: 2457.6, 60 sec: 1228.8, 300 sec: 1228.8). Total num frames: 24576. Throughput: 0: 321.3. Samples: 6426. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:22:43,792][00179] Avg episode reward: [(0, '3.552')] -[2024-12-18 23:22:48,343][02177] Updated weights for policy 0, policy_version 10 (0.0163) -[2024-12-18 23:22:48,788][00179] Fps is (10 sec: 3276.9, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 40960. Throughput: 0: 449.0. Samples: 11224. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:22:48,790][00179] Avg episode reward: [(0, '4.012')] -[2024-12-18 23:22:53,788][00179] Fps is (10 sec: 4096.0, 60 sec: 2184.5, 300 sec: 2184.5). Total num frames: 65536. Throughput: 0: 489.3. Samples: 14678. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:22:53,790][00179] Avg episode reward: [(0, '4.452')] -[2024-12-18 23:22:58,790][00179] Fps is (10 sec: 3685.4, 60 sec: 2223.4, 300 sec: 2223.4). Total num frames: 77824. Throughput: 0: 570.6. Samples: 19974. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:22:58,793][00179] Avg episode reward: [(0, '4.307')] -[2024-12-18 23:22:59,338][02177] Updated weights for policy 0, policy_version 20 (0.0028) -[2024-12-18 23:23:03,790][00179] Fps is (10 sec: 2456.9, 60 sec: 2252.6, 300 sec: 2252.6). Total num frames: 90112. Throughput: 0: 576.5. Samples: 23062. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-12-18 23:23:03,793][00179] Avg episode reward: [(0, '4.218')] -[2024-12-18 23:23:08,788][00179] Fps is (10 sec: 2868.0, 60 sec: 2366.6, 300 sec: 2366.6). Total num frames: 106496. Throughput: 0: 555.8. Samples: 25012. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:23:08,790][00179] Avg episode reward: [(0, '4.284')] -[2024-12-18 23:23:08,800][02163] Saving new best policy, reward=4.284! -[2024-12-18 23:23:12,870][02177] Updated weights for policy 0, policy_version 30 (0.0035) -[2024-12-18 23:23:13,787][00179] Fps is (10 sec: 3277.8, 60 sec: 2457.6, 300 sec: 2457.6). Total num frames: 122880. Throughput: 0: 686.8. Samples: 30906. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-12-18 23:23:13,792][00179] Avg episode reward: [(0, '4.485')] -[2024-12-18 23:23:13,795][02163] Saving new best policy, reward=4.485! -[2024-12-18 23:23:18,788][00179] Fps is (10 sec: 3686.4, 60 sec: 2606.5, 300 sec: 2606.5). Total num frames: 143360. Throughput: 0: 785.8. Samples: 36894. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:23:18,797][00179] Avg episode reward: [(0, '4.311')] -[2024-12-18 23:23:23,788][00179] Fps is (10 sec: 3686.4, 60 sec: 2662.4, 300 sec: 2662.4). Total num frames: 159744. Throughput: 0: 807.2. Samples: 38860. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-12-18 23:23:23,798][00179] Avg episode reward: [(0, '4.367')] -[2024-12-18 23:23:24,760][02177] Updated weights for policy 0, policy_version 40 (0.0023) -[2024-12-18 23:23:28,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3003.7, 300 sec: 2772.7). Total num frames: 180224. Throughput: 0: 851.2. Samples: 44732. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:23:28,790][00179] Avg episode reward: [(0, '4.400')] -[2024-12-18 23:23:33,787][00179] Fps is (10 sec: 4096.1, 60 sec: 3345.1, 300 sec: 2867.2). Total num frames: 200704. Throughput: 0: 893.7. Samples: 51440. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:23:33,789][00179] Avg episode reward: [(0, '4.376')] -[2024-12-18 23:23:33,996][02177] Updated weights for policy 0, policy_version 50 (0.0021) -[2024-12-18 23:23:38,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 2894.5). Total num frames: 217088. Throughput: 0: 870.0. Samples: 53828. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:23:38,790][00179] Avg episode reward: [(0, '4.442')] -[2024-12-18 23:23:43,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 2969.6). Total num frames: 237568. Throughput: 0: 863.5. Samples: 58828. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:23:43,791][00179] Avg episode reward: [(0, '4.340')] -[2024-12-18 23:23:45,467][02177] Updated weights for policy 0, policy_version 60 (0.0028) -[2024-12-18 23:23:48,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3035.9). Total num frames: 258048. Throughput: 0: 947.6. Samples: 65702. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:23:48,792][00179] Avg episode reward: [(0, '4.323')] -[2024-12-18 23:23:48,799][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000063_258048.pth... -[2024-12-18 23:23:53,788][00179] Fps is (10 sec: 4095.8, 60 sec: 3549.8, 300 sec: 3094.7). Total num frames: 278528. Throughput: 0: 974.7. Samples: 68876. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:23:53,791][00179] Avg episode reward: [(0, '4.370')] -[2024-12-18 23:23:56,392][02177] Updated weights for policy 0, policy_version 70 (0.0026) -[2024-12-18 23:23:58,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3550.0, 300 sec: 3061.2). Total num frames: 290816. Throughput: 0: 935.3. Samples: 72994. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-12-18 23:23:58,790][00179] Avg episode reward: [(0, '4.486')] -[2024-12-18 23:24:03,787][00179] Fps is (10 sec: 3686.6, 60 sec: 3754.9, 300 sec: 3153.9). Total num frames: 315392. Throughput: 0: 948.0. Samples: 79552. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) -[2024-12-18 23:24:03,792][00179] Avg episode reward: [(0, '4.394')] -[2024-12-18 23:24:06,272][02177] Updated weights for policy 0, policy_version 80 (0.0043) -[2024-12-18 23:24:08,789][00179] Fps is (10 sec: 4505.0, 60 sec: 3822.9, 300 sec: 3198.7). Total num frames: 335872. Throughput: 0: 979.0. Samples: 82916. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) -[2024-12-18 23:24:08,798][00179] Avg episode reward: [(0, '4.373')] -[2024-12-18 23:24:13,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3165.1). Total num frames: 348160. Throughput: 0: 952.6. Samples: 87600. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-12-18 23:24:13,790][00179] Avg episode reward: [(0, '4.510')] -[2024-12-18 23:24:13,791][02163] Saving new best policy, reward=4.510! -[2024-12-18 23:24:18,072][02177] Updated weights for policy 0, policy_version 90 (0.0022) -[2024-12-18 23:24:18,788][00179] Fps is (10 sec: 3277.2, 60 sec: 3754.7, 300 sec: 3205.6). Total num frames: 368640. Throughput: 0: 933.8. Samples: 93460. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:24:18,789][00179] Avg episode reward: [(0, '4.477')] -[2024-12-18 23:24:23,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3276.8). Total num frames: 393216. Throughput: 0: 956.6. Samples: 96874. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:24:23,794][00179] Avg episode reward: [(0, '4.559')] -[2024-12-18 23:24:23,796][02163] Saving new best policy, reward=4.559! -[2024-12-18 23:24:28,756][02177] Updated weights for policy 0, policy_version 100 (0.0025) -[2024-12-18 23:24:28,788][00179] Fps is (10 sec: 4096.1, 60 sec: 3822.9, 300 sec: 3276.8). Total num frames: 409600. Throughput: 0: 967.7. Samples: 102376. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:24:28,792][00179] Avg episode reward: [(0, '4.592')] -[2024-12-18 23:24:28,802][02163] Saving new best policy, reward=4.592! -[2024-12-18 23:24:33,788][00179] Fps is (10 sec: 2457.6, 60 sec: 3618.1, 300 sec: 3213.8). Total num frames: 417792. Throughput: 0: 887.4. Samples: 105634. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:24:33,793][00179] Avg episode reward: [(0, '4.667')] -[2024-12-18 23:24:33,795][02163] Saving new best policy, reward=4.667! -[2024-12-18 23:24:38,788][00179] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3246.5). Total num frames: 438272. Throughput: 0: 862.7. Samples: 107696. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:24:38,793][00179] Avg episode reward: [(0, '4.610')] -[2024-12-18 23:24:41,378][02177] Updated weights for policy 0, policy_version 110 (0.0020) -[2024-12-18 23:24:43,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3276.8). Total num frames: 458752. Throughput: 0: 927.0. Samples: 114710. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:24:43,790][00179] Avg episode reward: [(0, '4.528')] -[2024-12-18 23:24:48,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3276.8). Total num frames: 475136. Throughput: 0: 887.9. Samples: 119508. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:24:48,792][00179] Avg episode reward: [(0, '4.498')] -[2024-12-18 23:24:53,037][02177] Updated weights for policy 0, policy_version 120 (0.0026) -[2024-12-18 23:24:53,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3276.8). Total num frames: 491520. Throughput: 0: 863.5. Samples: 121774. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:24:53,790][00179] Avg episode reward: [(0, '4.583')] -[2024-12-18 23:24:58,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3329.6). Total num frames: 516096. Throughput: 0: 912.4. Samples: 128656. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-12-18 23:24:58,790][00179] Avg episode reward: [(0, '4.676')] -[2024-12-18 23:24:58,799][02163] Saving new best policy, reward=4.676! -[2024-12-18 23:25:02,731][02177] Updated weights for policy 0, policy_version 130 (0.0015) -[2024-12-18 23:25:03,791][00179] Fps is (10 sec: 4094.5, 60 sec: 3617.9, 300 sec: 3327.9). Total num frames: 532480. Throughput: 0: 907.4. Samples: 134294. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-12-18 23:25:03,793][00179] Avg episode reward: [(0, '4.689')] -[2024-12-18 23:25:03,797][02163] Saving new best policy, reward=4.689! -[2024-12-18 23:25:08,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3326.4). Total num frames: 548864. Throughput: 0: 875.8. Samples: 136284. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:25:08,789][00179] Avg episode reward: [(0, '4.743')] -[2024-12-18 23:25:08,805][02163] Saving new best policy, reward=4.743! -[2024-12-18 23:25:13,788][00179] Fps is (10 sec: 3687.8, 60 sec: 3686.4, 300 sec: 3349.1). Total num frames: 569344. Throughput: 0: 888.0. Samples: 142338. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:25:13,790][00179] Avg episode reward: [(0, '4.720')] -[2024-12-18 23:25:14,143][02177] Updated weights for policy 0, policy_version 140 (0.0034) -[2024-12-18 23:25:18,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3370.4). Total num frames: 589824. Throughput: 0: 963.6. Samples: 148994. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) -[2024-12-18 23:25:18,794][00179] Avg episode reward: [(0, '4.474')] -[2024-12-18 23:25:23,788][00179] Fps is (10 sec: 3276.7, 60 sec: 3481.6, 300 sec: 3345.1). Total num frames: 602112. Throughput: 0: 956.0. Samples: 150718. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) -[2024-12-18 23:25:23,792][00179] Avg episode reward: [(0, '4.439')] -[2024-12-18 23:25:28,211][02177] Updated weights for policy 0, policy_version 150 (0.0025) -[2024-12-18 23:25:28,790][00179] Fps is (10 sec: 2457.0, 60 sec: 3413.2, 300 sec: 3321.0). Total num frames: 614400. Throughput: 0: 874.7. Samples: 154074. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) -[2024-12-18 23:25:28,792][00179] Avg episode reward: [(0, '4.349')] -[2024-12-18 23:25:33,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3341.5). Total num frames: 634880. Throughput: 0: 897.9. Samples: 159916. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:25:33,794][00179] Avg episode reward: [(0, '4.456')] -[2024-12-18 23:25:37,527][02177] Updated weights for policy 0, policy_version 160 (0.0013) -[2024-12-18 23:25:38,788][00179] Fps is (10 sec: 4506.7, 60 sec: 3686.4, 300 sec: 3381.8). Total num frames: 659456. Throughput: 0: 924.6. Samples: 163382. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:25:38,796][00179] Avg episode reward: [(0, '4.531')]
-[2024-12-18 23:25:43,788][00179] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3358.7). Total num frames: 671744. Throughput: 0: 885.2. Samples: 168492. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:25:43,791][00179] Avg episode reward: [(0, '4.631')]
-[2024-12-18 23:25:48,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3376.7). Total num frames: 692224. Throughput: 0: 883.0. Samples: 174024. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:25:48,794][00179] Avg episode reward: [(0, '4.461')]
-[2024-12-18 23:25:48,803][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000169_692224.pth...
-[2024-12-18 23:25:49,236][02177] Updated weights for policy 0, policy_version 170 (0.0029)
-[2024-12-18 23:25:53,787][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3413.3). Total num frames: 716800. Throughput: 0: 916.0. Samples: 177506. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:25:53,793][00179] Avg episode reward: [(0, '4.379')]
-[2024-12-18 23:25:58,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3410.2). Total num frames: 733184. Throughput: 0: 915.3. Samples: 183528. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:25:58,791][00179] Avg episode reward: [(0, '4.605')]
-[2024-12-18 23:25:59,543][02177] Updated weights for policy 0, policy_version 180 (0.0024)
-[2024-12-18 23:26:03,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.4, 300 sec: 3407.1). Total num frames: 749568. Throughput: 0: 867.7. Samples: 188042. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:26:03,794][00179] Avg episode reward: [(0, '4.750')]
-[2024-12-18 23:26:03,797][02163] Saving new best policy, reward=4.750!
-[2024-12-18 23:26:08,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3422.4). Total num frames: 770048. Throughput: 0: 903.8. Samples: 191390. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:26:08,794][00179] Avg episode reward: [(0, '4.850')]
-[2024-12-18 23:26:08,801][02163] Saving new best policy, reward=4.850!
-[2024-12-18 23:26:09,987][02177] Updated weights for policy 0, policy_version 190 (0.0039)
-[2024-12-18 23:26:13,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3437.1). Total num frames: 790528. Throughput: 0: 975.6. Samples: 197972. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:26:13,795][00179] Avg episode reward: [(0, '4.817')]
-[2024-12-18 23:26:18,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3433.7). Total num frames: 806912. Throughput: 0: 937.3. Samples: 202092. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:26:18,791][00179] Avg episode reward: [(0, '4.764')]
-[2024-12-18 23:26:21,995][02177] Updated weights for policy 0, policy_version 200 (0.0019)
-[2024-12-18 23:26:23,787][00179] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3430.4). Total num frames: 823296. Throughput: 0: 924.5. Samples: 204986. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:26:23,790][00179] Avg episode reward: [(0, '4.554')]
-[2024-12-18 23:26:28,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3891.4, 300 sec: 3460.7). Total num frames: 847872. Throughput: 0: 962.4. Samples: 211798. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:26:28,790][00179] Avg episode reward: [(0, '4.390')]
-[2024-12-18 23:26:31,804][02177] Updated weights for policy 0, policy_version 210 (0.0039)
-[2024-12-18 23:26:33,791][00179] Fps is (10 sec: 4094.5, 60 sec: 3822.7, 300 sec: 3457.0). Total num frames: 864256. Throughput: 0: 950.4. Samples: 216796. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:26:33,794][00179] Avg episode reward: [(0, '4.443')]
-[2024-12-18 23:26:38,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3453.5). Total num frames: 880640. Throughput: 0: 919.2. Samples: 218870. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:26:38,789][00179] Avg episode reward: [(0, '4.585')]
-[2024-12-18 23:26:42,880][02177] Updated weights for policy 0, policy_version 220 (0.0027)
-[2024-12-18 23:26:43,788][00179] Fps is (10 sec: 4097.5, 60 sec: 3891.2, 300 sec: 3481.6). Total num frames: 905216. Throughput: 0: 938.3. Samples: 225750. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:26:43,789][00179] Avg episode reward: [(0, '4.710')]
-[2024-12-18 23:26:48,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3477.7). Total num frames: 921600. Throughput: 0: 971.1. Samples: 231742. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:26:48,790][00179] Avg episode reward: [(0, '4.567')]
-[2024-12-18 23:26:53,789][00179] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3458.8). Total num frames: 933888. Throughput: 0: 939.4. Samples: 233662. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:26:53,791][00179] Avg episode reward: [(0, '4.625')]
-[2024-12-18 23:26:56,289][02177] Updated weights for policy 0, policy_version 230 (0.0021)
-[2024-12-18 23:26:58,788][00179] Fps is (10 sec: 2457.6, 60 sec: 3549.9, 300 sec: 3440.6). Total num frames: 946176. Throughput: 0: 872.2. Samples: 237222. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:26:58,791][00179] Avg episode reward: [(0, '4.570')]
-[2024-12-18 23:27:03,787][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3467.0). Total num frames: 970752. Throughput: 0: 913.2. Samples: 243186. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:27:03,792][00179] Avg episode reward: [(0, '4.666')]
-[2024-12-18 23:27:07,058][02177] Updated weights for policy 0, policy_version 240 (0.0015)
-[2024-12-18 23:27:08,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3463.6). Total num frames: 987136. Throughput: 0: 914.2. Samples: 246126. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:27:08,794][00179] Avg episode reward: [(0, '4.909')]
-[2024-12-18 23:27:08,803][02163] Saving new best policy, reward=4.909!
-[2024-12-18 23:27:13,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3460.4). Total num frames: 1003520. Throughput: 0: 857.1. Samples: 250368. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:27:13,790][00179] Avg episode reward: [(0, '5.003')]
-[2024-12-18 23:27:13,792][02163] Saving new best policy, reward=5.003!
-[2024-12-18 23:27:18,222][02177] Updated weights for policy 0, policy_version 250 (0.0019)
-[2024-12-18 23:27:18,787][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3471.2). Total num frames: 1024000. Throughput: 0: 897.8. Samples: 257192. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:27:18,789][00179] Avg episode reward: [(0, '4.850')]
-[2024-12-18 23:27:23,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3540.6). Total num frames: 1044480. Throughput: 0: 930.4. Samples: 260738. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
-[2024-12-18 23:27:23,793][00179] Avg episode reward: [(0, '4.784')]
-[2024-12-18 23:27:28,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3596.2). Total num frames: 1060864. Throughput: 0: 874.9. Samples: 265122. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:27:28,794][00179] Avg episode reward: [(0, '4.844')]
-[2024-12-18 23:27:30,051][02177] Updated weights for policy 0, policy_version 260 (0.0014)
-[2024-12-18 23:27:33,788][00179] Fps is (10 sec: 3686.2, 60 sec: 3618.3, 300 sec: 3637.8). Total num frames: 1081344. Throughput: 0: 873.0. Samples: 271026. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:27:33,796][00179] Avg episode reward: [(0, '5.055')]
-[2024-12-18 23:27:33,799][02163] Saving new best policy, reward=5.055!
-[2024-12-18 23:27:38,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 1101824. Throughput: 0: 905.4. Samples: 274404. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:27:38,790][00179] Avg episode reward: [(0, '5.210')]
-[2024-12-18 23:27:38,797][02163] Saving new best policy, reward=5.210!
-[2024-12-18 23:27:39,061][02177] Updated weights for policy 0, policy_version 270 (0.0014)
-[2024-12-18 23:27:43,788][00179] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 1118208. Throughput: 0: 948.0. Samples: 279882. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:27:43,790][00179] Avg episode reward: [(0, '5.058')]
-[2024-12-18 23:27:48,788][00179] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3610.0). Total num frames: 1130496. Throughput: 0: 897.0. Samples: 283552. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:27:48,793][00179] Avg episode reward: [(0, '5.277')]
-[2024-12-18 23:27:48,808][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000276_1130496.pth...
-[2024-12-18 23:27:48,969][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000063_258048.pth
-[2024-12-18 23:27:48,987][02163] Saving new best policy, reward=5.277!
-[2024-12-18 23:27:53,788][00179] Fps is (10 sec: 2457.6, 60 sec: 3481.6, 300 sec: 3610.1). Total num frames: 1142784. Throughput: 0: 874.5. Samples: 285480. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:27:53,789][00179] Avg episode reward: [(0, '5.358')]
-[2024-12-18 23:27:53,816][02177] Updated weights for policy 0, policy_version 280 (0.0046)
-[2024-12-18 23:27:53,827][02163] Saving new best policy, reward=5.358!
-[2024-12-18 23:27:58,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 1167360. Throughput: 0: 918.2. Samples: 291688. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:27:58,793][00179] Avg episode reward: [(0, '5.069')]
-[2024-12-18 23:28:03,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3637.8). Total num frames: 1179648. Throughput: 0: 858.4. Samples: 295818. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:28:03,794][00179] Avg episode reward: [(0, '4.876')]
-[2024-12-18 23:28:05,711][02177] Updated weights for policy 0, policy_version 290 (0.0030)
-[2024-12-18 23:28:08,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 1200128. Throughput: 0: 843.2. Samples: 298682. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:28:08,792][00179] Avg episode reward: [(0, '4.804')]
-[2024-12-18 23:28:13,787][00179] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 1224704. Throughput: 0: 899.9. Samples: 305616. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:28:13,795][00179] Avg episode reward: [(0, '5.197')]
-[2024-12-18 23:28:14,604][02177] Updated weights for policy 0, policy_version 300 (0.0014)
-[2024-12-18 23:28:18,788][00179] Fps is (10 sec: 4095.9, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 1241088. Throughput: 0: 885.6. Samples: 310878. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:28:18,794][00179] Avg episode reward: [(0, '5.197')]
-[2024-12-18 23:28:23,787][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 1257472. Throughput: 0: 856.3. Samples: 312938. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:28:23,790][00179] Avg episode reward: [(0, '5.343')]
-[2024-12-18 23:28:25,995][02177] Updated weights for policy 0, policy_version 310 (0.0021)
-[2024-12-18 23:28:28,788][00179] Fps is (10 sec: 4096.1, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 1282048. Throughput: 0: 890.8. Samples: 319968. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:28:28,790][00179] Avg episode reward: [(0, '5.347')]
-[2024-12-18 23:28:33,788][00179] Fps is (10 sec: 4505.5, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 1302528. Throughput: 0: 944.9. Samples: 326072. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:28:33,790][00179] Avg episode reward: [(0, '5.335')]
-[2024-12-18 23:28:36,620][02177] Updated weights for policy 0, policy_version 320 (0.0020)
-[2024-12-18 23:28:38,791][00179] Fps is (10 sec: 3275.7, 60 sec: 3549.7, 300 sec: 3651.6). Total num frames: 1314816. Throughput: 0: 948.0. Samples: 328144. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:28:38,794][00179] Avg episode reward: [(0, '5.327')]
-[2024-12-18 23:28:43,788][00179] Fps is (10 sec: 3686.5, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 1339392. Throughput: 0: 943.6. Samples: 334150. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:28:43,795][00179] Avg episode reward: [(0, '5.570')]
-[2024-12-18 23:28:43,799][02163] Saving new best policy, reward=5.570!
-[2024-12-18 23:28:46,402][02177] Updated weights for policy 0, policy_version 330 (0.0025)
-[2024-12-18 23:28:48,788][00179] Fps is (10 sec: 4507.1, 60 sec: 3822.9, 300 sec: 3665.6). Total num frames: 1359872. Throughput: 0: 1007.1. Samples: 341138. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:28:48,794][00179] Avg episode reward: [(0, '5.848')]
-[2024-12-18 23:28:48,806][02163] Saving new best policy, reward=5.848!
-[2024-12-18 23:28:53,788][00179] Fps is (10 sec: 3686.3, 60 sec: 3891.2, 300 sec: 3679.5). Total num frames: 1376256. Throughput: 0: 990.3. Samples: 343244. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:28:53,794][00179] Avg episode reward: [(0, '5.939')]
-[2024-12-18 23:28:53,796][02163] Saving new best policy, reward=5.939!
-[2024-12-18 23:28:58,128][02177] Updated weights for policy 0, policy_version 340 (0.0024)
-[2024-12-18 23:28:58,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 1392640. Throughput: 0: 946.3. Samples: 348200. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:28:58,797][00179] Avg episode reward: [(0, '5.807')]
-[2024-12-18 23:29:03,788][00179] Fps is (10 sec: 4096.1, 60 sec: 3959.5, 300 sec: 3665.6). Total num frames: 1417216. Throughput: 0: 984.5. Samples: 355180. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:29:03,794][00179] Avg episode reward: [(0, '5.911')]
-[2024-12-18 23:29:07,951][02177] Updated weights for policy 0, policy_version 350 (0.0015)
-[2024-12-18 23:29:08,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3679.5). Total num frames: 1433600. Throughput: 0: 1003.4. Samples: 358090. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:29:08,790][00179] Avg episode reward: [(0, '5.991')]
-[2024-12-18 23:29:08,802][02163] Saving new best policy, reward=5.991!
-[2024-12-18 23:29:13,788][00179] Fps is (10 sec: 3276.7, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 1449984. Throughput: 0: 937.6. Samples: 362162. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:29:13,790][00179] Avg episode reward: [(0, '5.870')]
-[2024-12-18 23:29:18,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3637.8). Total num frames: 1466368. Throughput: 0: 912.4. Samples: 367130. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:29:18,791][00179] Avg episode reward: [(0, '5.724')]
-[2024-12-18 23:29:21,622][02177] Updated weights for policy 0, policy_version 360 (0.0016)
-[2024-12-18 23:29:23,788][00179] Fps is (10 sec: 2867.3, 60 sec: 3686.4, 300 sec: 3623.9). Total num frames: 1478656. Throughput: 0: 909.8. Samples: 369082. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:29:23,790][00179] Avg episode reward: [(0, '5.693')]
-[2024-12-18 23:29:28,789][00179] Fps is (10 sec: 2457.4, 60 sec: 3481.5, 300 sec: 3637.8). Total num frames: 1490944. Throughput: 0: 873.1. Samples: 373440. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:29:28,791][00179] Avg episode reward: [(0, '5.973')]
-[2024-12-18 23:29:33,627][02177] Updated weights for policy 0, policy_version 370 (0.0018)
-[2024-12-18 23:29:33,790][00179] Fps is (10 sec: 3685.7, 60 sec: 3549.8, 300 sec: 3651.7). Total num frames: 1515520. Throughput: 0: 851.0. Samples: 379436. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:29:33,792][00179] Avg episode reward: [(0, '6.104')]
-[2024-12-18 23:29:33,795][02163] Saving new best policy, reward=6.104!
-[2024-12-18 23:29:38,789][00179] Fps is (10 sec: 4505.6, 60 sec: 3686.6, 300 sec: 3651.7). Total num frames: 1536000. Throughput: 0: 880.1. Samples: 382848. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:29:38,793][00179] Avg episode reward: [(0, '5.679')]
-[2024-12-18 23:29:43,788][00179] Fps is (10 sec: 3687.1, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 1552384. Throughput: 0: 896.8. Samples: 388556. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:29:43,790][00179] Avg episode reward: [(0, '5.695')]
-[2024-12-18 23:29:44,335][02177] Updated weights for policy 0, policy_version 380 (0.0038)
-[2024-12-18 23:29:48,788][00179] Fps is (10 sec: 3277.1, 60 sec: 3481.6, 300 sec: 3651.7). Total num frames: 1568768. Throughput: 0: 855.9. Samples: 393694. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:29:48,790][00179] Avg episode reward: [(0, '6.163')]
-[2024-12-18 23:29:48,848][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000384_1572864.pth...
-[2024-12-18 23:29:48,971][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000169_692224.pth
-[2024-12-18 23:29:48,990][02163] Saving new best policy, reward=6.163!
-[2024-12-18 23:29:53,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 1593344. Throughput: 0: 865.9. Samples: 397054. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:29:53,790][00179] Avg episode reward: [(0, '6.559')]
-[2024-12-18 23:29:53,799][02163] Saving new best policy, reward=6.559!
-[2024-12-18 23:29:54,318][02177] Updated weights for policy 0, policy_version 390 (0.0032)
-[2024-12-18 23:29:58,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 1613824. Throughput: 0: 914.8. Samples: 403330. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:29:58,792][00179] Avg episode reward: [(0, '7.489')]
-[2024-12-18 23:29:58,804][02163] Saving new best policy, reward=7.489!
-[2024-12-18 23:30:03,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3651.7). Total num frames: 1626112. Throughput: 0: 895.4. Samples: 407424. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:30:03,793][00179] Avg episode reward: [(0, '7.835')]
-[2024-12-18 23:30:03,798][02163] Saving new best policy, reward=7.835!
-[2024-12-18 23:30:06,350][02177] Updated weights for policy 0, policy_version 400 (0.0019)
-[2024-12-18 23:30:08,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 1646592. Throughput: 0: 919.9. Samples: 410478. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:30:08,790][00179] Avg episode reward: [(0, '8.444')]
-[2024-12-18 23:30:08,801][02163] Saving new best policy, reward=8.444!
-[2024-12-18 23:30:13,788][00179] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3637.8). Total num frames: 1662976. Throughput: 0: 937.0. Samples: 415602. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:30:13,790][00179] Avg episode reward: [(0, '8.114')]
-[2024-12-18 23:30:18,791][00179] Fps is (10 sec: 2866.3, 60 sec: 3481.4, 300 sec: 3637.8). Total num frames: 1675264. Throughput: 0: 885.1. Samples: 419268. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:30:18,797][00179] Avg episode reward: [(0, '7.994')]
-[2024-12-18 23:30:20,102][02177] Updated weights for policy 0, policy_version 410 (0.0024)
-[2024-12-18 23:30:23,788][00179] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 1691648. Throughput: 0: 856.7. Samples: 421400. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:30:23,790][00179] Avg episode reward: [(0, '8.017')]
-[2024-12-18 23:30:28,788][00179] Fps is (10 sec: 3687.6, 60 sec: 3686.5, 300 sec: 3651.7). Total num frames: 1712128. Throughput: 0: 881.3. Samples: 428216. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:30:28,790][00179] Avg episode reward: [(0, '8.576')]
-[2024-12-18 23:30:28,804][02163] Saving new best policy, reward=8.576!
-[2024-12-18 23:30:29,734][02177] Updated weights for policy 0, policy_version 420 (0.0015)
-[2024-12-18 23:30:33,789][00179] Fps is (10 sec: 4095.4, 60 sec: 3618.2, 300 sec: 3637.8). Total num frames: 1732608. Throughput: 0: 904.9. Samples: 434414. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:30:33,796][00179] Avg episode reward: [(0, '8.205')]
-[2024-12-18 23:30:38,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 1748992. Throughput: 0: 875.6. Samples: 436458. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:30:38,791][00179] Avg episode reward: [(0, '8.709')]
-[2024-12-18 23:30:38,798][02163] Saving new best policy, reward=8.709!
-[2024-12-18 23:30:41,568][02177] Updated weights for policy 0, policy_version 430 (0.0043)
-[2024-12-18 23:30:43,788][00179] Fps is (10 sec: 3687.0, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 1769472. Throughput: 0: 864.4. Samples: 442230. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:30:43,790][00179] Avg episode reward: [(0, '8.681')]
-[2024-12-18 23:30:48,787][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 1794048. Throughput: 0: 926.9. Samples: 449134. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:30:48,794][00179] Avg episode reward: [(0, '9.291')]
-[2024-12-18 23:30:48,804][02163] Saving new best policy, reward=9.291!
-[2024-12-18 23:30:51,371][02177] Updated weights for policy 0, policy_version 440 (0.0018)
-[2024-12-18 23:30:53,788][00179] Fps is (10 sec: 3686.2, 60 sec: 3549.8, 300 sec: 3637.8). Total num frames: 1806336. Throughput: 0: 909.6. Samples: 451412. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:30:53,791][00179] Avg episode reward: [(0, '9.245')]
-[2024-12-18 23:30:58,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 1826816. Throughput: 0: 906.0. Samples: 456374. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:30:58,793][00179] Avg episode reward: [(0, '9.049')]
-[2024-12-18 23:31:02,051][02177] Updated weights for policy 0, policy_version 450 (0.0019)
-[2024-12-18 23:31:03,787][00179] Fps is (10 sec: 4505.9, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 1851392. Throughput: 0: 981.7. Samples: 463442. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:31:03,790][00179] Avg episode reward: [(0, '10.515')]
-[2024-12-18 23:31:03,792][02163] Saving new best policy, reward=10.515!
-[2024-12-18 23:31:08,796][00179] Fps is (10 sec: 4092.5, 60 sec: 3685.9, 300 sec: 3651.6). Total num frames: 1867776. Throughput: 0: 1000.9. Samples: 466450. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:31:08,802][00179] Avg episode reward: [(0, '10.628')]
-[2024-12-18 23:31:08,810][02163] Saving new best policy, reward=10.628!
-[2024-12-18 23:31:13,735][02177] Updated weights for policy 0, policy_version 460 (0.0028)
-[2024-12-18 23:31:13,787][00179] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 1884160. Throughput: 0: 940.3. Samples: 470528. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:31:13,792][00179] Avg episode reward: [(0, '11.054')]
-[2024-12-18 23:31:13,795][02163] Saving new best policy, reward=11.054!
-[2024-12-18 23:31:18,788][00179] Fps is (10 sec: 3689.5, 60 sec: 3823.1, 300 sec: 3665.6). Total num frames: 1904640. Throughput: 0: 954.7. Samples: 477376. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:31:18,794][00179] Avg episode reward: [(0, '11.053')]
-[2024-12-18 23:31:22,723][02177] Updated weights for policy 0, policy_version 470 (0.0026)
-[2024-12-18 23:31:23,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3651.7). Total num frames: 1925120. Throughput: 0: 984.7. Samples: 480768. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:31:23,793][00179] Avg episode reward: [(0, '10.907')]
-[2024-12-18 23:31:28,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3651.7). Total num frames: 1941504. Throughput: 0: 962.7. Samples: 485552. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:31:28,790][00179] Avg episode reward: [(0, '11.142')]
-[2024-12-18 23:31:28,800][02163] Saving new best policy, reward=11.142!
-[2024-12-18 23:31:33,788][00179] Fps is (10 sec: 3686.3, 60 sec: 3823.0, 300 sec: 3665.6). Total num frames: 1961984. Throughput: 0: 940.5. Samples: 491456. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:31:33,795][00179] Avg episode reward: [(0, '11.298')]
-[2024-12-18 23:31:33,797][02163] Saving new best policy, reward=11.298!
-[2024-12-18 23:31:34,377][02177] Updated weights for policy 0, policy_version 480 (0.0015)
-[2024-12-18 23:31:38,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3651.7). Total num frames: 1982464. Throughput: 0: 962.1. Samples: 494704. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
-[2024-12-18 23:31:38,790][00179] Avg episode reward: [(0, '11.813')]
-[2024-12-18 23:31:38,810][02163] Saving new best policy, reward=11.813!
-[2024-12-18 23:31:43,790][00179] Fps is (10 sec: 3276.1, 60 sec: 3754.5, 300 sec: 3637.8). Total num frames: 1994752. Throughput: 0: 957.3. Samples: 499454. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:31:43,793][00179] Avg episode reward: [(0, '12.141')]
-[2024-12-18 23:31:43,802][02163] Saving new best policy, reward=12.141!
-[2024-12-18 23:31:48,175][02177] Updated weights for policy 0, policy_version 490 (0.0023)
-[2024-12-18 23:31:48,788][00179] Fps is (10 sec: 2457.6, 60 sec: 3549.9, 300 sec: 3637.8). Total num frames: 2007040. Throughput: 0: 873.3. Samples: 502742. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:31:48,792][00179] Avg episode reward: [(0, '11.862')]
-[2024-12-18 23:31:48,809][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000490_2007040.pth...
-[2024-12-18 23:31:49,031][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000276_1130496.pth
-[2024-12-18 23:31:53,788][00179] Fps is (10 sec: 3277.5, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 2027520. Throughput: 0: 867.5. Samples: 505480. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:31:53,790][00179] Avg episode reward: [(0, '12.478')]
-[2024-12-18 23:31:53,796][02163] Saving new best policy, reward=12.478!
-[2024-12-18 23:31:57,898][02177] Updated weights for policy 0, policy_version 500 (0.0016)
-[2024-12-18 23:31:58,787][00179] Fps is (10 sec: 4505.7, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 2052096. Throughput: 0: 932.2. Samples: 512478. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
-[2024-12-18 23:31:58,789][00179] Avg episode reward: [(0, '12.454')]
-[2024-12-18 23:32:03,789][00179] Fps is (10 sec: 4095.4, 60 sec: 3618.0, 300 sec: 3665.6). Total num frames: 2068480. Throughput: 0: 893.4. Samples: 517578. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:32:03,791][00179] Avg episode reward: [(0, '12.351')]
-[2024-12-18 23:32:08,788][00179] Fps is (10 sec: 3276.7, 60 sec: 3618.6, 300 sec: 3665.6). Total num frames: 2084864. Throughput: 0: 863.5. Samples: 519626. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:32:08,790][00179] Avg episode reward: [(0, '13.306')]
-[2024-12-18 23:32:08,798][02163] Saving new best policy, reward=13.306!
-[2024-12-18 23:32:09,582][02177] Updated weights for policy 0, policy_version 510 (0.0028)
-[2024-12-18 23:32:13,788][00179] Fps is (10 sec: 3686.9, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 2105344. Throughput: 0: 906.9. Samples: 526364. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:32:13,792][00179] Avg episode reward: [(0, '13.595')]
-[2024-12-18 23:32:13,795][02163] Saving new best policy, reward=13.595!
-[2024-12-18 23:32:18,788][00179] Fps is (10 sec: 4096.1, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 2125824. Throughput: 0: 912.6. Samples: 532522. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:32:18,792][00179] Avg episode reward: [(0, '13.906')]
-[2024-12-18 23:32:18,803][02163] Saving new best policy, reward=13.906!
-[2024-12-18 23:32:19,685][02177] Updated weights for policy 0, policy_version 520 (0.0016)
-[2024-12-18 23:32:23,787][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 2138112. Throughput: 0: 884.4. Samples: 534504. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
-[2024-12-18 23:32:23,796][00179] Avg episode reward: [(0, '14.913')]
-[2024-12-18 23:32:23,798][02163] Saving new best policy, reward=14.913!
-[2024-12-18 23:32:28,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 2162688. Throughput: 0: 908.4. Samples: 540328. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
-[2024-12-18 23:32:28,793][00179] Avg episode reward: [(0, '14.909')]
-[2024-12-18 23:32:30,275][02177] Updated weights for policy 0, policy_version 530 (0.0021)
-[2024-12-18 23:32:33,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 2183168. Throughput: 0: 975.6. Samples: 546646. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
-[2024-12-18 23:32:33,794][00179] Avg episode reward: [(0, '15.098')]
-[2024-12-18 23:32:33,800][02163] Saving new best policy, reward=15.098!
-[2024-12-18 23:32:38,788][00179] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3637.8). Total num frames: 2191360. Throughput: 0: 950.0. Samples: 548230. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:32:38,791][00179] Avg episode reward: [(0, '15.097')]
-[2024-12-18 23:32:43,788][00179] Fps is (10 sec: 2048.0, 60 sec: 3481.7, 300 sec: 3637.8). Total num frames: 2203648. Throughput: 0: 870.4. Samples: 551648. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:32:43,790][00179] Avg episode reward: [(0, '14.621')]
-[2024-12-18 23:32:44,748][02177] Updated weights for policy 0, policy_version 540 (0.0035)
-[2024-12-18 23:32:48,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 2228224. Throughput: 0: 902.2. Samples: 558176. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:32:48,798][00179] Avg episode reward: [(0, '14.112')]
-[2024-12-18 23:32:53,425][02177] Updated weights for policy 0, policy_version 550 (0.0019)
-[2024-12-18 23:32:53,788][00179] Fps is (10 sec: 4915.2, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 2252800. Throughput: 0: 935.0. Samples: 561700. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:32:53,792][00179] Avg episode reward: [(0, '13.461')]
-[2024-12-18 23:32:58,790][00179] Fps is (10 sec: 3685.7, 60 sec: 3549.7, 300 sec: 3679.4). Total num frames: 2265088. Throughput: 0: 896.9. Samples: 566724. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:32:58,792][00179] Avg episode reward: [(0, '13.895')]
-[2024-12-18 23:33:03,787][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.2, 300 sec: 3679.5). Total num frames: 2285568. Throughput: 0: 885.6. Samples: 572376. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:33:03,790][00179] Avg episode reward: [(0, '14.260')]
-[2024-12-18 23:33:05,041][02177] Updated weights for policy 0, policy_version 560 (0.0023)
-[2024-12-18 23:33:08,788][00179] Fps is (10 sec: 4506.5, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 2310144. Throughput: 0: 914.3. Samples: 575648. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:33:08,790][00179] Avg episode reward: [(0, '14.449')]
-[2024-12-18 23:33:13,789][00179] Fps is (10 sec: 4095.4, 60 sec: 3686.3, 300 sec: 3679.4). Total num frames: 2326528. Throughput: 0: 921.2. Samples: 581782. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:33:13,793][00179] Avg episode reward: [(0, '14.849')]
-[2024-12-18 23:33:16,174][02177] Updated weights for policy 0, policy_version 570 (0.0034)
-[2024-12-18 23:33:18,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 2342912. Throughput: 0: 882.0. Samples: 586334. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:33:18,794][00179] Avg episode reward: [(0, '14.602')]
-[2024-12-18 23:33:23,788][00179] Fps is (10 sec: 3686.9, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 2363392. Throughput: 0: 924.4. Samples: 589830. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:33:23,793][00179] Avg episode reward: [(0, '16.404')]
-[2024-12-18 23:33:23,862][02163] Saving new best policy, reward=16.404!
-[2024-12-18 23:33:25,711][02177] Updated weights for policy 0, policy_version 580 (0.0013)
-[2024-12-18 23:33:28,788][00179] Fps is (10 sec: 4095.9, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 2383872. Throughput: 0: 1000.9. Samples: 596688. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:33:28,791][00179] Avg episode reward: [(0, '16.802')]
-[2024-12-18 23:33:28,799][02163] Saving new best policy, reward=16.802!
-[2024-12-18 23:33:33,796][00179] Fps is (10 sec: 3683.3, 60 sec: 3617.6, 300 sec: 3679.4). Total num frames: 2400256. Throughput: 0: 947.6. Samples: 600828. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:33:33,798][00179] Avg episode reward: [(0, '17.570')]
-[2024-12-18 23:33:33,805][02163] Saving new best policy, reward=17.570!
-[2024-12-18 23:33:37,625][02177] Updated weights for policy 0, policy_version 590 (0.0034)
-[2024-12-18 23:33:38,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3665.6). Total num frames: 2420736. Throughput: 0: 932.1. Samples: 603646. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:33:38,790][00179] Avg episode reward: [(0, '18.641')]
-[2024-12-18 23:33:38,803][02163] Saving new best policy, reward=18.641!
-[2024-12-18 23:33:43,788][00179] Fps is (10 sec: 4509.4, 60 sec: 4027.7, 300 sec: 3679.5). Total num frames: 2445312. Throughput: 0: 975.7. Samples: 610628. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:33:43,793][00179] Avg episode reward: [(0, '18.708')]
-[2024-12-18 23:33:43,796][02163] Saving new best policy, reward=18.708!
-[2024-12-18 23:33:47,624][02177] Updated weights for policy 0, policy_version 600 (0.0016)
-[2024-12-18 23:33:48,789][00179] Fps is (10 sec: 3686.0, 60 sec: 3822.8, 300 sec: 3665.6). Total num frames: 2457600. Throughput: 0: 960.6. Samples: 615606. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:33:48,793][00179] Avg episode reward: [(0, '18.812')]
-[2024-12-18 23:33:48,812][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000600_2457600.pth...
-[2024-12-18 23:33:48,999][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000384_1572864.pth
-[2024-12-18 23:33:49,022][02163] Saving new best policy, reward=18.812!
-[2024-12-18 23:33:53,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 2478080. Throughput: 0: 932.8. Samples: 617626. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:33:53,792][00179] Avg episode reward: [(0, '17.970')]
-[2024-12-18 23:33:58,173][02177] Updated weights for policy 0, policy_version 610 (0.0017)
-[2024-12-18 23:33:58,788][00179] Fps is (10 sec: 4096.6, 60 sec: 3891.3, 300 sec: 3665.6). Total num frames: 2498560. Throughput: 0: 949.5. Samples: 624508. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:33:58,797][00179] Avg episode reward: [(0, '16.402')]
-[2024-12-18 23:34:03,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3665.6). Total num frames: 2514944. Throughput: 0: 970.2. Samples: 629994. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:34:03,794][00179] Avg episode reward: [(0, '15.970')]
-[2024-12-18 23:34:08,789][00179] Fps is (10 sec: 2866.8, 60 sec: 3618.0, 300 sec: 3651.7). Total num frames: 2527232. Throughput: 0: 931.8. Samples: 631764. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:34:08,796][00179] Avg episode reward: [(0, '15.781')]
-[2024-12-18 23:34:12,464][02177] Updated weights for policy 0, policy_version 620 (0.0019)
-[2024-12-18 23:34:13,788][00179] Fps is (10 sec: 2867.2, 60 sec: 3618.2, 300 sec: 3651.7). Total num frames: 2543616. Throughput: 0: 866.0. Samples: 635658. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:34:13,791][00179] Avg episode reward: [(0, '15.554')]
-[2024-12-18 23:34:18,788][00179] Fps is (10 sec: 4096.6, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 2568192. Throughput: 0: 936.4. Samples: 642958. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:34:18,789][00179] Avg episode reward: [(0, '17.244')]
-[2024-12-18 23:34:20,933][02177] Updated weights for policy 0, policy_version 630 (0.0026)
-[2024-12-18 23:34:23,787][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 2588672. Throughput: 0: 955.7. Samples: 646654. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:34:23,796][00179] Avg episode reward: [(0, '17.112')]
-[2024-12-18 23:34:28,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3693.4). Total num frames: 2605056. Throughput: 0: 902.2. Samples: 651228. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:34:28,795][00179] Avg episode reward: [(0, '18.274')]
-[2024-12-18 23:34:31,994][02177] Updated weights for policy 0, policy_version 640 (0.0031)
-[2024-12-18 23:34:33,787][00179] Fps is (10 sec: 4096.0, 60 sec: 3823.5, 300 sec: 3707.2). Total num frames: 2629632. Throughput: 0: 940.8. Samples: 657940. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:34:33,794][00179] Avg episode reward: [(0, '17.670')]
-[2024-12-18 23:34:38,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3823.0, 300 sec: 3721.1). Total num frames: 2650112. Throughput: 0: 976.5. Samples: 661570. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:34:38,792][00179] Avg episode reward: [(0, '17.217')]
-[2024-12-18 23:34:41,521][02177] Updated weights for policy 0, policy_version 650 (0.0020)
-[2024-12-18 23:34:43,788][00179] Fps is (10 sec: 3686.3, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 2666496. Throughput: 0: 944.9. Samples: 667028. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:34:43,794][00179] Avg episode reward: [(0, '16.989')]
-[2024-12-18 23:34:48,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3707.2). Total num frames: 2686976. Throughput: 0: 948.9. Samples: 672696. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:34:48,792][00179] Avg episode reward: [(0, '18.329')]
-[2024-12-18 23:34:51,761][02177] Updated weights for policy 0, policy_version 660 (0.0020)
-[2024-12-18 23:34:53,787][00179] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3721.1). Total num frames: 2711552. Throughput: 0: 988.8. Samples: 676260. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:34:53,795][00179] Avg episode reward: [(0, '18.817')]
-[2024-12-18 23:34:53,797][02163] Saving new best policy, reward=18.817!
-[2024-12-18 23:34:58,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 2723840. Throughput: 0: 1019.5. Samples: 681536. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:34:58,799][00179] Avg episode reward: [(0, '19.259')]
-[2024-12-18 23:34:58,811][02163] Saving new best policy, reward=19.259!
-[2024-12-18 23:35:03,791][00179] Fps is (10 sec: 2456.7, 60 sec: 3686.2, 300 sec: 3693.3). Total num frames: 2736128. Throughput: 0: 929.9. Samples: 684808. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:35:03,793][00179] Avg episode reward: [(0, '19.610')]
-[2024-12-18 23:35:03,795][02163] Saving new best policy, reward=19.610!
-[2024-12-18 23:35:06,201][02177] Updated weights for policy 0, policy_version 670 (0.0033)
-[2024-12-18 23:35:08,788][00179] Fps is (10 sec: 2867.2, 60 sec: 3754.8, 300 sec: 3693.3). Total num frames: 2752512. Throughput: 0: 901.1. Samples: 687202. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:35:08,794][00179] Avg episode reward: [(0, '20.955')]
-[2024-12-18 23:35:08,803][02163] Saving new best policy, reward=20.955!
-[2024-12-18 23:35:13,788][00179] Fps is (10 sec: 4097.3, 60 sec: 3891.2, 300 sec: 3735.0). Total num frames: 2777088. Throughput: 0: 951.4. Samples: 694042. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:35:13,790][00179] Avg episode reward: [(0, '21.130')]
-[2024-12-18 23:35:13,796][02163] Saving new best policy, reward=21.130!
-[2024-12-18 23:35:15,213][02177] Updated weights for policy 0, policy_version 680 (0.0018)
-[2024-12-18 23:35:18,788][00179] Fps is (10 sec: 4095.9, 60 sec: 3754.6, 300 sec: 3735.0). Total num frames: 2793472. Throughput: 0: 926.3. Samples: 699622. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:35:18,790][00179] Avg episode reward: [(0, '20.889')]
-[2024-12-18 23:35:23,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 2809856. Throughput: 0: 891.5. Samples: 701686. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:35:23,790][00179] Avg episode reward: [(0, '20.747')]
-[2024-12-18 23:35:26,703][02177] Updated weights for policy 0, policy_version 690 (0.0019)
-[2024-12-18 23:35:28,788][00179] Fps is (10 sec: 4096.1, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 2834432. Throughput: 0: 917.4. Samples: 708312. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:35:28,790][00179] Avg episode reward: [(0, '19.917')]
-[2024-12-18 23:35:33,791][00179] Fps is (10 sec: 4504.2, 60 sec: 3754.5, 300 sec: 3748.8). Total num frames: 2854912. Throughput: 0: 938.5. Samples: 714930. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:35:33,797][00179] Avg episode reward: [(0, '18.836')]
-[2024-12-18 23:35:37,064][02177] Updated weights for policy 0, policy_version 700 (0.0022)
-[2024-12-18 23:35:38,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 2871296. Throughput: 0: 905.5. Samples: 717008. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:35:38,790][00179] Avg episode reward: [(0, '18.316')]
-[2024-12-18 23:35:43,788][00179] Fps is (10 sec: 3687.5, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 2891776. Throughput: 0: 910.8. Samples: 722524. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:35:43,789][00179] Avg episode reward: [(0, '18.098')]
-[2024-12-18 23:35:46,908][02177] Updated weights for policy 0, policy_version 710 (0.0018)
-[2024-12-18 23:35:48,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 2916352. Throughput: 0: 997.0. Samples: 729668. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:35:48,795][00179] Avg episode reward: [(0, '17.953')]
-[2024-12-18 23:35:48,805][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000712_2916352.pth...
-[2024-12-18 23:35:48,928][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000490_2007040.pth
-[2024-12-18 23:35:53,788][00179] Fps is (10 sec: 4096.1, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 2932736. Throughput: 0: 1003.0. Samples: 732338. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:35:53,795][00179] Avg episode reward: [(0, '18.830')]
-[2024-12-18 23:35:58,264][02177] Updated weights for policy 0, policy_version 720 (0.0029)
-[2024-12-18 23:35:58,788][00179] Fps is (10 sec: 3276.7, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 2949120. Throughput: 0: 956.0. Samples: 737062. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:35:58,797][00179] Avg episode reward: [(0, '18.714')]
-[2024-12-18 23:36:03,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3959.7, 300 sec: 3749.0). Total num frames: 2973696. Throughput: 0: 990.5. Samples: 744196. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:36:03,793][00179] Avg episode reward: [(0, '20.243')]
-[2024-12-18 23:36:07,257][02177] Updated weights for policy 0, policy_version 730 (0.0017)
-[2024-12-18 23:36:08,788][00179] Fps is (10 sec: 4505.7, 60 sec: 4027.7, 300 sec: 3762.8). Total num frames: 2994176. Throughput: 0: 1023.2. Samples: 747728. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:36:08,792][00179] Avg episode reward: [(0, '19.867')]
-[2024-12-18 23:36:13,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 3006464. Throughput: 0: 965.6. Samples: 751764. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:36:13,790][00179] Avg episode reward: [(0, '20.103')]
-[2024-12-18 23:36:18,664][02177] Updated weights for policy 0, policy_version 740 (0.0032)
-[2024-12-18 23:36:18,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3748.9). Total num frames: 3031040. Throughput: 0: 962.5. Samples: 758238. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:36:18,790][00179] Avg episode reward: [(0, '20.175')]
-[2024-12-18 23:36:23,787][00179] Fps is (10 sec: 4505.6, 60 sec: 4027.8, 300 sec: 3762.8). Total num frames: 3051520. Throughput: 0: 994.8. Samples: 761774. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:36:23,793][00179] Avg episode reward: [(0, '20.087')]
-[2024-12-18 23:36:28,792][00179] Fps is (10 sec: 2866.0, 60 sec: 3754.4, 300 sec: 3721.1). Total num frames: 3059712. Throughput: 0: 961.3. Samples: 765784. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:36:28,794][00179] Avg episode reward: [(0, '20.025')]
-[2024-12-18 23:36:32,479][02177] Updated weights for policy 0, policy_version 750 (0.0032)
-[2024-12-18 23:36:33,788][00179] Fps is (10 sec: 2457.6, 60 sec: 3686.6, 300 sec: 3707.2). Total num frames: 3076096. Throughput: 0: 887.2. Samples: 769592. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:36:33,793][00179] Avg episode reward: [(0, '20.081')]
-[2024-12-18 23:36:38,788][00179] Fps is (10 sec: 3687.9, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3096576. Throughput: 0: 906.9. Samples: 773148. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:36:38,795][00179] Avg episode reward: [(0, '20.956')]
-[2024-12-18 23:36:41,595][02177] Updated weights for policy 0, policy_version 760 (0.0021)
-[2024-12-18 23:36:43,788][00179] Fps is (10 sec: 4505.7, 60 sec: 3823.0, 300 sec: 3776.7). Total num frames: 3121152. Throughput: 0: 955.4. Samples: 780056. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:36:43,790][00179] Avg episode reward: [(0, '20.686')]
-[2024-12-18 23:36:48,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3748.9). Total num frames: 3133440. Throughput: 0: 896.5. Samples: 784538. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:36:48,790][00179] Avg episode reward: [(0, '20.668')]
-[2024-12-18 23:36:53,080][02177] Updated weights for policy 0, policy_version 770 (0.0015)
-[2024-12-18 23:36:53,791][00179] Fps is (10 sec: 3275.6, 60 sec: 3686.2, 300 sec: 3734.9). Total num frames: 3153920. Throughput: 0: 879.7. Samples: 787318. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:36:53,795][00179] Avg episode reward: [(0, '20.113')]
-[2024-12-18 23:36:58,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 3178496. Throughput: 0: 946.8. Samples: 794372. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:36:58,792][00179] Avg episode reward: [(0, '19.152')]
-[2024-12-18 23:37:03,013][02177] Updated weights for policy 0, policy_version 780 (0.0039)
-[2024-12-18 23:37:03,790][00179] Fps is (10 sec: 4096.5, 60 sec: 3686.3, 300 sec: 3762.7). Total num frames: 3194880. Throughput: 0: 922.9. Samples: 799772. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:37:03,798][00179] Avg episode reward: [(0, '19.548')]
-[2024-12-18 23:37:08,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3748.9). Total num frames: 3211264. Throughput: 0: 890.3. Samples: 801836. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:37:08,794][00179] Avg episode reward: [(0, '19.703')]
-[2024-12-18 23:37:13,788][00179] Fps is (10 sec: 3687.3, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 3231744. Throughput: 0: 942.1. Samples: 808176. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:37:13,791][00179] Avg episode reward: [(0, '19.929')]
-[2024-12-18 23:37:13,801][02177] Updated weights for policy 0, policy_version 790 (0.0020)
-[2024-12-18 23:37:18,788][00179] Fps is (10 sec: 4095.9, 60 sec: 3686.4, 300 sec: 3776.6). Total num frames: 3252224. Throughput: 0: 986.6. Samples: 813988. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:37:18,790][00179] Avg episode reward: [(0, '19.952')]
-[2024-12-18 23:37:23,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3735.0). Total num frames: 3264512. Throughput: 0: 944.4. Samples: 815646. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:37:23,790][00179] Avg episode reward: [(0, '20.518')]
-[2024-12-18 23:37:28,631][02177] Updated weights for policy 0, policy_version 800 (0.0021)
-[2024-12-18 23:37:28,788][00179] Fps is (10 sec: 2457.7, 60 sec: 3618.4, 300 sec: 3707.2). Total num frames: 3276800. Throughput: 0: 866.4. Samples: 819044. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:37:28,790][00179] Avg episode reward: [(0, '20.730')]
-[2024-12-18 23:37:33,787][00179] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 3297280. Throughput: 0: 912.8. Samples: 825614. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:37:33,791][00179] Avg episode reward: [(0, '19.949')]
-[2024-12-18 23:37:37,425][02177] Updated weights for policy 0, policy_version 810 (0.0018)
-[2024-12-18 23:37:38,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 3321856. Throughput: 0: 928.4. Samples: 829094. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:37:38,795][00179] Avg episode reward: [(0, '19.051')]
-[2024-12-18 23:37:43,791][00179] Fps is (10 sec: 3685.1, 60 sec: 3549.7, 300 sec: 3748.8). Total num frames: 3334144. Throughput: 0: 875.3. Samples: 833762. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:37:43,793][00179] Avg episode reward: [(0, '20.039')]
-[2024-12-18 23:37:48,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 3354624. Throughput: 0: 886.7. Samples: 839672. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:37:48,792][00179] Avg episode reward: [(0, '19.192')]
-[2024-12-18 23:37:48,809][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000819_3354624.pth...
-[2024-12-18 23:37:48,928][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000600_2457600.pth
-[2024-12-18 23:37:49,230][02177] Updated weights for policy 0, policy_version 820 (0.0017)
-[2024-12-18 23:37:53,788][00179] Fps is (10 sec: 4507.1, 60 sec: 3754.9, 300 sec: 3776.7). Total num frames: 3379200. Throughput: 0: 918.0. Samples: 843146. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:37:53,793][00179] Avg episode reward: [(0, '19.733')]
-[2024-12-18 23:37:58,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3762.8). Total num frames: 3395584. Throughput: 0: 903.2. Samples: 848818. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:37:58,795][00179] Avg episode reward: [(0, '19.385')]
-[2024-12-18 23:38:00,063][02177] Updated weights for policy 0, policy_version 830 (0.0015)
-[2024-12-18 23:38:03,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.3, 300 sec: 3735.0). Total num frames: 3411968. Throughput: 0: 883.6. Samples: 853748. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:38:03,790][00179] Avg episode reward: [(0, '20.232')]
-[2024-12-18 23:38:08,787][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 3436544. Throughput: 0: 923.0. Samples: 857180. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:38:08,792][00179] Avg episode reward: [(0, '20.375')]
-[2024-12-18 23:38:09,772][02177] Updated weights for policy 0, policy_version 840 (0.0017)
-[2024-12-18 23:38:13,794][00179] Fps is (10 sec: 4093.4, 60 sec: 3686.0, 300 sec: 3762.7). Total num frames: 3452928. Throughput: 0: 993.0. Samples: 863734. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:38:13,796][00179] Avg episode reward: [(0, '20.837')]
-[2024-12-18 23:38:18,788][00179] Fps is (10 sec: 3276.7, 60 sec: 3618.2, 300 sec: 3748.9). Total num frames: 3469312. Throughput: 0: 939.8. Samples: 867904. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:38:18,792][00179] Avg episode reward: [(0, '20.578')]
-[2024-12-18 23:38:21,398][02177] Updated weights for policy 0, policy_version 850 (0.0023)
-[2024-12-18 23:38:23,788][00179] Fps is (10 sec: 3688.7, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 3489792. Throughput: 0: 935.6. Samples: 871196. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:38:23,791][00179] Avg episode reward: [(0, '21.186')]
-[2024-12-18 23:38:23,795][02163] Saving new best policy, reward=21.186!
-[2024-12-18 23:38:28,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3776.8). Total num frames: 3514368. Throughput: 0: 985.8. Samples: 878118. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:38:28,790][00179] Avg episode reward: [(0, '20.515')]
-[2024-12-18 23:38:31,308][02177] Updated weights for policy 0, policy_version 860 (0.0020)
-[2024-12-18 23:38:33,790][00179] Fps is (10 sec: 3685.5, 60 sec: 3822.8, 300 sec: 3748.9). Total num frames: 3526656. Throughput: 0: 956.7. Samples: 882726. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:38:33,792][00179] Avg episode reward: [(0, '20.638')]
-[2024-12-18 23:38:38,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3547136. Throughput: 0: 933.8. Samples: 885168. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:38:38,790][00179] Avg episode reward: [(0, '20.522')]
-[2024-12-18 23:38:42,182][02177] Updated weights for policy 0, policy_version 870 (0.0022)
-[2024-12-18 23:38:43,788][00179] Fps is (10 sec: 4097.0, 60 sec: 3891.4, 300 sec: 3762.8). Total num frames: 3567616. Throughput: 0: 958.7. Samples: 891958. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:38:43,796][00179] Avg episode reward: [(0, '20.439')]
-[2024-12-18 23:38:48,790][00179] Fps is (10 sec: 3276.0, 60 sec: 3754.5, 300 sec: 3735.0). Total num frames: 3579904. Throughput: 0: 941.2. Samples: 896102. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:38:48,792][00179] Avg episode reward: [(0, '19.348')]
-[2024-12-18 23:38:53,788][00179] Fps is (10 sec: 2457.6, 60 sec: 3549.9, 300 sec: 3707.2). Total num frames: 3592192. Throughput: 0: 901.6. Samples: 897750. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:38:53,795][00179] Avg episode reward: [(0, '19.267')]
-[2024-12-18 23:38:56,438][02177] Updated weights for policy 0, policy_version 880 (0.0026)
-[2024-12-18 23:38:58,788][00179] Fps is (10 sec: 3277.6, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 3612672. Throughput: 0: 873.5. Samples: 903038. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:38:58,795][00179] Avg episode reward: [(0, '20.869')]
-[2024-12-18 23:39:03,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 3637248. Throughput: 0: 937.6. Samples: 910094. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
-[2024-12-18 23:39:03,792][00179] Avg episode reward: [(0, '20.739')]
-[2024-12-18 23:39:05,048][02177] Updated weights for policy 0, policy_version 890 (0.0024)
-[2024-12-18 23:39:08,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3762.8). Total num frames: 3653632. Throughput: 0: 925.1. Samples: 912826. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
-[2024-12-18 23:39:08,790][00179] Avg episode reward: [(0, '20.915')]
-[2024-12-18 23:39:13,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.5, 300 sec: 3735.0). Total num frames: 3670016. Throughput: 0: 868.3. Samples: 917192. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:39:13,794][00179] Avg episode reward: [(0, '21.836')]
-[2024-12-18 23:39:13,797][02163] Saving new best policy, reward=21.836!
-[2024-12-18 23:39:16,779][02177] Updated weights for policy 0, policy_version 900 (0.0024)
-[2024-12-18 23:39:18,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 3694592. Throughput: 0: 919.3. Samples: 924092. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:39:18,797][00179] Avg episode reward: [(0, '22.105')]
-[2024-12-18 23:39:18,808][02163] Saving new best policy, reward=22.105!
-[2024-12-18 23:39:23,787][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 3715072. Throughput: 0: 939.6. Samples: 927448. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
-[2024-12-18 23:39:23,792][00179] Avg episode reward: [(0, '21.016')]
-[2024-12-18 23:39:28,230][02177] Updated weights for policy 0, policy_version 910 (0.0023)
-[2024-12-18 23:39:28,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3721.1). Total num frames: 3727360. Throughput: 0: 887.2. Samples: 931884. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
-[2024-12-18 23:39:28,790][00179] Avg episode reward: [(0, '20.072')]
-[2024-12-18 23:39:33,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3686.5, 300 sec: 3721.1). Total num frames: 3747840. Throughput: 0: 933.0. Samples: 938084. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:39:33,790][00179] Avg episode reward: [(0, '19.526')]
-[2024-12-18 23:39:37,519][02177] Updated weights for policy 0, policy_version 920 (0.0023)
-[2024-12-18 23:39:38,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 3772416. Throughput: 0: 974.1. Samples: 941586. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:39:38,794][00179] Avg episode reward: [(0, '20.110')]
-[2024-12-18 23:39:43,791][00179] Fps is (10 sec: 3275.7, 60 sec: 3549.7, 300 sec: 3707.2). Total num frames: 3780608. Throughput: 0: 955.0. Samples: 946018. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:39:43,793][00179] Avg episode reward: [(0, '19.864')]
-[2024-12-18 23:39:48,788][00179] Fps is (10 sec: 2048.0, 60 sec: 3550.0, 300 sec: 3665.6). Total num frames: 3792896. Throughput: 0: 874.9. Samples: 949464. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:39:48,790][00179] Avg episode reward: [(0, '18.625')]
-[2024-12-18 23:39:48,798][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000926_3792896.pth...
-[2024-12-18 23:39:48,966][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000712_2916352.pth
-[2024-12-18 23:39:51,959][02177] Updated weights for policy 0, policy_version 930 (0.0036)
-[2024-12-18 23:39:53,788][00179] Fps is (10 sec: 3687.6, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 3817472. Throughput: 0: 882.8. Samples: 952550. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:39:53,791][00179] Avg episode reward: [(0, '19.429')]
-[2024-12-18 23:39:58,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3837952. Throughput: 0: 938.8. Samples: 959438. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:39:58,790][00179] Avg episode reward: [(0, '21.696')]
-[2024-12-18 23:40:01,856][02177] Updated weights for policy 0, policy_version 940 (0.0016)
-[2024-12-18 23:40:03,791][00179] Fps is (10 sec: 3685.1, 60 sec: 3617.9, 300 sec: 3735.0). Total num frames: 3854336. Throughput: 0: 892.4. Samples: 964254. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:40:03,793][00179] Avg episode reward: [(0, '19.930')]
-[2024-12-18 23:40:08,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 3870720. Throughput: 0: 868.7. Samples: 966540. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:40:08,791][00179] Avg episode reward: [(0, '20.390')]
-[2024-12-18 23:40:12,688][02177] Updated weights for policy 0, policy_version 950 (0.0021)
-[2024-12-18 23:40:13,788][00179] Fps is (10 sec: 4097.4, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 3895296. Throughput: 0: 921.7. Samples: 973362. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:40:13,790][00179] Avg episode reward: [(0, '20.003')]
-[2024-12-18 23:40:18,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 3911680. Throughput: 0: 918.2. Samples: 979404. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:40:18,790][00179] Avg episode reward: [(0, '18.513')]
-[2024-12-18 23:40:23,788][00179] Fps is (10 sec: 3276.7, 60 sec: 3549.8, 300 sec: 3707.2). Total num frames: 3928064. Throughput: 0: 884.5. Samples: 981388. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:40:23,796][00179] Avg episode reward: [(0, '17.805')]
-[2024-12-18 23:40:24,300][02177] Updated weights for policy 0, policy_version 960 (0.0029)
-[2024-12-18 23:40:28,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3721.2). Total num frames: 3952640. Throughput: 0: 922.2. Samples: 987512. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:40:28,790][00179] Avg episode reward: [(0, '18.976')]
-[2024-12-18 23:40:33,189][02177] Updated weights for policy 0, policy_version 970 (0.0023)
-[2024-12-18 23:40:33,790][00179] Fps is (10 sec: 4504.7, 60 sec: 3754.5, 300 sec: 3735.0). Total num frames: 3973120. Throughput: 0: 996.7. Samples: 994320. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:40:33,794][00179] Avg episode reward: [(0, '21.638')]
-[2024-12-18 23:40:38,789][00179] Fps is (10 sec: 3276.4, 60 sec: 3549.8, 300 sec: 3707.2). Total num frames: 3985408. Throughput: 0: 973.6. Samples: 996364. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:40:38,791][00179] Avg episode reward: [(0, '22.342')]
-[2024-12-18 23:40:38,857][02163] Saving new best policy, reward=22.342!
-[2024-12-18 23:40:43,787][00179] Fps is (10 sec: 3277.6, 60 sec: 3754.9, 300 sec: 3693.3). Total num frames: 4005888. Throughput: 0: 931.7. Samples: 1001364. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:40:43,790][00179] Avg episode reward: [(0, '22.324')]
-[2024-12-18 23:40:44,910][02177] Updated weights for policy 0, policy_version 980 (0.0023)
-[2024-12-18 23:40:48,788][00179] Fps is (10 sec: 4506.2, 60 sec: 3959.5, 300 sec: 3721.1). Total num frames: 4030464. Throughput: 0: 981.0. Samples: 1008394. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:40:48,795][00179] Avg episode reward: [(0, '23.925')]
-[2024-12-18 23:40:48,804][02163] Saving new best policy, reward=23.925!
-[2024-12-18 23:40:53,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 4046848. Throughput: 0: 992.4. Samples: 1011196. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:40:53,794][00179] Avg episode reward: [(0, '25.279')]
-[2024-12-18 23:40:53,801][02163] Saving new best policy, reward=25.279!
-[2024-12-18 23:40:56,214][02177] Updated weights for policy 0, policy_version 990 (0.0032)
-[2024-12-18 23:40:58,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 4063232. Throughput: 0: 932.4. Samples: 1015322. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:40:58,795][00179] Avg episode reward: [(0, '23.411')]
-[2024-12-18 23:41:03,788][00179] Fps is (10 sec: 3686.3, 60 sec: 3823.1, 300 sec: 3693.3). Total num frames: 4083712. Throughput: 0: 953.8. Samples: 1022324. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:41:03,794][00179] Avg episode reward: [(0, '22.475')]
-[2024-12-18 23:41:05,775][02177] Updated weights for policy 0, policy_version 1000 (0.0026)
-[2024-12-18 23:41:08,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3707.2). Total num frames: 4100096. Throughput: 0: 973.4. Samples: 1025190. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:41:08,790][00179] Avg episode reward: [(0, '22.496')]
-[2024-12-18 23:41:13,787][00179] Fps is (10 sec: 2867.3, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 4112384. Throughput: 0: 912.7. Samples: 1028584. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:41:13,796][00179] Avg episode reward: [(0, '21.930')]
-[2024-12-18 23:41:18,788][00179] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 4128768. Throughput: 0: 864.4. Samples: 1033214. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:41:18,794][00179] Avg episode reward: [(0, '19.762')]
-[2024-12-18 23:41:20,094][02177] Updated weights for policy 0, policy_version 1010 (0.0033)
-[2024-12-18 23:41:23,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3707.3). Total num frames: 4153344. Throughput: 0: 894.3. Samples: 1036608. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:41:23,797][00179] Avg episode reward: [(0, '19.477')]
-[2024-12-18 23:41:28,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 4173824. Throughput: 0: 933.2. Samples: 1043360. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:41:28,794][00179] Avg episode reward: [(0, '19.747')]
-[2024-12-18 23:41:30,007][02177] Updated weights for policy 0, policy_version 1020 (0.0021)
-[2024-12-18 23:41:33,789][00179] Fps is (10 sec: 3276.2, 60 sec: 3549.9, 300 sec: 3693.3). Total num frames: 4186112. Throughput: 0: 870.6. Samples: 1047572. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:41:33,798][00179] Avg episode reward: [(0, '19.339')]
-[2024-12-18 23:41:38,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3754.8, 300 sec: 3693.3). Total num frames: 4210688. Throughput: 0: 881.6. Samples: 1050866. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:41:38,794][00179] Avg episode reward: [(0, '19.178')]
-[2024-12-18 23:41:40,414][02177] Updated weights for policy 0, policy_version 1030 (0.0027)
-[2024-12-18 23:41:43,788][00179] Fps is (10 sec: 4506.4, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 4231168. Throughput: 0: 944.0. Samples: 1057800. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:41:43,796][00179] Avg episode reward: [(0, '21.052')]
-[2024-12-18 23:41:48,789][00179] Fps is (10 sec: 3685.8, 60 sec: 3618.0, 300 sec: 3707.3). Total num frames: 4247552. Throughput: 0: 891.8. Samples: 1062456. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:41:48,792][00179] Avg episode reward: [(0, '20.437')]
-[2024-12-18 23:41:48,802][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001037_4247552.pth...
-[2024-12-18 23:41:49,004][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000819_3354624.pth
-[2024-12-18 23:41:52,238][02177] Updated weights for policy 0, policy_version 1040 (0.0018)
-[2024-12-18 23:41:53,787][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 4263936. Throughput: 0: 882.3. Samples: 1064894. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:41:53,790][00179] Avg episode reward: [(0, '20.793')]
-[2024-12-18 23:41:58,788][00179] Fps is (10 sec: 4096.6, 60 sec: 3754.7, 300 sec: 3707.3). Total num frames: 4288512. Throughput: 0: 959.8. Samples: 1071776. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:41:58,790][00179] Avg episode reward: [(0, '20.869')]
-[2024-12-18 23:42:03,735][02177] Updated weights for policy 0, policy_version 1050 (0.0034)
-[2024-12-18 23:42:03,788][00179] Fps is (10 sec: 3686.3, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 4300800. Throughput: 0: 945.7. Samples: 1075770. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:42:03,793][00179] Avg episode reward: [(0, '20.523')]
-[2024-12-18 23:42:08,788][00179] Fps is (10 sec: 2048.0, 60 sec: 3481.6, 300 sec: 3651.7). Total num frames: 4308992. Throughput: 0: 907.0. Samples: 1077422. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:42:08,794][00179] Avg episode reward: [(0, '20.277')]
-[2024-12-18 23:42:13,787][00179] Fps is (10 sec: 2867.3, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 4329472. Throughput: 0: 870.2. Samples: 1082520. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:42:13,790][00179] Avg episode reward: [(0, '20.016')]
-[2024-12-18 23:42:15,623][02177] Updated weights for policy 0, policy_version 1060 (0.0014)
-[2024-12-18 23:42:18,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 4354048. Throughput: 0: 936.0. Samples: 1089692. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:42:18,790][00179] Avg episode reward: [(0, '20.831')]
-[2024-12-18 23:42:23,788][00179] Fps is (10 sec: 4095.9, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 4370432. Throughput: 0: 924.1. Samples: 1092450. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:42:23,792][00179] Avg episode reward: [(0, '21.656')]
-[2024-12-18 23:42:27,143][02177] Updated weights for policy 0, policy_version 1070 (0.0019)
-[2024-12-18 23:42:28,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3693.3). Total num frames: 4386816. Throughput: 0: 866.1. Samples: 1096776. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:42:28,789][00179] Avg episode reward: [(0, '22.366')]
-[2024-12-18 23:42:33,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.8, 300 sec: 3693.3). Total num frames: 4411392. Throughput: 0: 920.2. Samples: 1103866. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:42:33,790][00179] Avg episode reward: [(0, '23.722')]
-[2024-12-18 23:42:36,007][02177] Updated weights for policy 0, policy_version 1080 (0.0015)
-[2024-12-18 23:42:38,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3721.2). Total num frames: 4431872. Throughput: 0: 943.2. Samples: 1107336. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:42:38,794][00179] Avg episode reward: [(0, '23.526')]
-[2024-12-18 23:42:43,788][00179] Fps is (10 sec: 3276.9, 60 sec: 3549.9, 300 sec: 3693.3). Total num frames: 4444160. Throughput: 0: 885.2. Samples: 1111608. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:42:43,794][00179] Avg episode reward: [(0, '23.018')]
-[2024-12-18 23:42:47,606][02177] Updated weights for policy 0, policy_version 1090 (0.0015)
-[2024-12-18 23:42:48,788][00179] Fps is (10 sec: 3686.3, 60 sec: 3686.5, 300 sec: 3693.3). Total num frames: 4468736. Throughput: 0: 934.8. Samples: 1117838. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:42:48,795][00179] Avg episode reward: [(0, '23.355')]
-[2024-12-18 23:42:53,788][00179] Fps is (10 sec: 4915.2, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 4493312. Throughput: 0: 976.7. Samples: 1121374. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:42:53,790][00179] Avg episode reward: [(0, '23.658')]
-[2024-12-18 23:42:57,995][02177] Updated weights for policy 0, policy_version 1100 (0.0027)
-[2024-12-18 23:42:58,790][00179] Fps is (10 sec: 3685.7, 60 sec: 3618.0, 300 sec: 3707.2). Total num frames: 4505600. Throughput: 0: 984.0. Samples: 1126802. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:42:58,792][00179] Avg episode reward: [(0, '24.253')]
-[2024-12-18 23:43:03,789][00179] Fps is (10 sec: 3276.5, 60 sec: 3754.6, 300 sec: 3693.3). Total num frames: 4526080. Throughput: 0: 941.8. Samples: 1132072. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:43:03,791][00179] Avg episode reward: [(0, '24.143')]
-[2024-12-18 23:43:07,951][02177] Updated weights for policy 0, policy_version 1110 (0.0028)
-[2024-12-18 23:43:08,788][00179] Fps is (10 sec: 4096.9, 60 sec: 3959.5, 300 sec: 3707.3). Total num frames: 4546560. Throughput: 0: 959.9. Samples: 1135644. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:43:08,795][00179] Avg episode reward: [(0, '26.714')]
-[2024-12-18 23:43:08,807][02163] Saving new best policy, reward=26.714!
-[2024-12-18 23:43:13,788][00179] Fps is (10 sec: 4096.4, 60 sec: 3959.5, 300 sec: 3721.1). Total num frames: 4567040. Throughput: 0: 1002.8. Samples: 1141902. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:43:13,793][00179] Avg episode reward: [(0, '26.823')]
-[2024-12-18 23:43:13,797][02163] Saving new best policy, reward=26.823!
-[2024-12-18 23:43:18,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 4579328. Throughput: 0: 934.9. Samples: 1145934.
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:43:18,789][00179] Avg episode reward: [(0, '26.806')] -[2024-12-18 23:43:19,893][02177] Updated weights for policy 0, policy_version 1120 (0.0045) -[2024-12-18 23:43:23,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3693.3). Total num frames: 4603904. Throughput: 0: 934.9. Samples: 1149408. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-12-18 23:43:23,790][00179] Avg episode reward: [(0, '25.788')] -[2024-12-18 23:43:28,790][00179] Fps is (10 sec: 4095.2, 60 sec: 3891.1, 300 sec: 3707.2). Total num frames: 4620288. Throughput: 0: 981.2. Samples: 1155766. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:43:28,795][00179] Avg episode reward: [(0, '26.272')] -[2024-12-18 23:43:30,829][02177] Updated weights for policy 0, policy_version 1130 (0.0033) -[2024-12-18 23:43:33,788][00179] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 4632576. Throughput: 0: 922.0. Samples: 1159328. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:43:33,789][00179] Avg episode reward: [(0, '25.859')] -[2024-12-18 23:43:38,788][00179] Fps is (10 sec: 2867.8, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 4648960. Throughput: 0: 882.7. Samples: 1161094. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:43:38,790][00179] Avg episode reward: [(0, '25.610')] -[2024-12-18 23:43:42,961][02177] Updated weights for policy 0, policy_version 1140 (0.0020) -[2024-12-18 23:43:43,787][00179] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3693.4). Total num frames: 4669440. Throughput: 0: 904.6. Samples: 1167506. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:43:43,793][00179] Avg episode reward: [(0, '25.315')] -[2024-12-18 23:43:48,792][00179] Fps is (10 sec: 4503.6, 60 sec: 3754.4, 300 sec: 3734.9). Total num frames: 4694016. Throughput: 0: 937.6. Samples: 1174268. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-12-18 23:43:48,799][00179] Avg episode reward: [(0, '24.375')] -[2024-12-18 23:43:48,809][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001146_4694016.pth... -[2024-12-18 23:43:48,983][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000926_3792896.pth -[2024-12-18 23:43:53,788][00179] Fps is (10 sec: 3686.3, 60 sec: 3549.9, 300 sec: 3707.2). Total num frames: 4706304. Throughput: 0: 902.9. Samples: 1176274. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:43:53,792][00179] Avg episode reward: [(0, '24.404')] -[2024-12-18 23:43:54,112][02177] Updated weights for policy 0, policy_version 1150 (0.0017) -[2024-12-18 23:43:58,788][00179] Fps is (10 sec: 3278.2, 60 sec: 3686.5, 300 sec: 3693.3). Total num frames: 4726784. Throughput: 0: 884.5. Samples: 1181704. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-12-18 23:43:58,793][00179] Avg episode reward: [(0, '25.416')] -[2024-12-18 23:44:03,522][02177] Updated weights for policy 0, policy_version 1160 (0.0023) -[2024-12-18 23:44:03,787][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 4751360. Throughput: 0: 947.8. Samples: 1188586. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:44:03,794][00179] Avg episode reward: [(0, '25.798')] -[2024-12-18 23:44:08,788][00179] Fps is (10 sec: 4095.8, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 4767744. Throughput: 0: 934.3. Samples: 1191454. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:44:08,790][00179] Avg episode reward: [(0, '25.705')] -[2024-12-18 23:44:13,787][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 4784128. Throughput: 0: 890.1. Samples: 1195818. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:44:13,794][00179] Avg episode reward: [(0, '25.018')] -[2024-12-18 23:44:15,158][02177] Updated weights for policy 0, policy_version 1170 (0.0022) -[2024-12-18 23:44:18,788][00179] Fps is (10 sec: 3686.6, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 4804608. Throughput: 0: 956.5. Samples: 1202370. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:44:18,796][00179] Avg episode reward: [(0, '25.445')] -[2024-12-18 23:44:23,791][00179] Fps is (10 sec: 3275.7, 60 sec: 3549.7, 300 sec: 3693.3). Total num frames: 4816896. Throughput: 0: 961.6. Samples: 1204370. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-12-18 23:44:23,793][00179] Avg episode reward: [(0, '24.630')] -[2024-12-18 23:44:28,755][02177] Updated weights for policy 0, policy_version 1180 (0.0022) -[2024-12-18 23:44:28,787][00179] Fps is (10 sec: 2867.2, 60 sec: 3550.0, 300 sec: 3679.5). Total num frames: 4833280. Throughput: 0: 900.0. Samples: 1208006. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-12-18 23:44:28,798][00179] Avg episode reward: [(0, '23.889')] -[2024-12-18 23:44:33,787][00179] Fps is (10 sec: 3277.9, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 4849664. Throughput: 0: 880.3. Samples: 1213876. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:44:33,794][00179] Avg episode reward: [(0, '23.094')] -[2024-12-18 23:44:38,210][02177] Updated weights for policy 0, policy_version 1190 (0.0022) -[2024-12-18 23:44:38,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3707.3). Total num frames: 4874240. Throughput: 0: 913.0. Samples: 1217358. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:44:38,790][00179] Avg episode reward: [(0, '22.288')] -[2024-12-18 23:44:43,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 4890624. Throughput: 0: 924.4. Samples: 1223300. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:44:43,790][00179] Avg episode reward: [(0, '22.094')] -[2024-12-18 23:44:48,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3550.1, 300 sec: 3693.3). Total num frames: 4907008. Throughput: 0: 877.2. Samples: 1228060. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:44:48,790][00179] Avg episode reward: [(0, '20.656')] -[2024-12-18 23:44:49,863][02177] Updated weights for policy 0, policy_version 1200 (0.0034) -[2024-12-18 23:44:53,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 4931584. Throughput: 0: 891.5. Samples: 1231572. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:44:53,795][00179] Avg episode reward: [(0, '20.414')] -[2024-12-18 23:44:58,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3721.2). Total num frames: 4952064. Throughput: 0: 948.8. Samples: 1238516. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:44:58,794][00179] Avg episode reward: [(0, '20.614')] -[2024-12-18 23:44:59,034][02177] Updated weights for policy 0, policy_version 1210 (0.0018) -[2024-12-18 23:45:03,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 4968448. Throughput: 0: 896.3. Samples: 1242704. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:45:03,790][00179] Avg episode reward: [(0, '20.103')] -[2024-12-18 23:45:08,788][00179] Fps is (10 sec: 3686.3, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 4988928. Throughput: 0: 921.9. Samples: 1245852. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:45:08,790][00179] Avg episode reward: [(0, '21.326')] -[2024-12-18 23:45:10,134][02177] Updated weights for policy 0, policy_version 1220 (0.0020) -[2024-12-18 23:45:13,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 5013504. Throughput: 0: 997.2. Samples: 1252880. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:45:13,790][00179] Avg episode reward: [(0, '23.346')] -[2024-12-18 23:45:18,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 5025792. Throughput: 0: 974.1. Samples: 1257710. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-12-18 23:45:18,795][00179] Avg episode reward: [(0, '22.678')] -[2024-12-18 23:45:21,762][02177] Updated weights for policy 0, policy_version 1230 (0.0023) -[2024-12-18 23:45:23,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3823.1, 300 sec: 3707.2). Total num frames: 5046272. Throughput: 0: 946.8. Samples: 1259962. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:45:23,790][00179] Avg episode reward: [(0, '22.534')] -[2024-12-18 23:45:28,788][00179] Fps is (10 sec: 4505.7, 60 sec: 3959.5, 300 sec: 3721.1). Total num frames: 5070848. Throughput: 0: 971.9. Samples: 1267034. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:45:28,789][00179] Avg episode reward: [(0, '23.126')] -[2024-12-18 23:45:30,403][02177] Updated weights for policy 0, policy_version 1240 (0.0018) -[2024-12-18 23:45:33,787][00179] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3735.0). Total num frames: 5087232. Throughput: 0: 999.2. Samples: 1273024. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:45:33,795][00179] Avg episode reward: [(0, '21.520')] -[2024-12-18 23:45:38,788][00179] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 5099520. Throughput: 0: 964.6. Samples: 1274980. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-12-18 23:45:38,790][00179] Avg episode reward: [(0, '21.905')] -[2024-12-18 23:45:42,096][02177] Updated weights for policy 0, policy_version 1250 (0.0021) -[2024-12-18 23:45:43,787][00179] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3721.1). Total num frames: 5128192. Throughput: 0: 949.2. Samples: 1281230. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-12-18 23:45:43,790][00179] Avg episode reward: [(0, '21.380')] -[2024-12-18 23:45:48,791][00179] Fps is (10 sec: 4504.1, 60 sec: 3959.2, 300 sec: 3721.1). Total num frames: 5144576. Throughput: 0: 981.3. Samples: 1286866. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:45:48,797][00179] Avg episode reward: [(0, '21.457')] -[2024-12-18 23:45:48,807][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001256_5144576.pth... -[2024-12-18 23:45:49,005][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001037_4247552.pth -[2024-12-18 23:45:53,791][00179] Fps is (10 sec: 2456.8, 60 sec: 3686.2, 300 sec: 3693.3). Total num frames: 5152768. Throughput: 0: 947.9. Samples: 1288512. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:45:53,800][00179] Avg episode reward: [(0, '21.563')] -[2024-12-18 23:45:55,730][02177] Updated weights for policy 0, policy_version 1260 (0.0043) -[2024-12-18 23:45:58,788][00179] Fps is (10 sec: 2458.4, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 5169152. Throughput: 0: 875.3. Samples: 1292268. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-12-18 23:45:58,789][00179] Avg episode reward: [(0, '21.389')] -[2024-12-18 23:46:03,788][00179] Fps is (10 sec: 4097.4, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 5193728. Throughput: 0: 918.9. Samples: 1299060. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) -[2024-12-18 23:46:03,796][00179] Avg episode reward: [(0, '21.828')] -[2024-12-18 23:46:05,432][02177] Updated weights for policy 0, policy_version 1270 (0.0015) -[2024-12-18 23:46:08,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 5214208. Throughput: 0: 946.4. Samples: 1302552. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) -[2024-12-18 23:46:08,797][00179] Avg episode reward: [(0, '22.056')] -[2024-12-18 23:46:13,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3721.1). Total num frames: 5226496. Throughput: 0: 895.8. Samples: 1307346. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:46:13,792][00179] Avg episode reward: [(0, '22.535')] -[2024-12-18 23:46:17,018][02177] Updated weights for policy 0, policy_version 1280 (0.0026) -[2024-12-18 23:46:18,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 5251072. Throughput: 0: 894.1. Samples: 1313260. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:46:18,790][00179] Avg episode reward: [(0, '24.434')] -[2024-12-18 23:46:23,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 5271552. Throughput: 0: 928.7. Samples: 1316770. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-12-18 23:46:23,794][00179] Avg episode reward: [(0, '25.294')] -[2024-12-18 23:46:26,225][02177] Updated weights for policy 0, policy_version 1290 (0.0018) -[2024-12-18 23:46:28,789][00179] Fps is (10 sec: 3685.9, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 5287936. Throughput: 0: 914.7. Samples: 1322394. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:46:28,797][00179] Avg episode reward: [(0, '26.240')] -[2024-12-18 23:46:33,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 5304320. Throughput: 0: 902.3. Samples: 1327466. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:46:33,797][00179] Avg episode reward: [(0, '26.874')] -[2024-12-18 23:46:33,872][02163] Saving new best policy, reward=26.874! -[2024-12-18 23:46:38,370][02177] Updated weights for policy 0, policy_version 1300 (0.0026) -[2024-12-18 23:46:38,788][00179] Fps is (10 sec: 3686.9, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 5324800. Throughput: 0: 936.2. Samples: 1330636. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:46:38,796][00179] Avg episode reward: [(0, '26.051')] -[2024-12-18 23:46:43,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3693.4). Total num frames: 5337088. Throughput: 0: 942.7. Samples: 1334690. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:46:43,793][00179] Avg episode reward: [(0, '25.366')] -[2024-12-18 23:46:48,788][00179] Fps is (10 sec: 2457.6, 60 sec: 3413.5, 300 sec: 3679.5). Total num frames: 5349376. Throughput: 0: 881.6. 
Samples: 1338732. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:46:48,791][00179] Avg episode reward: [(0, '24.594')] -[2024-12-18 23:46:51,820][02177] Updated weights for policy 0, policy_version 1310 (0.0024) -[2024-12-18 23:46:53,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.6, 300 sec: 3679.5). Total num frames: 5373952. Throughput: 0: 872.0. Samples: 1341792. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:46:53,794][00179] Avg episode reward: [(0, '23.005')] -[2024-12-18 23:46:58,788][00179] Fps is (10 sec: 4915.2, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 5398528. Throughput: 0: 920.0. Samples: 1348744. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:46:58,791][00179] Avg episode reward: [(0, '23.560')] -[2024-12-18 23:47:01,143][02177] Updated weights for policy 0, policy_version 1320 (0.0026) -[2024-12-18 23:47:03,789][00179] Fps is (10 sec: 3686.0, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 5410816. Throughput: 0: 901.0. Samples: 1353808. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-12-18 23:47:03,792][00179] Avg episode reward: [(0, '23.556')] -[2024-12-18 23:47:08,787][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 5431296. Throughput: 0: 870.5. Samples: 1355944. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:47:08,790][00179] Avg episode reward: [(0, '22.953')] -[2024-12-18 23:47:12,492][02177] Updated weights for policy 0, policy_version 1330 (0.0015) -[2024-12-18 23:47:13,788][00179] Fps is (10 sec: 4096.4, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 5451776. Throughput: 0: 894.7. Samples: 1362654. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:47:13,796][00179] Avg episode reward: [(0, '22.420')] -[2024-12-18 23:47:18,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 5468160. Throughput: 0: 909.1. Samples: 1368376. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:47:18,790][00179] Avg episode reward: [(0, '22.405')] -[2024-12-18 23:47:23,787][00179] Fps is (10 sec: 3276.9, 60 sec: 3549.9, 300 sec: 3721.1). Total num frames: 5484544. Throughput: 0: 883.9. Samples: 1370410. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:47:23,790][00179] Avg episode reward: [(0, '22.326')] -[2024-12-18 23:47:24,358][02177] Updated weights for policy 0, policy_version 1340 (0.0029) -[2024-12-18 23:47:28,788][00179] Fps is (10 sec: 4095.9, 60 sec: 3686.5, 300 sec: 3721.1). Total num frames: 5509120. Throughput: 0: 931.8. Samples: 1376620. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:47:28,790][00179] Avg episode reward: [(0, '21.341')] -[2024-12-18 23:47:33,172][02177] Updated weights for policy 0, policy_version 1350 (0.0014) -[2024-12-18 23:47:33,788][00179] Fps is (10 sec: 4505.4, 60 sec: 3754.6, 300 sec: 3721.1). Total num frames: 5529600. Throughput: 0: 993.4. Samples: 1383434. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:47:33,796][00179] Avg episode reward: [(0, '22.072')] -[2024-12-18 23:47:38,790][00179] Fps is (10 sec: 3276.2, 60 sec: 3618.0, 300 sec: 3721.1). Total num frames: 5541888. Throughput: 0: 968.2. Samples: 1385362. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:47:38,797][00179] Avg episode reward: [(0, '21.615')] -[2024-12-18 23:47:43,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3754.6, 300 sec: 3707.2). Total num frames: 5562368. Throughput: 0: 930.0. Samples: 1390596. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:47:43,794][00179] Avg episode reward: [(0, '22.062')] -[2024-12-18 23:47:44,936][02177] Updated weights for policy 0, policy_version 1360 (0.0013) -[2024-12-18 23:47:48,788][00179] Fps is (10 sec: 4506.5, 60 sec: 3959.5, 300 sec: 3707.2). Total num frames: 5586944. Throughput: 0: 971.1. Samples: 1397508. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:47:48,792][00179] Avg episode reward: [(0, '21.597')] -[2024-12-18 23:47:48,804][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001364_5586944.pth... -[2024-12-18 23:47:48,928][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001146_4694016.pth -[2024-12-18 23:47:53,788][00179] Fps is (10 sec: 4095.8, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 5603328. Throughput: 0: 985.3. Samples: 1400282. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:47:53,791][00179] Avg episode reward: [(0, '22.935')] -[2024-12-18 23:47:56,381][02177] Updated weights for policy 0, policy_version 1370 (0.0023) -[2024-12-18 23:47:58,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 5619712. Throughput: 0: 929.0. Samples: 1404460. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:47:58,790][00179] Avg episode reward: [(0, '23.270')] -[2024-12-18 23:48:03,788][00179] Fps is (10 sec: 4096.2, 60 sec: 3891.3, 300 sec: 3721.1). Total num frames: 5644288. Throughput: 0: 958.3. Samples: 1411500. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:48:03,790][00179] Avg episode reward: [(0, '23.868')] -[2024-12-18 23:48:06,196][02177] Updated weights for policy 0, policy_version 1380 (0.0023) -[2024-12-18 23:48:08,789][00179] Fps is (10 sec: 3685.8, 60 sec: 3754.6, 300 sec: 3693.3). Total num frames: 5656576. Throughput: 0: 969.6. Samples: 1414042. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:48:08,795][00179] Avg episode reward: [(0, '23.091')] -[2024-12-18 23:48:13,788][00179] Fps is (10 sec: 2457.7, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 5668864. Throughput: 0: 907.4. Samples: 1417452. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:48:13,792][00179] Avg episode reward: [(0, '23.869')] -[2024-12-18 23:48:18,788][00179] Fps is (10 sec: 2867.7, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 5685248. Throughput: 0: 866.7. Samples: 1422434. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-12-18 23:48:18,790][00179] Avg episode reward: [(0, '24.626')] -[2024-12-18 23:48:20,001][02177] Updated weights for policy 0, policy_version 1390 (0.0019) -[2024-12-18 23:48:23,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3693.4). Total num frames: 5709824. Throughput: 0: 901.6. Samples: 1425932. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:48:23,789][00179] Avg episode reward: [(0, '23.589')] -[2024-12-18 23:48:28,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 5730304. Throughput: 0: 929.7. Samples: 1432430. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:48:28,792][00179] Avg episode reward: [(0, '23.594')] -[2024-12-18 23:48:30,143][02177] Updated weights for policy 0, policy_version 1400 (0.0020) -[2024-12-18 23:48:33,787][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3707.2). Total num frames: 5742592. Throughput: 0: 873.5. Samples: 1436816. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:48:33,790][00179] Avg episode reward: [(0, '23.920')] -[2024-12-18 23:48:38,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3754.8, 300 sec: 3721.1). Total num frames: 5767168. Throughput: 0: 889.2. Samples: 1440294. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:48:38,790][00179] Avg episode reward: [(0, '25.671')] -[2024-12-18 23:48:40,226][02177] Updated weights for policy 0, policy_version 1410 (0.0016) -[2024-12-18 23:48:43,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3707.3). Total num frames: 5787648. Throughput: 0: 951.9. Samples: 1447294. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:48:43,795][00179] Avg episode reward: [(0, '26.214')] -[2024-12-18 23:48:48,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 5804032. Throughput: 0: 894.5. Samples: 1451752. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:48:48,790][00179] Avg episode reward: [(0, '26.237')] -[2024-12-18 23:48:51,996][02177] Updated weights for policy 0, policy_version 1420 (0.0013) -[2024-12-18 23:48:53,791][00179] Fps is (10 sec: 3275.6, 60 sec: 3618.0, 300 sec: 3707.2). Total num frames: 5820416. Throughput: 0: 894.2. Samples: 1454284. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:48:53,794][00179] Avg episode reward: [(0, '26.422')] -[2024-12-18 23:48:58,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 5836800. Throughput: 0: 942.2. Samples: 1459850. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) -[2024-12-18 23:48:58,795][00179] Avg episode reward: [(0, '26.101')] -[2024-12-18 23:49:03,790][00179] Fps is (10 sec: 3277.3, 60 sec: 3481.5, 300 sec: 3679.4). Total num frames: 5853184. Throughput: 0: 920.8. Samples: 1463870. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:49:03,792][00179] Avg episode reward: [(0, '25.629')] -[2024-12-18 23:49:05,043][02177] Updated weights for policy 0, policy_version 1430 (0.0029) -[2024-12-18 23:49:08,788][00179] Fps is (10 sec: 2867.2, 60 sec: 3481.7, 300 sec: 3665.6). Total num frames: 5865472. Throughput: 0: 889.5. Samples: 1465960. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:49:08,790][00179] Avg episode reward: [(0, '25.832')] -[2024-12-18 23:49:13,788][00179] Fps is (10 sec: 3687.3, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 5890048. Throughput: 0: 884.7. Samples: 1472242. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-12-18 23:49:13,790][00179] Avg episode reward: [(0, '24.447')] -[2024-12-18 23:49:15,105][02177] Updated weights for policy 0, policy_version 1440 (0.0017) -[2024-12-18 23:49:18,787][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3707.3). Total num frames: 5910528. Throughput: 0: 934.9. Samples: 1478888. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:49:18,793][00179] Avg episode reward: [(0, '23.277')] -[2024-12-18 23:49:23,792][00179] Fps is (10 sec: 3275.4, 60 sec: 3549.6, 300 sec: 3693.3). Total num frames: 5922816. Throughput: 0: 901.5. Samples: 1480866. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-12-18 23:49:23,794][00179] Avg episode reward: [(0, '22.612')] -[2024-12-18 23:49:26,962][02177] Updated weights for policy 0, policy_version 1450 (0.0024) -[2024-12-18 23:49:28,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 5947392. Throughput: 0: 865.6. Samples: 1486246. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:49:28,794][00179] Avg episode reward: [(0, '23.448')] -[2024-12-18 23:49:33,788][00179] Fps is (10 sec: 4507.6, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 5967872. Throughput: 0: 922.3. Samples: 1493256. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:49:33,790][00179] Avg episode reward: [(0, '23.794')] -[2024-12-18 23:49:35,931][02177] Updated weights for policy 0, policy_version 1460 (0.0018) -[2024-12-18 23:49:38,791][00179] Fps is (10 sec: 3685.2, 60 sec: 3617.9, 300 sec: 3707.2). Total num frames: 5984256. Throughput: 0: 932.4. Samples: 1496242. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) -[2024-12-18 23:49:38,794][00179] Avg episode reward: [(0, '24.558')] -[2024-12-18 23:49:43,787][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 6004736. Throughput: 0: 904.5. Samples: 1500552. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:49:43,794][00179] Avg episode reward: [(0, '25.305')] -[2024-12-18 23:49:47,275][02177] Updated weights for policy 0, policy_version 1470 (0.0031) -[2024-12-18 23:49:48,788][00179] Fps is (10 sec: 4097.4, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 6025216. Throughput: 0: 966.6. Samples: 1507364. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:49:48,790][00179] Avg episode reward: [(0, '26.503')] -[2024-12-18 23:49:48,805][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001471_6025216.pth... -[2024-12-18 23:49:48,933][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001256_5144576.pth -[2024-12-18 23:49:53,792][00179] Fps is (10 sec: 4094.2, 60 sec: 3754.6, 300 sec: 3707.2). Total num frames: 6045696. Throughput: 0: 995.9. Samples: 1510782. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) -[2024-12-18 23:49:53,796][00179] Avg episode reward: [(0, '27.127')] -[2024-12-18 23:49:53,802][02163] Saving new best policy, reward=27.127! -[2024-12-18 23:49:58,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3693.3). Total num frames: 6057984. Throughput: 0: 955.9. Samples: 1515258. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:49:58,795][00179] Avg episode reward: [(0, '25.667')] -[2024-12-18 23:49:58,934][02177] Updated weights for policy 0, policy_version 1480 (0.0015) -[2024-12-18 23:50:03,787][00179] Fps is (10 sec: 3688.0, 60 sec: 3823.1, 300 sec: 3707.2). Total num frames: 6082560. Throughput: 0: 943.6. Samples: 1521348. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:50:03,795][00179] Avg episode reward: [(0, '25.085')] -[2024-12-18 23:50:08,000][02177] Updated weights for policy 0, policy_version 1490 (0.0014) -[2024-12-18 23:50:08,789][00179] Fps is (10 sec: 4504.9, 60 sec: 3959.4, 300 sec: 3693.3). Total num frames: 6103040. Throughput: 0: 977.2. Samples: 1524836. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) -[2024-12-18 23:50:08,791][00179] Avg episode reward: [(0, '22.398')] -[2024-12-18 23:50:13,790][00179] Fps is (10 sec: 3685.6, 60 sec: 3822.8, 300 sec: 3707.2). Total num frames: 6119424. Throughput: 0: 978.4. Samples: 1530276. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:50:13,792][00179] Avg episode reward: [(0, '21.595')] -[2024-12-18 23:50:18,788][00179] Fps is (10 sec: 3277.3, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 6135808. Throughput: 0: 934.2. Samples: 1535294. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-12-18 23:50:18,794][00179] Avg episode reward: [(0, '20.711')] -[2024-12-18 23:50:19,765][02177] Updated weights for policy 0, policy_version 1500 (0.0021) -[2024-12-18 23:50:23,789][00179] Fps is (10 sec: 4096.3, 60 sec: 3959.7, 300 sec: 3693.3). Total num frames: 6160384. Throughput: 0: 943.7. Samples: 1538708. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:50:23,791][00179] Avg episode reward: [(0, '21.339')] -[2024-12-18 23:50:28,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 6172672. Throughput: 0: 954.1. Samples: 1543488. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:50:28,792][00179] Avg episode reward: [(0, '20.124')] -[2024-12-18 23:50:33,230][02177] Updated weights for policy 0, policy_version 1510 (0.0021) -[2024-12-18 23:50:33,788][00179] Fps is (10 sec: 2457.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 6184960. Throughput: 0: 882.0. Samples: 1547054. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:50:33,791][00179] Avg episode reward: [(0, '21.271')] -[2024-12-18 23:50:38,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3686.6, 300 sec: 3651.7). Total num frames: 6205440. Throughput: 0: 871.5. Samples: 1549996. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:50:38,790][00179] Avg episode reward: [(0, '21.740')] -[2024-12-18 23:50:42,787][02177] Updated weights for policy 0, policy_version 1520 (0.0025) -[2024-12-18 23:50:43,788][00179] Fps is (10 sec: 4505.7, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 6230016. Throughput: 0: 928.0. Samples: 1557020. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) -[2024-12-18 23:50:43,790][00179] Avg episode reward: [(0, '22.567')] -[2024-12-18 23:50:48,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3707.3). Total num frames: 6246400. Throughput: 0: 909.5. Samples: 1562276. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) -[2024-12-18 23:50:48,792][00179] Avg episode reward: [(0, '22.166')] -[2024-12-18 23:50:53,788][00179] Fps is (10 sec: 3276.9, 60 sec: 3618.4, 300 sec: 3707.2). Total num frames: 6262784. Throughput: 0: 879.9. Samples: 1564430. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:50:53,796][00179] Avg episode reward: [(0, '22.155')] -[2024-12-18 23:50:54,326][02177] Updated weights for policy 0, policy_version 1530 (0.0016) -[2024-12-18 23:50:58,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3707.2). Total num frames: 6287360. Throughput: 0: 910.5. Samples: 1571246. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:50:58,793][00179] Avg episode reward: [(0, '20.684')] -[2024-12-18 23:51:03,677][02177] Updated weights for policy 0, policy_version 1540 (0.0015) -[2024-12-18 23:51:03,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 6307840. Throughput: 0: 936.7. Samples: 1577446. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:51:03,791][00179] Avg episode reward: [(0, '22.546')] -[2024-12-18 23:51:08,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.2, 300 sec: 3707.2). Total num frames: 6320128. Throughput: 0: 907.3. Samples: 1579536. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:51:08,790][00179] Avg episode reward: [(0, '23.262')] -[2024-12-18 23:51:13,787][00179] Fps is (10 sec: 3276.8, 60 sec: 3686.5, 300 sec: 3693.3). Total num frames: 6340608. Throughput: 0: 923.8. Samples: 1585060. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:51:13,792][00179] Avg episode reward: [(0, '22.286')] -[2024-12-18 23:51:16,499][02177] Updated weights for policy 0, policy_version 1550 (0.0026) -[2024-12-18 23:51:18,789][00179] Fps is (10 sec: 3276.4, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 6352896. Throughput: 0: 940.1. Samples: 1589360. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-12-18 23:51:18,791][00179] Avg episode reward: [(0, '23.436')] -[2024-12-18 23:51:23,788][00179] Fps is (10 sec: 2457.6, 60 sec: 3413.4, 300 sec: 3651.7). Total num frames: 6365184. Throughput: 0: 922.6. Samples: 1591512. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:51:23,790][00179] Avg episode reward: [(0, '23.099')] -[2024-12-18 23:51:28,709][02177] Updated weights for policy 0, policy_version 1560 (0.0022) -[2024-12-18 23:51:28,788][00179] Fps is (10 sec: 3686.9, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 6389760. Throughput: 0: 884.4. Samples: 1596820. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-12-18 23:51:28,795][00179] Avg episode reward: [(0, '25.207')] -[2024-12-18 23:51:33,787][00179] Fps is (10 sec: 4915.3, 60 sec: 3823.0, 300 sec: 3693.3). Total num frames: 6414336. Throughput: 0: 926.4. Samples: 1603964. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:51:33,790][00179] Avg episode reward: [(0, '22.965')] -[2024-12-18 23:51:38,633][02177] Updated weights for policy 0, policy_version 1570 (0.0033) -[2024-12-18 23:51:38,788][00179] Fps is (10 sec: 4095.9, 60 sec: 3754.6, 300 sec: 3707.2). Total num frames: 6430720. Throughput: 0: 942.7. Samples: 1606854. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:51:38,797][00179] Avg episode reward: [(0, '23.004')] -[2024-12-18 23:51:43,790][00179] Fps is (10 sec: 3276.0, 60 sec: 3618.0, 300 sec: 3721.1). Total num frames: 6447104. Throughput: 0: 889.2. Samples: 1611262. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:51:43,793][00179] Avg episode reward: [(0, '22.745')] -[2024-12-18 23:51:48,788][00179] Fps is (10 sec: 3686.5, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 6467584. Throughput: 0: 901.6. Samples: 1618020. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:51:48,796][00179] Avg episode reward: [(0, '22.439')] -[2024-12-18 23:51:48,807][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001579_6467584.pth... -[2024-12-18 23:51:48,928][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001364_5586944.pth -[2024-12-18 23:51:49,143][02177] Updated weights for policy 0, policy_version 1580 (0.0041) -[2024-12-18 23:51:53,790][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.5, 300 sec: 3693.3). Total num frames: 6488064. Throughput: 0: 929.5. Samples: 1621364. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-12-18 23:51:53,792][00179] Avg episode reward: [(0, '22.503')] -[2024-12-18 23:51:58,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3693.4). Total num frames: 6500352. Throughput: 0: 901.4. Samples: 1625624. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-12-18 23:51:58,793][00179] Avg episode reward: [(0, '23.852')] -[2024-12-18 23:52:00,754][02177] Updated weights for policy 0, policy_version 1590 (0.0016) -[2024-12-18 23:52:03,788][00179] Fps is (10 sec: 3687.2, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 6524928. Throughput: 0: 947.0. Samples: 1631976. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:52:03,789][00179] Avg episode reward: [(0, '24.844')] -[2024-12-18 23:52:08,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 6545408. Throughput: 0: 973.6. Samples: 1635324. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:52:08,792][00179] Avg episode reward: [(0, '24.454')] -[2024-12-18 23:52:10,612][02177] Updated weights for policy 0, policy_version 1600 (0.0016) -[2024-12-18 23:52:13,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 6561792. Throughput: 0: 967.2. Samples: 1640342. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:52:13,789][00179] Avg episode reward: [(0, '25.397')] -[2024-12-18 23:52:18,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3754.8, 300 sec: 3707.2). Total num frames: 6578176. Throughput: 0: 929.6. Samples: 1645796. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:52:18,790][00179] Avg episode reward: [(0, '24.562')] -[2024-12-18 23:52:21,710][02177] Updated weights for policy 0, policy_version 1610 (0.0025) -[2024-12-18 23:52:23,788][00179] Fps is (10 sec: 4095.9, 60 sec: 3959.5, 300 sec: 3707.2). Total num frames: 6602752. Throughput: 0: 940.7. Samples: 1649184. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:52:23,789][00179] Avg episode reward: [(0, '23.920')] -[2024-12-18 23:52:28,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3693.3). Total num frames: 6619136. Throughput: 0: 971.6. Samples: 1654980. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:52:28,791][00179] Avg episode reward: [(0, '23.593')] -[2024-12-18 23:52:33,430][02177] Updated weights for policy 0, policy_version 1620 (0.0022) -[2024-12-18 23:52:33,788][00179] Fps is (10 sec: 3276.9, 60 sec: 3686.4, 300 sec: 3707.3). Total num frames: 6635520. Throughput: 0: 925.8. Samples: 1659680. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:52:33,789][00179] Avg episode reward: [(0, '24.838')] -[2024-12-18 23:52:38,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 6656000. Throughput: 0: 927.7. Samples: 1663108. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:52:38,789][00179] Avg episode reward: [(0, '24.166')] -[2024-12-18 23:52:42,722][02177] Updated weights for policy 0, policy_version 1630 (0.0029) -[2024-12-18 23:52:43,789][00179] Fps is (10 sec: 4095.4, 60 sec: 3823.0, 300 sec: 3693.3). Total num frames: 6676480. Throughput: 0: 978.3. Samples: 1669648. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:52:43,793][00179] Avg episode reward: [(0, '23.751')] -[2024-12-18 23:52:48,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 6688768. Throughput: 0: 912.4. Samples: 1673032. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:52:48,793][00179] Avg episode reward: [(0, '23.295')] -[2024-12-18 23:52:53,788][00179] Fps is (10 sec: 2457.9, 60 sec: 3550.0, 300 sec: 3665.6). Total num frames: 6701056. Throughput: 0: 875.6. Samples: 1674728. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:52:53,789][00179] Avg episode reward: [(0, '21.572')] -[2024-12-18 23:52:56,741][02177] Updated weights for policy 0, policy_version 1640 (0.0023) -[2024-12-18 23:52:58,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 6725632. Throughput: 0: 902.8. Samples: 1680966. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-12-18 23:52:58,789][00179] Avg episode reward: [(0, '19.506')] -[2024-12-18 23:53:03,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3693.4). Total num frames: 6746112. Throughput: 0: 918.6. Samples: 1687132. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:53:03,794][00179] Avg episode reward: [(0, '20.679')] -[2024-12-18 23:53:08,012][02177] Updated weights for policy 0, policy_version 1650 (0.0019) -[2024-12-18 23:53:08,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3693.3). Total num frames: 6758400. Throughput: 0: 887.7. Samples: 1689132. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:53:08,796][00179] Avg episode reward: [(0, '19.363')] -[2024-12-18 23:53:13,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 6782976. Throughput: 0: 901.4. Samples: 1695542. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:53:13,789][00179] Avg episode reward: [(0, '20.546')] -[2024-12-18 23:53:16,764][02177] Updated weights for policy 0, policy_version 1660 (0.0018) -[2024-12-18 23:53:18,788][00179] Fps is (10 sec: 4915.2, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 6807552. Throughput: 0: 950.2. Samples: 1702440. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:53:18,794][00179] Avg episode reward: [(0, '22.339')] -[2024-12-18 23:53:23,787][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 6819840. Throughput: 0: 919.2. Samples: 1704472. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:53:23,792][00179] Avg episode reward: [(0, '22.810')] -[2024-12-18 23:53:28,288][02177] Updated weights for policy 0, policy_version 1670 (0.0025) -[2024-12-18 23:53:28,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 6840320. Throughput: 0: 895.2. Samples: 1709930. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:53:28,791][00179] Avg episode reward: [(0, '21.614')] -[2024-12-18 23:53:33,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 6852608. Throughput: 0: 922.8. Samples: 1714558. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:53:33,792][00179] Avg episode reward: [(0, '22.087')] -[2024-12-18 23:53:38,788][00179] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3665.6). Total num frames: 6868992. Throughput: 0: 931.7. Samples: 1716656. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:53:38,790][00179] Avg episode reward: [(0, '22.112')] -[2024-12-18 23:53:42,323][02177] Updated weights for policy 0, policy_version 1680 (0.0029) -[2024-12-18 23:53:43,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3481.7, 300 sec: 3665.6). Total num frames: 6885376. Throughput: 0: 892.8. Samples: 1721140. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-12-18 23:53:43,795][00179] Avg episode reward: [(0, '21.892')] -[2024-12-18 23:53:48,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3693.4). Total num frames: 6909952. Throughput: 0: 912.0. Samples: 1728172. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-12-18 23:53:48,790][00179] Avg episode reward: [(0, '21.565')] -[2024-12-18 23:53:48,802][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001687_6909952.pth... 
-[2024-12-18 23:53:48,929][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001471_6025216.pth -[2024-12-18 23:53:51,309][02177] Updated weights for policy 0, policy_version 1690 (0.0022) -[2024-12-18 23:53:53,789][00179] Fps is (10 sec: 4504.7, 60 sec: 3822.8, 300 sec: 3707.2). Total num frames: 6930432. Throughput: 0: 941.7. Samples: 1731512. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:53:53,797][00179] Avg episode reward: [(0, '21.584')] -[2024-12-18 23:53:58,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3693.4). Total num frames: 6942720. Throughput: 0: 897.9. Samples: 1735946. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:53:58,794][00179] Avg episode reward: [(0, '22.720')] -[2024-12-18 23:54:02,720][02177] Updated weights for policy 0, policy_version 1700 (0.0024) -[2024-12-18 23:54:03,789][00179] Fps is (10 sec: 3686.6, 60 sec: 3686.3, 300 sec: 3735.0). Total num frames: 6967296. Throughput: 0: 889.9. Samples: 1742488. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:54:03,795][00179] Avg episode reward: [(0, '23.096')] -[2024-12-18 23:54:08,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 6987776. Throughput: 0: 922.3. Samples: 1745974. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) -[2024-12-18 23:54:08,792][00179] Avg episode reward: [(0, '21.578')] -[2024-12-18 23:54:13,049][02177] Updated weights for policy 0, policy_version 1710 (0.0015) -[2024-12-18 23:54:13,788][00179] Fps is (10 sec: 3686.9, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 7004160. Throughput: 0: 918.7. Samples: 1751270. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-12-18 23:54:13,790][00179] Avg episode reward: [(0, '20.894')] -[2024-12-18 23:54:18,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3735.1). Total num frames: 7024640. Throughput: 0: 940.1. Samples: 1756862. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:54:18,794][00179] Avg episode reward: [(0, '19.497')] -[2024-12-18 23:54:23,114][02177] Updated weights for policy 0, policy_version 1720 (0.0024) -[2024-12-18 23:54:23,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 7045120. Throughput: 0: 969.3. Samples: 1760274. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:54:23,794][00179] Avg episode reward: [(0, '20.160')] -[2024-12-18 23:54:28,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 7065600. Throughput: 0: 1003.0. Samples: 1766276. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-12-18 23:54:28,798][00179] Avg episode reward: [(0, '20.961')] -[2024-12-18 23:54:33,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3721.2). Total num frames: 7081984. Throughput: 0: 955.1. Samples: 1771152. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:54:33,798][00179] Avg episode reward: [(0, '22.797')] -[2024-12-18 23:54:34,393][02177] Updated weights for policy 0, policy_version 1730 (0.0015) -[2024-12-18 23:54:38,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3721.1). Total num frames: 7102464. Throughput: 0: 960.8. Samples: 1774746. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-12-18 23:54:38,794][00179] Avg episode reward: [(0, '24.168')] -[2024-12-18 23:54:43,457][02177] Updated weights for policy 0, policy_version 1740 (0.0019) -[2024-12-18 23:54:43,788][00179] Fps is (10 sec: 4505.5, 60 sec: 4027.7, 300 sec: 3735.0). 
Total num frames: 7127040. Throughput: 0: 1015.6. Samples: 1781648. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:54:43,793][00179] Avg episode reward: [(0, '25.471')]
-[2024-12-18 23:54:48,790][00179] Fps is (10 sec: 3685.6, 60 sec: 3822.8, 300 sec: 3707.3). Total num frames: 7139328. Throughput: 0: 965.7. Samples: 1785944. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:54:48,791][00179] Avg episode reward: [(0, '25.114')]
-[2024-12-18 23:54:53,788][00179] Fps is (10 sec: 3686.5, 60 sec: 3891.3, 300 sec: 3748.9). Total num frames: 7163904. Throughput: 0: 962.5. Samples: 1789286. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:54:53,792][00179] Avg episode reward: [(0, '24.964')]
-[2024-12-18 23:54:54,483][02177] Updated weights for policy 0, policy_version 1750 (0.0037)
-[2024-12-18 23:54:58,788][00179] Fps is (10 sec: 4506.5, 60 sec: 4027.7, 300 sec: 3735.0). Total num frames: 7184384. Throughput: 0: 998.8. Samples: 1796218. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:54:58,789][00179] Avg episode reward: [(0, '24.277')]
-[2024-12-18 23:55:03,788][00179] Fps is (10 sec: 3686.3, 60 sec: 3891.3, 300 sec: 3721.1). Total num frames: 7200768. Throughput: 0: 978.3. Samples: 1800888. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:55:03,792][00179] Avg episode reward: [(0, '24.101')]
-[2024-12-18 23:55:06,927][02177] Updated weights for policy 0, policy_version 1760 (0.0026)
-[2024-12-18 23:55:08,788][00179] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3707.3). Total num frames: 7213056. Throughput: 0: 940.1. Samples: 1802580. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:55:08,790][00179] Avg episode reward: [(0, '24.382')]
-[2024-12-18 23:55:13,788][00179] Fps is (10 sec: 2867.3, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 7229440. Throughput: 0: 915.4. Samples: 1807470. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:55:13,798][00179] Avg episode reward: [(0, '23.573')]
-[2024-12-18 23:55:18,053][02177] Updated weights for policy 0, policy_version 1770 (0.0032)
-[2024-12-18 23:55:18,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3693.4). Total num frames: 7249920. Throughput: 0: 941.2. Samples: 1813504. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:55:18,790][00179] Avg episode reward: [(0, '24.037')]
-[2024-12-18 23:55:23,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 7262208. Throughput: 0: 907.6. Samples: 1815588. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:55:23,792][00179] Avg episode reward: [(0, '23.009')]
-[2024-12-18 23:55:28,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 7286784. Throughput: 0: 892.1. Samples: 1821790. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:55:28,791][00179] Avg episode reward: [(0, '23.009')]
-[2024-12-18 23:55:28,826][02177] Updated weights for policy 0, policy_version 1780 (0.0018)
-[2024-12-18 23:55:33,790][00179] Fps is (10 sec: 4913.8, 60 sec: 3822.7, 300 sec: 3748.8). Total num frames: 7311360. Throughput: 0: 951.6. Samples: 1828768. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:55:33,796][00179] Avg episode reward: [(0, '21.842')]
-[2024-12-18 23:55:38,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 7323648. Throughput: 0: 924.8. Samples: 1830902. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:55:38,790][00179] Avg episode reward: [(0, '22.090')]
-[2024-12-18 23:55:40,395][02177] Updated weights for policy 0, policy_version 1790 (0.0027)
-[2024-12-18 23:55:43,788][00179] Fps is (10 sec: 3277.8, 60 sec: 3618.2, 300 sec: 3721.1). Total num frames: 7344128. Throughput: 0: 885.1. Samples: 1836046. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:55:43,791][00179] Avg episode reward: [(0, '22.317')]
-[2024-12-18 23:55:48,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.8, 300 sec: 3735.0). Total num frames: 7364608. Throughput: 0: 921.3. Samples: 1842346. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:55:48,790][00179] Avg episode reward: [(0, '23.443')]
-[2024-12-18 23:55:48,809][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001798_7364608.pth...
-[2024-12-18 23:55:48,967][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001579_6467584.pth
-[2024-12-18 23:55:50,963][02177] Updated weights for policy 0, policy_version 1800 (0.0018)
-[2024-12-18 23:55:53,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3693.3). Total num frames: 7376896. Throughput: 0: 921.4. Samples: 1844042. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:55:53,790][00179] Avg episode reward: [(0, '24.717')]
-[2024-12-18 23:55:58,788][00179] Fps is (10 sec: 2457.6, 60 sec: 3413.3, 300 sec: 3665.6). Total num frames: 7389184. Throughput: 0: 887.7. Samples: 1847416. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:55:58,791][00179] Avg episode reward: [(0, '24.459')]
-[2024-12-18 23:56:03,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3693.3). Total num frames: 7409664. Throughput: 0: 889.0. Samples: 1853510. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:56:03,790][00179] Avg episode reward: [(0, '24.354')]
-[2024-12-18 23:56:04,125][02177] Updated weights for policy 0, policy_version 1810 (0.0020)
-[2024-12-18 23:56:08,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 7434240. Throughput: 0: 917.3. Samples: 1856866. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:56:08,790][00179] Avg episode reward: [(0, '25.639')]
-[2024-12-18 23:56:13,787][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 7446528. Throughput: 0: 895.2. Samples: 1862074. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:56:13,789][00179] Avg episode reward: [(0, '26.446')]
-[2024-12-18 23:56:15,675][02177] Updated weights for policy 0, policy_version 1820 (0.0026)
-[2024-12-18 23:56:18,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 7467008. Throughput: 0: 862.9. Samples: 1867598. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:56:18,796][00179] Avg episode reward: [(0, '26.138')]
-[2024-12-18 23:56:23,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 7487488. Throughput: 0: 891.6. Samples: 1871022. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:56:23,791][00179] Avg episode reward: [(0, '25.303')]
-[2024-12-18 23:56:24,813][02177] Updated weights for policy 0, policy_version 1830 (0.0034)
-[2024-12-18 23:56:28,788][00179] Fps is (10 sec: 4095.7, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 7507968. Throughput: 0: 910.7. Samples: 1877028. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:56:28,791][00179] Avg episode reward: [(0, '26.031')]
-[2024-12-18 23:56:33,787][00179] Fps is (10 sec: 3686.4, 60 sec: 3550.0, 300 sec: 3707.2). Total num frames: 7524352. Throughput: 0: 880.0. Samples: 1881946. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:56:33,790][00179] Avg episode reward: [(0, '26.060')]
-[2024-12-18 23:56:36,112][02177] Updated weights for policy 0, policy_version 1840 (0.0038)
-[2024-12-18 23:56:38,788][00179] Fps is (10 sec: 4096.3, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 7548928. Throughput: 0: 920.0. Samples: 1885442. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:56:38,790][00179] Avg episode reward: [(0, '24.366')]
-[2024-12-18 23:56:43,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 7569408. Throughput: 0: 999.6. Samples: 1892396. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:56:43,790][00179] Avg episode reward: [(0, '23.567')]
-[2024-12-18 23:56:46,377][02177] Updated weights for policy 0, policy_version 1850 (0.0015)
-[2024-12-18 23:56:48,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3707.3). Total num frames: 7581696. Throughput: 0: 956.8. Samples: 1896564. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
-[2024-12-18 23:56:48,793][00179] Avg episode reward: [(0, '23.997')]
-[2024-12-18 23:56:53,787][00179] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 7606272. Throughput: 0: 957.5. Samples: 1899954. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:56:53,790][00179] Avg episode reward: [(0, '22.810')]
-[2024-12-18 23:56:56,212][02177] Updated weights for policy 0, policy_version 1860 (0.0026)
-[2024-12-18 23:56:58,788][00179] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3735.0). Total num frames: 7626752. Throughput: 0: 997.2. Samples: 1906948. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:56:58,798][00179] Avg episode reward: [(0, '23.236')]
-[2024-12-18 23:57:03,787][00179] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3721.1). Total num frames: 7643136. Throughput: 0: 979.8. Samples: 1911688. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:57:03,790][00179] Avg episode reward: [(0, '24.024')]
-[2024-12-18 23:57:07,576][02177] Updated weights for policy 0, policy_version 1870 (0.0020)
-[2024-12-18 23:57:08,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 7663616. Throughput: 0: 962.4. Samples: 1914332. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:57:08,795][00179] Avg episode reward: [(0, '24.740')]
-[2024-12-18 23:57:13,788][00179] Fps is (10 sec: 4095.9, 60 sec: 3959.4, 300 sec: 3748.9). Total num frames: 7684096. Throughput: 0: 979.9. Samples: 1921124. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:57:13,795][00179] Avg episode reward: [(0, '24.245')]
-[2024-12-18 23:57:17,489][02177] Updated weights for policy 0, policy_version 1880 (0.0028)
-[2024-12-18 23:57:18,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3721.1). Total num frames: 7700480. Throughput: 0: 988.8. Samples: 1926442. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:57:18,789][00179] Avg episode reward: [(0, '24.059')]
-[2024-12-18 23:57:23,788][00179] Fps is (10 sec: 3276.9, 60 sec: 3822.9, 300 sec: 3721.1). Total num frames: 7716864. Throughput: 0: 956.4. Samples: 1928478. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:57:23,790][00179] Avg episode reward: [(0, '24.926')]
-[2024-12-18 23:57:28,789][00179] Fps is (10 sec: 2866.8, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 7729152. Throughput: 0: 899.8. Samples: 1932890. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:57:28,794][00179] Avg episode reward: [(0, '25.324')]
-[2024-12-18 23:57:31,153][02177] Updated weights for policy 0, policy_version 1890 (0.0028)
-[2024-12-18 23:57:33,787][00179] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 7749632. Throughput: 0: 929.8. Samples: 1938406. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:57:33,791][00179] Avg episode reward: [(0, '25.052')]
-[2024-12-18 23:57:38,792][00179] Fps is (10 sec: 3275.8, 60 sec: 3549.6, 300 sec: 3679.4). Total num frames: 7761920. Throughput: 0: 901.2. Samples: 1940512. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:57:38,794][00179] Avg episode reward: [(0, '26.068')]
-[2024-12-18 23:57:42,579][02177] Updated weights for policy 0, policy_version 1900 (0.0021)
-[2024-12-18 23:57:43,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 7786496. Throughput: 0: 874.0. Samples: 1946276. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
-[2024-12-18 23:57:43,790][00179] Avg episode reward: [(0, '26.008')]
-[2024-12-18 23:57:48,788][00179] Fps is (10 sec: 4507.5, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 7806976. Throughput: 0: 917.0. Samples: 1952952. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:57:48,790][00179] Avg episode reward: [(0, '26.541')]
-[2024-12-18 23:57:48,797][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001906_7806976.pth...
-[2024-12-18 23:57:48,921][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001687_6909952.pth
-[2024-12-18 23:57:53,487][02177] Updated weights for policy 0, policy_version 1910 (0.0021)
-[2024-12-18 23:57:53,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 7823360. Throughput: 0: 907.2. Samples: 1955154. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:57:53,796][00179] Avg episode reward: [(0, '27.826')]
-[2024-12-18 23:57:53,804][02163] Saving new best policy, reward=27.826!
-[2024-12-18 23:57:58,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3707.2). Total num frames: 7839744. Throughput: 0: 859.0. Samples: 1959778. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:57:58,796][00179] Avg episode reward: [(0, '26.178')]
-[2024-12-18 23:58:03,534][02177] Updated weights for policy 0, policy_version 1920 (0.0018)
-[2024-12-18 23:58:03,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 7864320. Throughput: 0: 893.4. Samples: 1966646. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:58:03,796][00179] Avg episode reward: [(0, '24.266')]
-[2024-12-18 23:58:08,788][00179] Fps is (10 sec: 4095.8, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 7880704. Throughput: 0: 916.3. Samples: 1969710. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:58:08,797][00179] Avg episode reward: [(0, '24.854')]
-[2024-12-18 23:58:13,789][00179] Fps is (10 sec: 2457.3, 60 sec: 3413.3, 300 sec: 3665.6). Total num frames: 7888896. Throughput: 0: 890.4. Samples: 1972956. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:58:13,791][00179] Avg episode reward: [(0, '24.007')]
-[2024-12-18 23:58:18,073][02177] Updated weights for policy 0, policy_version 1930 (0.0013)
-[2024-12-18 23:58:18,788][00179] Fps is (10 sec: 2457.7, 60 sec: 3413.3, 300 sec: 3679.5). Total num frames: 7905280. Throughput: 0: 869.6. Samples: 1977540. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
-[2024-12-18 23:58:18,790][00179] Avg episode reward: [(0, '24.290')]
-[2024-12-18 23:58:23,788][00179] Fps is (10 sec: 4096.6, 60 sec: 3549.9, 300 sec: 3693.3). Total num frames: 7929856. Throughput: 0: 897.7. Samples: 1980906. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
-[2024-12-18 23:58:23,794][00179] Avg episode reward: [(0, '22.590')]
-[2024-12-18 23:58:27,515][02177] Updated weights for policy 0, policy_version 1940 (0.0014)
-[2024-12-18 23:58:28,788][00179] Fps is (10 sec: 4096.0, 60 sec: 3618.2, 300 sec: 3707.2). Total num frames: 7946240. Throughput: 0: 908.4. Samples: 1987156. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:58:28,790][00179] Avg episode reward: [(0, '23.169')]
-[2024-12-18 23:58:33,788][00179] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3707.2). Total num frames: 7962624. Throughput: 0: 853.9. Samples: 1991378. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
-[2024-12-18 23:58:33,796][00179] Avg episode reward: [(0, '24.641')]
-[2024-12-18 23:58:38,788][00179] Fps is (10 sec: 3686.4, 60 sec: 3686.7, 300 sec: 3721.1). Total num frames: 7983104. Throughput: 0: 878.6. Samples: 1994690. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
-[2024-12-18 23:58:38,792][00179] Avg episode reward: [(0, '23.077')]
-[2024-12-18 23:58:39,089][02177] Updated weights for policy 0, policy_version 1950 (0.0044)
-[2024-12-18 23:58:43,789][00179] Fps is (10 sec: 4095.5, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 8003584. Throughput: 0: 924.2. Samples: 2001368. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
-[2024-12-18 23:58:43,791][00179] Avg episode reward: [(0, '23.453')]
-[2024-12-18 23:58:43,964][02163] Stopping Batcher_0...
-[2024-12-18 23:58:43,965][02163] Loop batcher_evt_loop terminating...
-[2024-12-18 23:58:43,965][00179] Component Batcher_0 stopped!
-[2024-12-18 23:58:43,970][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth...
-[2024-12-18 23:58:44,067][02177] Weights refcount: 2 0
-[2024-12-18 23:58:44,086][00179] Component InferenceWorker_p0-w0 stopped!
-[2024-12-18 23:58:44,091][02177] Stopping InferenceWorker_p0-w0...
-[2024-12-18 23:58:44,092][02177] Loop inference_proc0-0_evt_loop terminating...
-[2024-12-18 23:58:44,169][02163] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001798_7364608.pth
-[2024-12-18 23:58:44,192][02163] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth...
-[2024-12-18 23:58:44,386][02163] Stopping LearnerWorker_p0...
-[2024-12-18 23:58:44,386][02163] Loop learner_proc0_evt_loop terminating...
-[2024-12-18 23:58:44,390][00179] Component LearnerWorker_p0 stopped!
-[2024-12-18 23:58:44,620][02181] Stopping RolloutWorker_w5...
-[2024-12-18 23:58:44,622][02181] Loop rollout_proc5_evt_loop terminating...
-[2024-12-18 23:58:44,624][00179] Component RolloutWorker_w5 stopped!
-[2024-12-18 23:58:44,646][02178] Stopping RolloutWorker_w1...
-[2024-12-18 23:58:44,647][02178] Loop rollout_proc1_evt_loop terminating...
-[2024-12-18 23:58:44,647][00179] Component RolloutWorker_w1 stopped!
-[2024-12-18 23:58:44,672][02184] Stopping RolloutWorker_w7...
-[2024-12-18 23:58:44,672][00179] Component RolloutWorker_w7 stopped!
-[2024-12-18 23:58:44,672][02184] Loop rollout_proc7_evt_loop terminating...
-[2024-12-18 23:58:44,704][02180] Stopping RolloutWorker_w3...
-[2024-12-18 23:58:44,704][00179] Component RolloutWorker_w3 stopped!
-[2024-12-18 23:58:44,723][02180] Loop rollout_proc3_evt_loop terminating...
-[2024-12-18 23:58:44,758][00179] Component RolloutWorker_w0 stopped!
-[2024-12-18 23:58:44,767][00179] Component RolloutWorker_w4 stopped!
-[2024-12-18 23:58:44,770][02182] Stopping RolloutWorker_w4...
-[2024-12-18 23:58:44,770][02182] Loop rollout_proc4_evt_loop terminating...
-[2024-12-18 23:58:44,761][02176] Stopping RolloutWorker_w0...
-[2024-12-18 23:58:44,784][02176] Loop rollout_proc0_evt_loop terminating...
-[2024-12-18 23:58:44,803][00179] Component RolloutWorker_w6 stopped!
-[2024-12-18 23:58:44,805][02183] Stopping RolloutWorker_w6...
-[2024-12-18 23:58:44,817][00179] Component RolloutWorker_w2 stopped!
-[2024-12-18 23:58:44,824][02179] Stopping RolloutWorker_w2...
-[2024-12-18 23:58:44,824][02179] Loop rollout_proc2_evt_loop terminating...
-[2024-12-18 23:58:44,805][02183] Loop rollout_proc6_evt_loop terminating...
-[2024-12-18 23:58:44,819][00179] Waiting for process learner_proc0 to stop...
-[2024-12-18 23:58:46,819][00179] Waiting for process inference_proc0-0 to join...
-[2024-12-18 23:58:46,828][00179] Waiting for process rollout_proc0 to join...
-[2024-12-18 23:58:49,437][00179] Waiting for process rollout_proc1 to join...
-[2024-12-18 23:58:49,440][00179] Waiting for process rollout_proc2 to join...
-[2024-12-18 23:58:49,450][00179] Waiting for process rollout_proc3 to join...
-[2024-12-18 23:58:49,453][00179] Waiting for process rollout_proc4 to join...
-[2024-12-18 23:58:49,458][00179] Waiting for process rollout_proc5 to join...
-[2024-12-18 23:58:49,462][00179] Waiting for process rollout_proc6 to join...
-[2024-12-18 23:58:49,466][00179] Waiting for process rollout_proc7 to join...
-[2024-12-18 23:58:49,470][00179] Batcher 0 profile tree view:
-batching: 54.6683, releasing_batches: 0.0591
-[2024-12-18 23:58:49,474][00179] InferenceWorker_p0-w0 profile tree view:
+[2024-12-19 09:58:14,092][07448] Using optimizer
+[2024-12-19 09:58:16,370][07135] Heartbeat connected on Batcher_0
+[2024-12-19 09:58:16,383][07135] Heartbeat connected on InferenceWorker_p0-w0
+[2024-12-19 09:58:16,395][07135] Heartbeat connected on RolloutWorker_w0
+[2024-12-19 09:58:16,404][07135] Heartbeat connected on RolloutWorker_w1
+[2024-12-19 09:58:16,408][07135] Heartbeat connected on RolloutWorker_w2
+[2024-12-19 09:58:16,430][07135] Heartbeat connected on RolloutWorker_w3
+[2024-12-19 09:58:16,452][07135] Heartbeat connected on RolloutWorker_w4
+[2024-12-19 09:58:16,457][07135] Heartbeat connected on RolloutWorker_w5
+[2024-12-19 09:58:16,469][07135] Heartbeat connected on RolloutWorker_w6
+[2024-12-19 09:58:16,471][07135] Heartbeat connected on RolloutWorker_w7
+[2024-12-19 09:58:18,165][07448] No checkpoints found
+[2024-12-19 09:58:18,165][07448] Did not load from checkpoint, starting from scratch!
+[2024-12-19 09:58:18,165][07448] Initialized policy 0 weights for model version 0
+[2024-12-19 09:58:18,169][07448] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-12-19 09:58:18,176][07448] LearnerWorker_p0 finished initialization!
+[2024-12-19 09:58:18,177][07135] Heartbeat connected on LearnerWorker_p0
+[2024-12-19 09:58:18,280][07462] RunningMeanStd input shape: (3, 72, 128)
+[2024-12-19 09:58:18,282][07462] RunningMeanStd input shape: (1,)
+[2024-12-19 09:58:18,294][07462] ConvEncoder: input_channels=3
+[2024-12-19 09:58:18,401][07462] Conv encoder output size: 512
+[2024-12-19 09:58:18,401][07462] Policy head output size: 512
+[2024-12-19 09:58:18,454][07135] Inference worker 0-0 is ready!
+[2024-12-19 09:58:18,456][07135] All inference workers are ready! Signal rollout workers to start!
+[2024-12-19 09:58:18,683][07469] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 09:58:18,689][07466] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 09:58:18,687][07467] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 09:58:18,686][07468] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 09:58:18,686][07463] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 09:58:18,694][07464] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 09:58:18,689][07461] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 09:58:18,691][07465] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 09:58:20,070][07469] Decorrelating experience for 0 frames...
+[2024-12-19 09:58:20,068][07463] Decorrelating experience for 0 frames...
+[2024-12-19 09:58:20,070][07468] Decorrelating experience for 0 frames...
+[2024-12-19 09:58:20,071][07467] Decorrelating experience for 0 frames...
+[2024-12-19 09:58:20,075][07464] Decorrelating experience for 0 frames...
+[2024-12-19 09:58:20,079][07465] Decorrelating experience for 0 frames...
+[2024-12-19 09:58:20,804][07469] Decorrelating experience for 32 frames...
+[2024-12-19 09:58:20,877][07466] Decorrelating experience for 0 frames...
+[2024-12-19 09:58:21,210][07465] Decorrelating experience for 32 frames...
+[2024-12-19 09:58:21,213][07464] Decorrelating experience for 32 frames...
+[2024-12-19 09:58:21,234][07461] Decorrelating experience for 0 frames...
+[2024-12-19 09:58:21,685][07135] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-12-19 09:58:21,814][07467] Decorrelating experience for 32 frames...
+[2024-12-19 09:58:21,834][07466] Decorrelating experience for 32 frames...
+[2024-12-19 09:58:23,097][07461] Decorrelating experience for 32 frames...
+[2024-12-19 09:58:23,290][07463] Decorrelating experience for 32 frames...
+[2024-12-19 09:58:23,499][07464] Decorrelating experience for 64 frames...
+[2024-12-19 09:58:23,501][07465] Decorrelating experience for 64 frames...
+[2024-12-19 09:58:23,507][07467] Decorrelating experience for 64 frames...
+[2024-12-19 09:58:23,745][07468] Decorrelating experience for 32 frames...
+[2024-12-19 09:58:24,572][07461] Decorrelating experience for 64 frames...
+[2024-12-19 09:58:24,640][07463] Decorrelating experience for 64 frames...
+[2024-12-19 09:58:24,655][07465] Decorrelating experience for 96 frames...
+[2024-12-19 09:58:24,717][07467] Decorrelating experience for 96 frames...
+[2024-12-19 09:58:24,991][07466] Decorrelating experience for 64 frames...
+[2024-12-19 09:58:25,871][07464] Decorrelating experience for 96 frames...
+[2024-12-19 09:58:25,928][07461] Decorrelating experience for 96 frames...
+[2024-12-19 09:58:26,074][07463] Decorrelating experience for 96 frames...
+[2024-12-19 09:58:26,379][07469] Decorrelating experience for 64 frames...
+[2024-12-19 09:58:26,463][07466] Decorrelating experience for 96 frames...
+[2024-12-19 09:58:26,627][07468] Decorrelating experience for 64 frames...
+[2024-12-19 09:58:26,685][07135] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-12-19 09:58:28,821][07469] Decorrelating experience for 96 frames...
+[2024-12-19 09:58:29,856][07468] Decorrelating experience for 96 frames...
+[2024-12-19 09:58:31,686][07135] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 183.4. Samples: 1834. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-12-19 09:58:31,688][07135] Avg episode reward: [(0, '2.148')]
+[2024-12-19 09:58:32,028][07448] Signal inference workers to stop experience collection...
+[2024-12-19 09:58:32,049][07462] InferenceWorker_p0-w0: stopping experience collection
+[2024-12-19 09:58:34,989][07448] Signal inference workers to resume experience collection...
+[2024-12-19 09:58:34,990][07462] InferenceWorker_p0-w0: resuming experience collection
+[2024-12-19 09:58:36,685][07135] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 12288. Throughput: 0: 253.7. Samples: 3806. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+[2024-12-19 09:58:36,691][07135] Avg episode reward: [(0, '3.045')]
+[2024-12-19 09:58:41,685][07135] Fps is (10 sec: 3686.7, 60 sec: 1843.2, 300 sec: 1843.2). Total num frames: 36864. Throughput: 0: 365.8. Samples: 7316. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+[2024-12-19 09:58:41,691][07135] Avg episode reward: [(0, '3.804')]
+[2024-12-19 09:58:42,478][07462] Updated weights for policy 0, policy_version 10 (0.0021)
+[2024-12-19 09:58:46,688][07135] Fps is (10 sec: 4095.0, 60 sec: 2129.7, 300 sec: 2129.7). Total num frames: 53248. Throughput: 0: 523.1. Samples: 13078. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 09:58:46,692][07135] Avg episode reward: [(0, '4.269')]
+[2024-12-19 09:58:51,685][07135] Fps is (10 sec: 3276.8, 60 sec: 2321.1, 300 sec: 2321.1). Total num frames: 69632. Throughput: 0: 602.2. Samples: 18066. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 09:58:51,691][07135] Avg episode reward: [(0, '4.397')]
+[2024-12-19 09:58:54,303][07462] Updated weights for policy 0, policy_version 20 (0.0013)
+[2024-12-19 09:58:56,685][07135] Fps is (10 sec: 3687.3, 60 sec: 2574.6, 300 sec: 2574.6). Total num frames: 90112. Throughput: 0: 603.5. Samples: 21122. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 09:58:56,688][07135] Avg episode reward: [(0, '4.604')]
+[2024-12-19 09:59:01,688][07135] Fps is (10 sec: 4094.6, 60 sec: 2764.6, 300 sec: 2764.6). Total num frames: 110592. Throughput: 0: 698.3. Samples: 27934. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 09:59:01,692][07135] Avg episode reward: [(0, '4.441')]
+[2024-12-19 09:59:01,698][07448] Saving new best policy, reward=4.441!
+[2024-12-19 09:59:05,315][07462] Updated weights for policy 0, policy_version 30 (0.0025)
+[2024-12-19 09:59:06,685][07135] Fps is (10 sec: 3276.8, 60 sec: 2730.7, 300 sec: 2730.7). Total num frames: 122880. Throughput: 0: 716.4. Samples: 32240. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 09:59:06,692][07135] Avg episode reward: [(0, '4.578')]
+[2024-12-19 09:59:06,721][07448] Saving new best policy, reward=4.578!
+[2024-12-19 09:59:11,685][07135] Fps is (10 sec: 3277.8, 60 sec: 2867.2, 300 sec: 2867.2). Total num frames: 143360. Throughput: 0: 788.5. Samples: 35482. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 09:59:11,692][07135] Avg episode reward: [(0, '4.483')]
+[2024-12-19 09:59:15,242][07462] Updated weights for policy 0, policy_version 40 (0.0024)
+[2024-12-19 09:59:16,685][07135] Fps is (10 sec: 4505.6, 60 sec: 3053.4, 300 sec: 3053.4). Total num frames: 167936. Throughput: 0: 888.5. Samples: 41814. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 09:59:16,687][07135] Avg episode reward: [(0, '4.546')]
+[2024-12-19 09:59:21,691][07135] Fps is (10 sec: 4093.6, 60 sec: 3071.7, 300 sec: 3071.7). Total num frames: 184320. Throughput: 0: 951.3. Samples: 46622. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 09:59:21,694][07135] Avg episode reward: [(0, '4.424')]
+[2024-12-19 09:59:26,685][07135] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3087.7). Total num frames: 200704. Throughput: 0: 921.9. Samples: 48802. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 09:59:26,687][07135] Avg episode reward: [(0, '4.420')]
+[2024-12-19 09:59:27,183][07462] Updated weights for policy 0, policy_version 50 (0.0018)
+[2024-12-19 09:59:31,685][07135] Fps is (10 sec: 4098.5, 60 sec: 3754.7, 300 sec: 3218.3). Total num frames: 225280. Throughput: 0: 948.6. Samples: 55762. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 09:59:31,687][07135] Avg episode reward: [(0, '4.345')]
+[2024-12-19 09:59:36,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3222.2). Total num frames: 241664. Throughput: 0: 970.5. Samples: 61738. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 09:59:36,693][07135] Avg episode reward: [(0, '4.310')]
+[2024-12-19 09:59:36,950][07462] Updated weights for policy 0, policy_version 60 (0.0028)
+[2024-12-19 09:59:41,685][07135] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3225.6). Total num frames: 258048. Throughput: 0: 950.1. Samples: 63876. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 09:59:41,687][07135] Avg episode reward: [(0, '4.286')]
+[2024-12-19 09:59:46,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3823.1, 300 sec: 3325.0). Total num frames: 282624. Throughput: 0: 939.5. Samples: 70210. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 09:59:46,687][07135] Avg episode reward: [(0, '4.347')]
+[2024-12-19 09:59:46,696][07448] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000069_282624.pth...
+[2024-12-19 09:59:47,264][07462] Updated weights for policy 0, policy_version 70 (0.0026)
+[2024-12-19 09:59:51,685][07135] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3367.8). Total num frames: 303104. Throughput: 0: 994.9. Samples: 77010. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 09:59:51,690][07135] Avg episode reward: [(0, '4.303')]
+[2024-12-19 09:59:56,687][07135] Fps is (10 sec: 3685.7, 60 sec: 3822.8, 300 sec: 3363.0). Total num frames: 319488. Throughput: 0: 971.1. Samples: 79184. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 09:59:56,689][07135] Avg episode reward: [(0, '4.353')]
+[2024-12-19 09:59:58,693][07462] Updated weights for policy 0, policy_version 80 (0.0033)
+[2024-12-19 10:00:01,687][07135] Fps is (10 sec: 3685.5, 60 sec: 3823.0, 300 sec: 3399.6). Total num frames: 339968. Throughput: 0: 947.8. Samples: 84466. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:00:01,694][07135] Avg episode reward: [(0, '4.324')]
+[2024-12-19 10:00:06,685][07135] Fps is (10 sec: 4506.4, 60 sec: 4027.7, 300 sec: 3471.8). Total num frames: 364544. Throughput: 0: 999.9. Samples: 91612. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:00:06,687][07135] Avg episode reward: [(0, '4.350')]
+[2024-12-19 10:00:07,362][07462] Updated weights for policy 0, policy_version 90 (0.0016)
+[2024-12-19 10:00:11,685][07135] Fps is (10 sec: 4096.9, 60 sec: 3959.5, 300 sec: 3463.0). Total num frames: 380928. Throughput: 0: 1019.3. Samples: 94670. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:00:11,690][07135] Avg episode reward: [(0, '4.351')]
+[2024-12-19 10:00:16,685][07135] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3454.9). Total num frames: 397312. Throughput: 0: 960.7. Samples: 98992. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:00:16,687][07135] Avg episode reward: [(0, '4.362')]
+[2024-12-19 10:00:18,904][07462] Updated weights for policy 0, policy_version 100 (0.0017)
+[2024-12-19 10:00:21,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.9, 300 sec: 3515.7). Total num frames: 421888. Throughput: 0: 987.0. Samples: 106154. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+[2024-12-19 10:00:21,687][07135] Avg episode reward: [(0, '4.453')]
+[2024-12-19 10:00:26,685][07135] Fps is (10 sec: 4505.7, 60 sec: 4027.7, 300 sec: 3538.9). Total num frames: 442368. Throughput: 0: 1016.4. Samples: 109616. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:00:26,691][07135] Avg episode reward: [(0, '4.478')]
+[2024-12-19 10:00:29,128][07462] Updated weights for policy 0, policy_version 110 (0.0021)
+[2024-12-19 10:00:31,685][07135] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3497.4). Total num frames: 454656. Throughput: 0: 978.0. Samples: 114218. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:00:31,687][07135] Avg episode reward: [(0, '4.319')]
+[2024-12-19 10:00:36,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3549.9). Total num frames: 479232. Throughput: 0: 964.9. Samples: 120430. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:00:36,691][07135] Avg episode reward: [(0, '4.476')]
+[2024-12-19 10:00:39,001][07462] Updated weights for policy 0, policy_version 120 (0.0019)
+[2024-12-19 10:00:41,685][07135] Fps is (10 sec: 4915.2, 60 sec: 4096.0, 300 sec: 3598.6). Total num frames: 503808. Throughput: 0: 995.6. Samples: 123984. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:00:41,688][07135] Avg episode reward: [(0, '4.547')]
+[2024-12-19 10:00:46,686][07135] Fps is (10 sec: 3686.1, 60 sec: 3891.1, 300 sec: 3559.3). Total num frames: 516096. Throughput: 0: 1002.2. Samples: 129562. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:00:46,691][07135] Avg episode reward: [(0, '4.508')]
+[2024-12-19 10:00:50,351][07462] Updated weights for policy 0, policy_version 130 (0.0024)
+[2024-12-19 10:00:51,685][07135] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3577.2). Total num frames: 536576. Throughput: 0: 962.5. Samples: 134926. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:00:51,688][07135] Avg episode reward: [(0, '4.695')]
+[2024-12-19 10:00:51,691][07448] Saving new best policy, reward=4.695!
+[2024-12-19 10:00:56,685][07135] Fps is (10 sec: 4506.0, 60 sec: 4027.9, 300 sec: 3620.3). Total num frames: 561152. Throughput: 0: 971.6. Samples: 138390. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:00:56,688][07135] Avg episode reward: [(0, '4.655')]
+[2024-12-19 10:00:59,347][07462] Updated weights for policy 0, policy_version 140 (0.0025)
+[2024-12-19 10:01:01,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.6, 300 sec: 3609.6). Total num frames: 577536. Throughput: 0: 1016.5. Samples: 144734. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:01:01,692][07135] Avg episode reward: [(0, '4.454')]
+[2024-12-19 10:01:06,687][07135] Fps is (10 sec: 3276.0, 60 sec: 3822.8, 300 sec: 3599.5). Total num frames: 593920. Throughput: 0: 954.1. Samples: 149090. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:01:06,692][07135] Avg episode reward: [(0, '4.366')]
+[2024-12-19 10:01:10,917][07462] Updated weights for policy 0, policy_version 150 (0.0024)
+[2024-12-19 10:01:11,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3614.1). Total num frames: 614400. Throughput: 0: 954.3. Samples: 152560. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:01:11,690][07135] Avg episode reward: [(0, '4.234')]
+[2024-12-19 10:01:16,685][07135] Fps is (10 sec: 3687.2, 60 sec: 3891.2, 300 sec: 3604.5). Total num frames: 630784. Throughput: 0: 970.4. Samples: 157888. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:01:16,691][07135] Avg episode reward: [(0, '4.409')]
+[2024-12-19 10:01:21,685][07135] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3572.6). Total num frames: 643072. Throughput: 0: 914.6. Samples: 161588. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:01:21,692][07135] Avg episode reward: [(0, '4.530')]
+[2024-12-19 10:01:25,426][07462] Updated weights for policy 0, policy_version 160 (0.0034)
+[2024-12-19 10:01:26,685][07135] Fps is (10 sec: 2867.3, 60 sec: 3618.1, 300 sec: 3564.6). Total num frames: 659456. Throughput: 0: 881.0. Samples: 163628. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 10:01:26,690][07135] Avg episode reward: [(0, '4.518')]
+[2024-12-19 10:01:31,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3600.2). Total num frames: 684032. Throughput: 0: 904.0. Samples: 170242. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 10:01:31,690][07135] Avg episode reward: [(0, '4.486')]
+[2024-12-19 10:01:34,382][07462] Updated weights for policy 0, policy_version 170 (0.0021)
+[2024-12-19 10:01:36,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3591.9). Total num frames: 700416. Throughput: 0: 919.5. Samples: 176302. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 10:01:36,694][07135] Avg episode reward: [(0, '4.417')]
+[2024-12-19 10:01:41,688][07135] Fps is (10 sec: 3275.7, 60 sec: 3549.7, 300 sec: 3583.9). Total num frames: 716800. Throughput: 0: 888.5. Samples: 178374. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:01:41,691][07135] Avg episode reward: [(0, '4.388')]
+[2024-12-19 10:01:46,142][07462] Updated weights for policy 0, policy_version 180 (0.0022)
+[2024-12-19 10:01:46,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.5, 300 sec: 3596.5). Total num frames: 737280. Throughput: 0: 874.9. Samples: 184106. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:01:46,688][07135] Avg episode reward: [(0, '4.567')]
+[2024-12-19 10:01:46,695][07448] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000180_737280.pth...
+[2024-12-19 10:01:51,685][07135] Fps is (10 sec: 4507.1, 60 sec: 3754.7, 300 sec: 3627.9). Total num frames: 761856. Throughput: 0: 928.0. Samples: 190846. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:01:51,688][07135] Avg episode reward: [(0, '4.752')]
+[2024-12-19 10:01:51,689][07448] Saving new best policy, reward=4.752!
+[2024-12-19 10:01:56,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3600.7). Total num frames: 774144. Throughput: 0: 901.1. Samples: 193108. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:01:56,689][07135] Avg episode reward: [(0, '4.805')]
+[2024-12-19 10:01:56,700][07448] Saving new best policy, reward=4.805!
+[2024-12-19 10:01:57,674][07462] Updated weights for policy 0, policy_version 190 (0.0041)
+[2024-12-19 10:02:01,685][07135] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3593.3). Total num frames: 790528. Throughput: 0: 883.6. Samples: 197650. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:02:01,692][07135] Avg episode reward: [(0, '4.688')]
+[2024-12-19 10:02:06,685][07135] Fps is (10 sec: 4095.8, 60 sec: 3686.5, 300 sec: 3622.7). Total num frames: 815104. Throughput: 0: 954.7. Samples: 204552. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:02:06,692][07135] Avg episode reward: [(0, '4.440')]
+[2024-12-19 10:02:07,274][07462] Updated weights for policy 0, policy_version 200 (0.0022)
+[2024-12-19 10:02:11,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3615.2). Total num frames: 831488. Throughput: 0: 984.3. Samples: 207920. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:02:11,690][07135] Avg episode reward: [(0, '4.676')]
+[2024-12-19 10:02:16,685][07135] Fps is (10 sec: 3276.9, 60 sec: 3618.1, 300 sec: 3608.0). Total num frames: 847872. Throughput: 0: 932.5. Samples: 212204. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:02:16,695][07135] Avg episode reward: [(0, '4.738')]
+[2024-12-19 10:02:18,767][07462] Updated weights for policy 0, policy_version 210 (0.0031)
+[2024-12-19 10:02:21,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3635.2). Total num frames: 872448. Throughput: 0: 945.6. Samples: 218856. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:02:21,692][07135] Avg episode reward: [(0, '4.478')]
+[2024-12-19 10:02:26,689][07135] Fps is (10 sec: 4503.7, 60 sec: 3890.9, 300 sec: 3644.5). Total num frames: 892928. Throughput: 0: 978.5. Samples: 222408. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:02:26,692][07135] Avg episode reward: [(0, '4.433')]
+[2024-12-19 10:02:28,171][07462] Updated weights for policy 0, policy_version 220 (0.0023)
+[2024-12-19 10:02:31,685][07135] Fps is (10 sec: 3686.3, 60 sec: 3754.6, 300 sec: 3637.2). Total num frames: 909312. Throughput: 0: 959.5. Samples: 227284. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-19 10:02:31,689][07135] Avg episode reward: [(0, '4.594')]
+[2024-12-19 10:02:36,685][07135] Fps is (10 sec: 3687.9, 60 sec: 3822.9, 300 sec: 3646.2). Total num frames: 929792. Throughput: 0: 934.7. Samples: 232908. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:02:36,693][07135] Avg episode reward: [(0, '4.614')]
+[2024-12-19 10:02:39,346][07462] Updated weights for policy 0, policy_version 230 (0.0022)
+[2024-12-19 10:02:41,685][07135] Fps is (10 sec: 4096.1, 60 sec: 3891.4, 300 sec: 3654.9). Total num frames: 950272. Throughput: 0: 958.8. Samples: 236252. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:02:41,693][07135] Avg episode reward: [(0, '4.629')]
+[2024-12-19 10:02:46,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3647.8). Total num frames: 966656. Throughput: 0: 983.2. Samples: 241896. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:02:46,689][07135] Avg episode reward: [(0, '4.731')]
+[2024-12-19 10:02:51,284][07462] Updated weights for policy 0, policy_version 240 (0.0022)
+[2024-12-19 10:02:51,685][07135] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3640.9). Total num frames: 983040. Throughput: 0: 934.3. Samples: 246594. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:02:51,688][07135] Avg episode reward: [(0, '4.717')]
+[2024-12-19 10:02:56,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3649.2). Total num frames: 1003520. Throughput: 0: 936.3. Samples: 250052. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:02:56,688][07135] Avg episode reward: [(0, '4.717')]
+[2024-12-19 10:03:00,483][07462] Updated weights for policy 0, policy_version 250 (0.0023)
+[2024-12-19 10:03:01,689][07135] Fps is (10 sec: 4094.5, 60 sec: 3891.0, 300 sec: 3657.1). Total num frames: 1024000. Throughput: 0: 983.7. Samples: 256474. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-19 10:03:01,693][07135] Avg episode reward: [(0, '4.683')]
+[2024-12-19 10:03:06,686][07135] Fps is (10 sec: 3686.0, 60 sec: 3754.6, 300 sec: 3650.5). Total num frames: 1040384. Throughput: 0: 928.3. Samples: 260630. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:03:06,691][07135] Avg episode reward: [(0, '4.667')]
+[2024-12-19 10:03:11,685][07135] Fps is (10 sec: 3687.8, 60 sec: 3822.9, 300 sec: 3658.2). Total num frames: 1060864. Throughput: 0: 917.4. Samples: 263686. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:03:11,687][07135] Avg episode reward: [(0, '4.648')]
+[2024-12-19 10:03:12,154][07462] Updated weights for policy 0, policy_version 260 (0.0018)
+[2024-12-19 10:03:16,685][07135] Fps is (10 sec: 4506.0, 60 sec: 3959.5, 300 sec: 3679.5). Total num frames: 1085440. Throughput: 0: 964.3. Samples: 270678. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:03:16,689][07135] Avg episode reward: [(0, '4.651')]
+[2024-12-19 10:03:21,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3721.1). Total num frames: 1097728. Throughput: 0: 952.1. Samples: 275754. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:03:21,690][07135] Avg episode reward: [(0, '4.739')]
+[2024-12-19 10:03:23,385][07462] Updated weights for policy 0, policy_version 270 (0.0018)
+[2024-12-19 10:03:26,686][07135] Fps is (10 sec: 3276.6, 60 sec: 3754.9, 300 sec: 3790.5). Total num frames: 1118208. Throughput: 0: 925.9. Samples: 277920. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:03:26,689][07135] Avg episode reward: [(0, '4.872')]
+[2024-12-19 10:03:26,697][07448] Saving new best policy, reward=4.872!
+[2024-12-19 10:03:31,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3818.3). Total num frames: 1138688. Throughput: 0: 951.5. Samples: 284712. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:03:31,687][07135] Avg episode reward: [(0, '4.871')]
+[2024-12-19 10:03:32,728][07462] Updated weights for policy 0, policy_version 280 (0.0018)
+[2024-12-19 10:03:36,687][07135] Fps is (10 sec: 4095.7, 60 sec: 3822.8, 300 sec: 3804.4). Total num frames: 1159168. Throughput: 0: 981.9. Samples: 290780. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:03:36,690][07135] Avg episode reward: [(0, '4.717')]
+[2024-12-19 10:03:41,685][07135] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3790.6). Total num frames: 1171456. Throughput: 0: 950.8. Samples: 292840. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:03:41,688][07135] Avg episode reward: [(0, '4.664')]
+[2024-12-19 10:03:44,265][07462] Updated weights for policy 0, policy_version 290 (0.0025)
+[2024-12-19 10:03:46,685][07135] Fps is (10 sec: 3686.9, 60 sec: 3822.9, 300 sec: 3818.3). Total num frames: 1196032. Throughput: 0: 941.8. Samples: 298852. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:03:46,689][07135] Avg episode reward: [(0, '4.768')]
+[2024-12-19 10:03:46,700][07448] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000292_1196032.pth...
+[2024-12-19 10:03:46,815][07448] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000069_282624.pth
+[2024-12-19 10:03:51,689][07135] Fps is (10 sec: 4913.1, 60 sec: 3959.2, 300 sec: 3832.1). Total num frames: 1220608. Throughput: 0: 1001.1. Samples: 305682. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:03:51,695][07135] Avg episode reward: [(0, '5.038')]
+[2024-12-19 10:03:51,701][07448] Saving new best policy, reward=5.038!
+[2024-12-19 10:03:54,375][07462] Updated weights for policy 0, policy_version 300 (0.0024)
+[2024-12-19 10:03:56,688][07135] Fps is (10 sec: 3685.2, 60 sec: 3822.7, 300 sec: 3804.4). Total num frames: 1232896. Throughput: 0: 981.8. Samples: 307870. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:03:56,693][07135] Avg episode reward: [(0, '5.141')]
+[2024-12-19 10:03:56,705][07448] Saving new best policy, reward=5.141!
+[2024-12-19 10:04:01,685][07135] Fps is (10 sec: 3278.2, 60 sec: 3823.2, 300 sec: 3832.2). Total num frames: 1253376. Throughput: 0: 936.2. Samples: 312808. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:04:01,688][07135] Avg episode reward: [(0, '5.120')]
+[2024-12-19 10:04:04,924][07462] Updated weights for policy 0, policy_version 310 (0.0022)
+[2024-12-19 10:04:06,685][07135] Fps is (10 sec: 4097.3, 60 sec: 3891.3, 300 sec: 3832.2). Total num frames: 1273856. Throughput: 0: 978.3. Samples: 319776. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:04:06,687][07135] Avg episode reward: [(0, '4.820')]
+[2024-12-19 10:04:11,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 1294336. Throughput: 0: 1002.1. Samples: 323014. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 10:04:11,690][07135] Avg episode reward: [(0, '4.666')]
+[2024-12-19 10:04:16,578][07462] Updated weights for policy 0, policy_version 320 (0.0023)
+[2024-12-19 10:04:16,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3818.4). Total num frames: 1310720. Throughput: 0: 945.2. Samples: 327248. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:04:16,688][07135] Avg episode reward: [(0, '4.734')]
+[2024-12-19 10:04:21,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 1331200. Throughput: 0: 959.9. Samples: 333976. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:04:21,687][07135] Avg episode reward: [(0, '4.821')]
+[2024-12-19 10:04:25,371][07462] Updated weights for policy 0, policy_version 330 (0.0018)
+[2024-12-19 10:04:26,685][07135] Fps is (10 sec: 4505.5, 60 sec: 3959.5, 300 sec: 3832.2). Total num frames: 1355776. Throughput: 0: 993.4. Samples: 337544. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 10:04:26,687][07135] Avg episode reward: [(0, '4.637')]
+[2024-12-19 10:04:31,687][07135] Fps is (10 sec: 3685.6, 60 sec: 3822.8, 300 sec: 3818.3). Total num frames: 1368064. Throughput: 0: 969.2. Samples: 342466. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 10:04:31,692][07135] Avg episode reward: [(0, '4.674')]
+[2024-12-19 10:04:36,685][07135] Fps is (10 sec: 3276.9, 60 sec: 3823.0, 300 sec: 3832.2). Total num frames: 1388544. Throughput: 0: 945.3. Samples: 348218. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:04:36,688][07135] Avg episode reward: [(0, '4.874')]
+[2024-12-19 10:04:36,958][07462] Updated weights for policy 0, policy_version 340 (0.0026)
+[2024-12-19 10:04:41,685][07135] Fps is (10 sec: 4506.5, 60 sec: 4027.7, 300 sec: 3832.2). Total num frames: 1413120. Throughput: 0: 975.0. Samples: 351744. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:04:41,690][07135] Avg episode reward: [(0, '4.612')]
+[2024-12-19 10:04:46,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 1429504. Throughput: 0: 999.0. Samples: 357762. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-19 10:04:46,692][07135] Avg episode reward: [(0, '4.457')]
+[2024-12-19 10:04:47,138][07462] Updated weights for policy 0, policy_version 350 (0.0027)
+[2024-12-19 10:04:51,685][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.9, 300 sec: 3818.3). Total num frames: 1445888. Throughput: 0: 948.7. Samples: 362466. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:04:51,691][07135] Avg episode reward: [(0, '4.555')]
+[2024-12-19 10:04:56,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.7, 300 sec: 3832.2). Total num frames: 1470464. Throughput: 0: 955.2. Samples: 365998. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 10:04:56,687][07135] Avg episode reward: [(0, '4.654')]
+[2024-12-19 10:04:57,164][07462] Updated weights for policy 0, policy_version 360 (0.0023)
+[2024-12-19 10:05:01,685][07135] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3818.3). Total num frames: 1490944. Throughput: 0: 1011.9. Samples: 372784. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:05:01,688][07135] Avg episode reward: [(0, '4.575')]
+[2024-12-19 10:05:06,689][07135] Fps is (10 sec: 3275.5, 60 sec: 3822.7, 300 sec: 3804.4). Total num frames: 1503232. Throughput: 0: 958.8. Samples: 377128. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:05:06,692][07135] Avg episode reward: [(0, '4.516')]
+[2024-12-19 10:05:08,906][07462] Updated weights for policy 0, policy_version 370 (0.0019)
+[2024-12-19 10:05:11,685][07135] Fps is (10 sec: 3686.5, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 1527808. Throughput: 0: 947.4. Samples: 380178. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:05:11,687][07135] Avg episode reward: [(0, '4.637')]
+[2024-12-19 10:05:16,685][07135] Fps is (10 sec: 4507.4, 60 sec: 3959.5, 300 sec: 3818.3). Total num frames: 1548288. Throughput: 0: 994.3. Samples: 387208. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:05:16,692][07135] Avg episode reward: [(0, '4.590')]
+[2024-12-19 10:05:17,821][07462] Updated weights for policy 0, policy_version 380 (0.0013)
+[2024-12-19 10:05:21,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3804.4). Total num frames: 1564672. Throughput: 0: 981.3. Samples: 392378. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:05:21,687][07135] Avg episode reward: [(0, '4.697')]
+[2024-12-19 10:05:26,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 1585152. Throughput: 0: 950.3. Samples: 394506. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:05:26,687][07135] Avg episode reward: [(0, '4.477')]
+[2024-12-19 10:05:29,357][07462] Updated weights for policy 0, policy_version 390 (0.0019)
+[2024-12-19 10:05:31,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.6, 300 sec: 3818.3). Total num frames: 1605632. Throughput: 0: 966.0. Samples: 401234. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 10:05:31,688][07135] Avg episode reward: [(0, '4.334')]
+[2024-12-19 10:05:36,686][07135] Fps is (10 sec: 4095.5, 60 sec: 3959.4, 300 sec: 3804.4). Total num frames: 1626112. Throughput: 0: 996.0. Samples: 407288. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 10:05:36,690][07135] Avg episode reward: [(0, '4.519')]
+[2024-12-19 10:05:40,565][07462] Updated weights for policy 0, policy_version 400 (0.0029)
+[2024-12-19 10:05:41,685][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3804.4). Total num frames: 1638400. Throughput: 0: 964.5. Samples: 409400. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:05:41,690][07135] Avg episode reward: [(0, '4.594')]
+[2024-12-19 10:05:46,685][07135] Fps is (10 sec: 3686.9, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 1662976. Throughput: 0: 946.1. Samples: 415360. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:05:46,687][07135] Avg episode reward: [(0, '4.574')]
+[2024-12-19 10:05:46,698][07448] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000406_1662976.pth...
+[2024-12-19 10:05:46,808][07448] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000180_737280.pth
+[2024-12-19 10:05:49,882][07462] Updated weights for policy 0, policy_version 410 (0.0020)
+[2024-12-19 10:05:51,685][07135] Fps is (10 sec: 4915.2, 60 sec: 4027.8, 300 sec: 3818.3). Total num frames: 1687552. Throughput: 0: 1003.0. Samples: 422258. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 10:05:51,687][07135] Avg episode reward: [(0, '4.596')]
+[2024-12-19 10:05:56,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3804.4). Total num frames: 1699840. Throughput: 0: 984.2. Samples: 424466. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:05:56,689][07135] Avg episode reward: [(0, '4.694')]
+[2024-12-19 10:06:01,685][07135] Fps is (10 sec: 2457.6, 60 sec: 3686.4, 300 sec: 3790.6). Total num frames: 1712128. Throughput: 0: 908.6. Samples: 428094. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:06:01,687][07135] Avg episode reward: [(0, '4.813')]
+[2024-12-19 10:06:03,991][07462] Updated weights for policy 0, policy_version 420 (0.0017)
+[2024-12-19 10:06:06,685][07135] Fps is (10 sec: 2867.2, 60 sec: 3754.9, 300 sec: 3776.6). Total num frames: 1728512. Throughput: 0: 901.2. Samples: 432930. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:06:06,688][07135] Avg episode reward: [(0, '4.799')]
+[2024-12-19 10:06:11,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3790.5). Total num frames: 1748992. Throughput: 0: 930.7. Samples: 436388. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:06:11,691][07135] Avg episode reward: [(0, '4.900')]
+[2024-12-19 10:06:14,969][07462] Updated weights for policy 0, policy_version 430 (0.0015)
+[2024-12-19 10:06:16,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3804.4). Total num frames: 1765376. Throughput: 0: 887.8. Samples: 441184. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:06:16,689][07135] Avg episode reward: [(0, '4.966')]
+[2024-12-19 10:06:21,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3818.3). Total num frames: 1785856. Throughput: 0: 883.5. Samples: 447046. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:06:21,688][07135] Avg episode reward: [(0, '5.140')]
+[2024-12-19 10:06:25,063][07462] Updated weights for policy 0, policy_version 440 (0.0034)
+[2024-12-19 10:06:26,685][07135] Fps is (10 sec: 4096.1, 60 sec: 3686.4, 300 sec: 3804.4). Total num frames: 1806336. Throughput: 0: 914.4. Samples: 450546. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:06:26,687][07135] Avg episode reward: [(0, '4.744')]
+[2024-12-19 10:06:31,688][07135] Fps is (10 sec: 3685.1, 60 sec: 3617.9, 300 sec: 3804.4). Total num frames: 1822720. Throughput: 0: 908.3. Samples: 456238. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 10:06:31,691][07135] Avg episode reward: [(0, '4.639')]
+[2024-12-19 10:06:36,685][07135] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3804.5). Total num frames: 1839104. Throughput: 0: 862.4. Samples: 461064. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:06:36,688][07135] Avg episode reward: [(0, '4.731')]
+[2024-12-19 10:06:36,768][07462] Updated weights for policy 0, policy_version 450 (0.0029)
+[2024-12-19 10:06:41,685][07135] Fps is (10 sec: 4097.4, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 1863680. Throughput: 0: 890.3. Samples: 464528. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:06:41,692][07135] Avg episode reward: [(0, '4.632')]
+[2024-12-19 10:06:45,848][07462] Updated weights for policy 0, policy_version 460 (0.0020)
+[2024-12-19 10:06:46,687][07135] Fps is (10 sec: 4504.8, 60 sec: 3686.3, 300 sec: 3804.4). Total num frames: 1884160. Throughput: 0: 963.1. Samples: 471434. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 10:06:46,689][07135] Avg episode reward: [(0, '4.725')]
+[2024-12-19 10:06:51,688][07135] Fps is (10 sec: 3275.7, 60 sec: 3481.4, 300 sec: 3804.4). Total num frames: 1896448. Throughput: 0: 950.2. Samples: 475692. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 10:06:51,693][07135] Avg episode reward: [(0, '4.730')]
+[2024-12-19 10:06:56,685][07135] Fps is (10 sec: 3686.9, 60 sec: 3686.4, 300 sec: 3832.2). Total num frames: 1921024. Throughput: 0: 943.0. Samples: 478822. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:06:56,692][07135] Avg episode reward: [(0, '4.693')]
+[2024-12-19 10:06:57,054][07462] Updated weights for policy 0, policy_version 470 (0.0022)
+[2024-12-19 10:07:01,685][07135] Fps is (10 sec: 4916.8, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 1945600. Throughput: 0: 988.5. Samples: 485666. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:07:01,688][07135] Avg episode reward: [(0, '4.697')]
+[2024-12-19 10:07:06,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3818.3). Total num frames: 1957888. Throughput: 0: 970.9. Samples: 490736. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 10:07:06,690][07135] Avg episode reward: [(0, '4.895')]
+[2024-12-19 10:07:08,388][07462] Updated weights for policy 0, policy_version 480 (0.0015)
+[2024-12-19 10:07:11,685][07135] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 1978368. Throughput: 0: 943.2. Samples: 492992. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:07:11,687][07135] Avg episode reward: [(0, '4.864')]
+[2024-12-19 10:07:16,685][07135] Fps is (10 sec: 4505.8, 60 sec: 3959.5, 300 sec: 3832.2). Total num frames: 2002944. Throughput: 0: 971.4. Samples: 499948. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:07:16,690][07135] Avg episode reward: [(0, '4.573')]
+[2024-12-19 10:07:17,635][07462] Updated weights for policy 0, policy_version 490 (0.0028)
+[2024-12-19 10:07:21,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3818.4). Total num frames: 2019328. Throughput: 0: 999.1. Samples: 506022. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 10:07:21,690][07135] Avg episode reward: [(0, '4.602')]
+[2024-12-19 10:07:26,685][07135] Fps is (10 sec: 3276.7, 60 sec: 3822.9, 300 sec: 3818.3). Total num frames: 2035712. Throughput: 0: 970.1. Samples: 508182. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:07:26,688][07135] Avg episode reward: [(0, '4.677')]
+[2024-12-19 10:07:29,078][07462] Updated weights for policy 0, policy_version 500 (0.0026)
+[2024-12-19 10:07:31,685][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.4, 300 sec: 3818.3). Total num frames: 2056192. Throughput: 0: 947.7. Samples: 514080. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:07:31,692][07135] Avg episode reward: [(0, '4.721')]
+[2024-12-19 10:07:36,686][07135] Fps is (10 sec: 4505.4, 60 sec: 4027.7, 300 sec: 3832.2). Total num frames: 2080768. Throughput: 0: 1008.5. Samples: 521074. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-19 10:07:36,688][07135] Avg episode reward: [(0, '4.645')]
+[2024-12-19 10:07:38,814][07462] Updated weights for policy 0, policy_version 510 (0.0017)
+[2024-12-19 10:07:41,689][07135] Fps is (10 sec: 3684.8, 60 sec: 3822.7, 300 sec: 3818.3). Total num frames: 2093056. Throughput: 0: 984.8. Samples: 523144. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-19 10:07:41,694][07135] Avg episode reward: [(0, '4.702')]
+[2024-12-19 10:07:46,685][07135] Fps is (10 sec: 3277.0, 60 sec: 3823.0, 300 sec: 3832.2). Total num frames: 2113536. Throughput: 0: 949.7. Samples: 528402. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 10:07:46,687][07135] Avg episode reward: [(0, '4.758')]
+[2024-12-19 10:07:46,696][07448] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000516_2113536.pth...
+[2024-12-19 10:07:46,824][07448] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000292_1196032.pth
+[2024-12-19 10:07:49,629][07462] Updated weights for policy 0, policy_version 520 (0.0020)
+[2024-12-19 10:07:51,685][07135] Fps is (10 sec: 4507.3, 60 sec: 4027.9, 300 sec: 3846.1). Total num frames: 2138112. Throughput: 0: 990.6. Samples: 535314. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 10:07:51,687][07135] Avg episode reward: [(0, '4.709')]
+[2024-12-19 10:07:56,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 2154496. Throughput: 0: 1004.2. Samples: 538182. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2024-12-19 10:07:56,690][07135] Avg episode reward: [(0, '4.614')]
+[2024-12-19 10:08:01,343][07462] Updated weights for policy 0, policy_version 530 (0.0024)
+[2024-12-19 10:08:01,685][07135] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 2170880. Throughput: 0: 943.9. Samples: 542422. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 10:08:01,687][07135] Avg episode reward: [(0, '4.540')]
+[2024-12-19 10:08:06,685][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 2195456. Throughput: 0: 964.3. Samples: 549416. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:08:06,688][07135] Avg episode reward: [(0, '4.489')]
+[2024-12-19 10:08:10,132][07462] Updated weights for policy 0, policy_version 540 (0.0033)
+[2024-12-19 10:08:11,687][07135] Fps is (10 sec: 4504.7, 60 sec: 3959.3, 300 sec: 3832.2). Total num frames: 2215936. Throughput: 0: 994.5. Samples: 552934. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 10:08:11,693][07135] Avg episode reward: [(0, '4.697')]
+[2024-12-19 10:08:16,686][07135] Fps is (10 sec: 3276.6, 60 sec: 3754.6, 300 sec: 3832.2). Total num frames: 2228224. Throughput: 0: 968.2. Samples: 557648. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 10:08:16,688][07135] Avg episode reward: [(0, '4.709')]
+[2024-12-19 10:08:21,442][07462] Updated weights for policy 0, policy_version 550 (0.0021)
+[2024-12-19 10:08:21,686][07135] Fps is (10 sec: 3686.9, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 2252800. Throughput: 0: 947.6. Samples: 563718. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:08:21,688][07135] Avg episode reward: [(0, '4.596')]
+[2024-12-19 10:08:26,685][07135] Fps is (10 sec: 4505.9, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 2273280. Throughput: 0: 978.5. Samples: 567174. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:08:26,691][07135] Avg episode reward: [(0, '4.458')]
+[2024-12-19 10:08:31,685][07135] Fps is (10 sec: 3686.6, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 2289664. Throughput: 0: 983.6. Samples: 572664. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 10:08:31,687][07135] Avg episode reward: [(0, '4.481')]
+[2024-12-19 10:08:33,636][07462] Updated weights for policy 0, policy_version 560 (0.0033)
+[2024-12-19 10:08:34,132][07135] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 7135], exiting...
+[2024-12-19 10:08:34,142][07448] Stopping Batcher_0...
+[2024-12-19 10:08:34,144][07448] Loop batcher_evt_loop terminating...
+[2024-12-19 10:08:34,144][07448] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000560_2293760.pth...
+[2024-12-19 10:08:34,141][07135] Runner profile tree view:
+main_loop: 637.6622
+[2024-12-19 10:08:34,153][07135] Collected {0: 2293760}, FPS: 3597.1
+[2024-12-19 10:08:34,352][07462] Weights refcount: 2 0
+[2024-12-19 10:08:34,358][07462] Stopping InferenceWorker_p0-w0...
+[2024-12-19 10:08:34,368][07462] Loop inference_proc0-0_evt_loop terminating...
+[2024-12-19 10:08:34,510][07463] EvtLoop [rollout_proc1_evt_loop, process=rollout_proc1] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance1'), args=(0, 0)
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal
+    slot_callable(*args)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts
+    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts
+    new_obs, rewards, terminated, truncated, infos = e.step(actions)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 129, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 115, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 522, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/envs/env_wrappers.py", line 86, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step
+    reward = self.game.make_action(actions_flattened, self.skip_frames)
+vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.
+[2024-12-19 10:08:34,511][07463] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc1_evt_loop
+[2024-12-19 10:08:34,545][07469] EvtLoop [rollout_proc3_evt_loop, process=rollout_proc3] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance3'), args=(0, 0)
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal
+    slot_callable(*args)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts
+    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts
+    new_obs, rewards, terminated, truncated, infos = e.step(actions)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 129, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 115, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 522, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/envs/env_wrappers.py", line 86, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step
+    reward = self.game.make_action(actions_flattened, self.skip_frames)
+vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.
+[2024-12-19 10:08:34,554][07469] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc3_evt_loop
+[2024-12-19 10:08:34,535][07468] EvtLoop [rollout_proc6_evt_loop, process=rollout_proc6] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance6'), args=(0, 0)
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal
+    slot_callable(*args)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts
+    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts
+    new_obs, rewards, terminated, truncated, infos = e.step(actions)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 129, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 115, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 522, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/envs/env_wrappers.py", line 86, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step
+    reward = self.game.make_action(actions_flattened, self.skip_frames)
+vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.
+[2024-12-19 10:08:34,653][07468] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc6_evt_loop
+[2024-12-19 10:08:34,626][07467] EvtLoop [rollout_proc5_evt_loop, process=rollout_proc5] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance5'), args=(1, 0)
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal
+    slot_callable(*args)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts
+    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts
+    new_obs, rewards, terminated, truncated, infos = e.step(actions)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 129, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 115, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 522, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/envs/env_wrappers.py", line 86, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step
+    reward = self.game.make_action(actions_flattened, self.skip_frames)
+vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.
+[2024-12-19 10:08:34,638][07466] EvtLoop [rollout_proc7_evt_loop, process=rollout_proc7] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance7'), args=(1, 0)
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal
+    slot_callable(*args)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts
+    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts
+    new_obs, rewards, terminated, truncated, infos = e.step(actions)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 129, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 115, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 522, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/envs/env_wrappers.py", line 86, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step
+    reward = self.game.make_action(actions_flattened, self.skip_frames)
+vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.
+[2024-12-19 10:08:34,654][07466] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc7_evt_loop
+[2024-12-19 10:08:34,654][07467] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc5_evt_loop
+[2024-12-19 10:08:34,683][07464] EvtLoop [rollout_proc2_evt_loop, process=rollout_proc2] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance2'), args=(1, 0)
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal
+    slot_callable(*args)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts
+    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts
+    new_obs, rewards, terminated, truncated, infos = e.step(actions)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 129, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 115, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 522, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/envs/env_wrappers.py", line 86, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step
+    reward = self.game.make_action(actions_flattened, self.skip_frames)
+vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.
+[2024-12-19 10:08:34,696][07464] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc2_evt_loop
+[2024-12-19 10:08:34,725][07448] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000406_1662976.pth
+[2024-12-19 10:08:34,833][07448] Stopping LearnerWorker_p0...
+[2024-12-19 10:08:34,840][07448] Loop learner_proc0_evt_loop terminating...
+[2024-12-19 10:08:34,991][07465] EvtLoop [rollout_proc4_evt_loop, process=rollout_proc4] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance4'), args=(1, 0)
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal
+    slot_callable(*args)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts
+    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts
+    new_obs, rewards, terminated, truncated, infos = e.step(actions)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 129, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 115, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 522, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/envs/env_wrappers.py", line 86, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step
+    reward = self.game.make_action(actions_flattened, self.skip_frames)
+vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.
+[2024-12-19 10:08:35,007][07461] EvtLoop [rollout_proc0_evt_loop, process=rollout_proc0] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance0'), args=(1, 0)
+Traceback (most recent call last):
+  File "/usr/local/lib/python3.10/dist-packages/signal_slot/signal_slot.py", line 355, in _process_signal
+    slot_callable(*args)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts
+    complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts
+    new_obs, rewards, terminated, truncated, infos = e.step(actions)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 129, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/algo/utils/make_env.py", line 115, in step
+    obs, rew, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 522, in step
+    observation, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sample_factory/envs/env_wrappers.py", line 86, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/gymnasium/core.py", line 461, in step
+    return self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step
+    obs, reward, terminated, truncated, info = self.env.step(action)
+  File "/usr/local/lib/python3.10/dist-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step
+    reward = self.game.make_action(actions_flattened, self.skip_frames)
+vizdoom.vizdoom.SignalException: Signal SIGINT received. ViZDoom instance has been closed.
+[2024-12-19 10:08:35,159][07465] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc4_evt_loop
+[2024-12-19 10:08:35,192][07461] Unhandled exception Signal SIGINT received. ViZDoom instance has been closed. in evt loop rollout_proc0_evt_loop
+[2024-12-19 10:08:36,640][07135] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2024-12-19 10:08:36,647][07135] Overriding arg 'num_workers' with value 1 passed from command line
+[2024-12-19 10:08:36,650][07135] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-12-19 10:08:36,653][07135] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-12-19 10:08:36,656][07135] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-12-19 10:08:36,665][07135] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-12-19 10:08:36,668][07135] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+[2024-12-19 10:08:36,675][07135] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-12-19 10:08:36,675][07135] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+[2024-12-19 10:08:36,677][07135] Adding new argument 'hf_repository'=None that is not in the saved config file!
+[2024-12-19 10:08:36,688][07135] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-12-19 10:08:36,692][07135] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-12-19 10:08:36,694][07135] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-12-19 10:08:36,698][07135] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-12-19 10:08:36,700][07135] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-12-19 10:08:36,805][07135] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 10:08:36,824][07135] RunningMeanStd input shape: (3, 72, 128)
+[2024-12-19 10:08:36,833][07135] RunningMeanStd input shape: (1,)
+[2024-12-19 10:08:36,895][07135] ConvEncoder: input_channels=3
+[2024-12-19 10:08:37,370][07135] Conv encoder output size: 512
+[2024-12-19 10:08:37,383][07135] Policy head output size: 512
+[2024-12-19 10:08:38,022][07135] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000560_2293760.pth...
+[2024-12-19 10:08:40,683][07135] Num frames 100...
+[2024-12-19 10:08:40,959][07135] Num frames 200...
+[2024-12-19 10:08:41,224][07135] Num frames 300...
+[2024-12-19 10:08:41,455][07135] Num frames 400...
+[2024-12-19 10:08:41,617][07135] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480
+[2024-12-19 10:08:41,619][07135] Avg episode reward: 5.480, avg true_objective: 4.480
+[2024-12-19 10:08:41,732][07135] Num frames 500...
+[2024-12-19 10:08:41,957][07135] Num frames 600...
+[2024-12-19 10:08:42,145][07135] Num frames 700...
+[2024-12-19 10:08:42,335][07135] Num frames 800...
+[2024-12-19 10:08:42,549][07135] Num frames 900...
+[2024-12-19 10:08:42,722][07135] Avg episode rewards: #0: 6.300, true rewards: #0: 4.800
+[2024-12-19 10:08:42,724][07135] Avg episode reward: 6.300, avg true_objective: 4.800
+[2024-12-19 10:08:42,806][07135] Num frames 1000...
+[2024-12-19 10:08:43,038][07135] Num frames 1100...
+[2024-12-19 10:08:43,190][07135] Num frames 1200...
+[2024-12-19 10:08:43,313][07135] Num frames 1300...
+[2024-12-19 10:08:43,466][07135] Avg episode rewards: #0: 5.920, true rewards: #0: 4.587
+[2024-12-19 10:08:43,468][07135] Avg episode reward: 5.920, avg true_objective: 4.587
+[2024-12-19 10:08:43,502][07135] Num frames 1400...
+[2024-12-19 10:08:43,631][07135] Num frames 1500...
+[2024-12-19 10:08:43,758][07135] Num frames 1600...
+[2024-12-19 10:08:43,888][07135] Num frames 1700...
+[2024-12-19 10:08:44,026][07135] Avg episode rewards: #0: 5.400, true rewards: #0: 4.400
+[2024-12-19 10:08:44,027][07135] Avg episode reward: 5.400, avg true_objective: 4.400
+[2024-12-19 10:08:44,079][07135] Num frames 1800...
+[2024-12-19 10:08:44,204][07135] Num frames 1900...
+[2024-12-19 10:08:44,331][07135] Num frames 2000...
+[2024-12-19 10:08:44,460][07135] Num frames 2100...
+[2024-12-19 10:08:44,574][07135] Avg episode rewards: #0: 5.088, true rewards: #0: 4.288
+[2024-12-19 10:08:44,576][07135] Avg episode reward: 5.088, avg true_objective: 4.288
+[2024-12-19 10:08:44,676][07135] Num frames 2200...
+[2024-12-19 10:08:44,847][07135] Num frames 2300...
+[2024-12-19 10:08:45,028][07135] Num frames 2400...
+[2024-12-19 10:08:45,194][07135] Num frames 2500...
+[2024-12-19 10:08:45,299][07135] Avg episode rewards: #0: 4.880, true rewards: #0: 4.213
+[2024-12-19 10:08:45,304][07135] Avg episode reward: 4.880, avg true_objective: 4.213
+[2024-12-19 10:08:45,425][07135] Num frames 2600...
+[2024-12-19 10:08:45,612][07135] Num frames 2700...
+[2024-12-19 10:08:45,784][07135] Num frames 2800...
+[2024-12-19 10:08:45,973][07135] Num frames 2900...
+[2024-12-19 10:08:46,060][07135] Avg episode rewards: #0: 4.731, true rewards: #0: 4.160
+[2024-12-19 10:08:46,062][07135] Avg episode reward: 4.731, avg true_objective: 4.160
+[2024-12-19 10:08:46,218][07135] Num frames 3000...
+[2024-12-19 10:08:46,404][07135] Num frames 3100...
+[2024-12-19 10:08:46,579][07135] Num frames 3200...
+[2024-12-19 10:08:46,808][07135] Avg episode rewards: #0: 4.620, true rewards: #0: 4.120
+[2024-12-19 10:08:46,810][07135] Avg episode reward: 4.620, avg true_objective: 4.120
+[2024-12-19 10:08:46,822][07135] Num frames 3300...
+[2024-12-19 10:08:47,010][07135] Num frames 3400...
+[2024-12-19 10:08:47,142][07135] Num frames 3500...
+[2024-12-19 10:08:47,269][07135] Num frames 3600...
+[2024-12-19 10:08:47,393][07135] Num frames 3700...
+[2024-12-19 10:08:47,503][07135] Avg episode rewards: #0: 4.716, true rewards: #0: 4.160
+[2024-12-19 10:08:47,506][07135] Avg episode reward: 4.716, avg true_objective: 4.160
+[2024-12-19 10:08:47,584][07135] Num frames 3800...
+[2024-12-19 10:08:47,708][07135] Num frames 3900...
+[2024-12-19 10:08:47,832][07135] Num frames 4000...
+[2024-12-19 10:08:47,955][07135] Num frames 4100...
+[2024-12-19 10:08:48,044][07135] Avg episode rewards: #0: 4.628, true rewards: #0: 4.128
+[2024-12-19 10:08:48,046][07135] Avg episode reward: 4.628, avg true_objective: 4.128
+[2024-12-19 10:09:06,834][07135] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+[2024-12-19 10:09:07,139][07135] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2024-12-19 10:09:07,141][07135] Overriding arg 'num_workers' with value 1 passed from command line
+[2024-12-19 10:09:07,142][07135] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-12-19 10:09:07,144][07135] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-12-19 10:09:07,145][07135] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-12-19 10:09:07,146][07135] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-12-19 10:09:07,146][07135] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2024-12-19 10:09:07,147][07135] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-12-19 10:09:07,148][07135] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2024-12-19 10:09:07,150][07135] Adding new argument 'hf_repository'='ThomasSimonini/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2024-12-19 10:09:07,151][07135] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-12-19 10:09:07,152][07135] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-12-19 10:09:07,153][07135] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-12-19 10:09:07,154][07135] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-12-19 10:09:07,155][07135] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-12-19 10:09:07,195][07135] RunningMeanStd input shape: (3, 72, 128)
+[2024-12-19 10:09:07,197][07135] RunningMeanStd input shape: (1,)
+[2024-12-19 10:09:07,213][07135] ConvEncoder: input_channels=3
+[2024-12-19 10:09:07,270][07135] Conv encoder output size: 512
+[2024-12-19 10:09:07,272][07135] Policy head output size: 512
+[2024-12-19 10:09:07,298][07135] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000560_2293760.pth...
+[2024-12-19 10:09:07,928][07135] Num frames 100...
+[2024-12-19 10:09:08,106][07135] Num frames 200...
+[2024-12-19 10:09:08,295][07135] Num frames 300...
+[2024-12-19 10:09:08,460][07135] Num frames 400...
+[2024-12-19 10:09:08,598][07135] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480
+[2024-12-19 10:09:08,600][07135] Avg episode reward: 5.480, avg true_objective: 4.480
+[2024-12-19 10:09:08,686][07135] Num frames 500...
+[2024-12-19 10:09:08,848][07135] Num frames 600...
+[2024-12-19 10:09:09,002][07135] Num frames 700...
+[2024-12-19 10:09:09,124][07135] Num frames 800...
+[2024-12-19 10:09:09,218][07135] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
+[2024-12-19 10:09:09,220][07135] Avg episode reward: 4.660, avg true_objective: 4.160
+[2024-12-19 10:09:09,302][07135] Num frames 900...
+[2024-12-19 10:09:09,421][07135] Num frames 1000...
+[2024-12-19 10:09:09,547][07135] Num frames 1100...
+[2024-12-19 10:09:09,669][07135] Num frames 1200...
+[2024-12-19 10:09:09,824][07135] Avg episode rewards: #0: 4.933, true rewards: #0: 4.267
+[2024-12-19 10:09:09,827][07135] Avg episode reward: 4.933, avg true_objective: 4.267
+[2024-12-19 10:09:09,867][07135] Num frames 1300...
+[2024-12-19 10:09:09,991][07135] Num frames 1400...
+[2024-12-19 10:09:10,111][07135] Num frames 1500...
+[2024-12-19 10:09:10,261][07135] Num frames 1600...
+[2024-12-19 10:09:10,423][07135] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
+[2024-12-19 10:09:10,425][07135] Avg episode reward: 4.660, avg true_objective: 4.160
+[2024-12-19 10:09:10,489][07135] Num frames 1700...
+[2024-12-19 10:09:10,668][07135] Num frames 1800...
+[2024-12-19 10:09:10,833][07135] Num frames 1900...
+[2024-12-19 10:09:11,018][07135] Num frames 2000...
+[2024-12-19 10:09:11,162][07135] Avg episode rewards: #0: 4.496, true rewards: #0: 4.096
+[2024-12-19 10:09:11,167][07135] Avg episode reward: 4.496, avg true_objective: 4.096
+[2024-12-19 10:09:11,256][07135] Num frames 2100...
+[2024-12-19 10:09:11,417][07135] Num frames 2200...
+[2024-12-19 10:09:11,588][07135] Num frames 2300...
+[2024-12-19 10:09:11,763][07135] Num frames 2400...
+[2024-12-19 10:09:11,874][07135] Avg episode rewards: #0: 4.387, true rewards: #0: 4.053
+[2024-12-19 10:09:11,877][07135] Avg episode reward: 4.387, avg true_objective: 4.053
+[2024-12-19 10:09:12,010][07135] Num frames 2500...
+[2024-12-19 10:09:12,181][07135] Num frames 2600...
+[2024-12-19 10:09:12,347][07135] Num frames 2700...
+[2024-12-19 10:09:12,520][07135] Num frames 2800...
+[2024-12-19 10:09:12,607][07135] Avg episode rewards: #0: 4.309, true rewards: #0: 4.023
+[2024-12-19 10:09:12,609][07135] Avg episode reward: 4.309, avg true_objective: 4.023
+[2024-12-19 10:09:12,716][07135] Num frames 2900...
+[2024-12-19 10:09:12,836][07135] Num frames 3000...
+[2024-12-19 10:09:12,958][07135] Num frames 3100...
+[2024-12-19 10:09:13,098][07135] Num frames 3200...
+[2024-12-19 10:09:13,152][07135] Avg episode rewards: #0: 4.250, true rewards: #0: 4.000
+[2024-12-19 10:09:13,154][07135] Avg episode reward: 4.250, avg true_objective: 4.000
+[2024-12-19 10:09:13,276][07135] Num frames 3300...
+[2024-12-19 10:09:13,398][07135] Num frames 3400...
+[2024-12-19 10:09:13,529][07135] Num frames 3500...
+[2024-12-19 10:09:13,689][07135] Avg episode rewards: #0: 4.204, true rewards: #0: 3.982
+[2024-12-19 10:09:13,690][07135] Avg episode reward: 4.204, avg true_objective: 3.982
+[2024-12-19 10:09:13,714][07135] Num frames 3600...
+[2024-12-19 10:09:13,832][07135] Num frames 3700...
+[2024-12-19 10:09:13,955][07135] Num frames 3800...
+[2024-12-19 10:09:14,090][07135] Num frames 3900...
+[2024-12-19 10:09:14,218][07135] Num frames 4000...
+[2024-12-19 10:09:14,271][07135] Avg episode rewards: #0: 4.300, true rewards: #0: 4.000
+[2024-12-19 10:09:14,272][07135] Avg episode reward: 4.300, avg true_objective: 4.000
+[2024-12-19 10:09:31,879][07135] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+[2024-12-19 10:18:48,403][07135] Environment doom_basic already registered, overwriting...
+[2024-12-19 10:18:48,406][07135] Environment doom_two_colors_easy already registered, overwriting...
+[2024-12-19 10:18:48,409][07135] Environment doom_two_colors_hard already registered, overwriting...
+[2024-12-19 10:18:48,410][07135] Environment doom_dm already registered, overwriting...
+[2024-12-19 10:18:48,412][07135] Environment doom_dwango5 already registered, overwriting...
+[2024-12-19 10:18:48,413][07135] Environment doom_my_way_home_flat_actions already registered, overwriting...
+[2024-12-19 10:18:48,414][07135] Environment doom_defend_the_center_flat_actions already registered, overwriting...
+[2024-12-19 10:18:48,419][07135] Environment doom_my_way_home already registered, overwriting...
+[2024-12-19 10:18:48,420][07135] Environment doom_deadly_corridor already registered, overwriting...
+[2024-12-19 10:18:48,422][07135] Environment doom_defend_the_center already registered, overwriting...
+[2024-12-19 10:18:48,423][07135] Environment doom_defend_the_line already registered, overwriting...
+[2024-12-19 10:18:48,425][07135] Environment doom_health_gathering already registered, overwriting...
+[2024-12-19 10:18:48,426][07135] Environment doom_health_gathering_supreme already registered, overwriting...
+[2024-12-19 10:18:48,430][07135] Environment doom_battle already registered, overwriting...
+[2024-12-19 10:18:48,431][07135] Environment doom_battle2 already registered, overwriting...
+[2024-12-19 10:18:48,432][07135] Environment doom_duel_bots already registered, overwriting...
+[2024-12-19 10:18:48,435][07135] Environment doom_deathmatch_bots already registered, overwriting...
+[2024-12-19 10:18:48,437][07135] Environment doom_duel already registered, overwriting...
+[2024-12-19 10:18:48,439][07135] Environment doom_deathmatch_full already registered, overwriting...
+[2024-12-19 10:18:48,441][07135] Environment doom_benchmark already registered, overwriting...
+[2024-12-19 10:18:48,443][07135] register_encoder_factory:
+[2024-12-19 10:18:48,481][07135] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2024-12-19 10:18:48,484][07135] Overriding arg 'train_for_env_steps' with value 10000000 passed from command line
+[2024-12-19 10:18:48,492][07135] Experiment dir /content/train_dir/default_experiment already exists!
+[2024-12-19 10:18:48,494][07135] Resuming existing experiment from /content/train_dir/default_experiment...
+[2024-12-19 10:18:48,498][07135] Weights and Biases integration disabled
+[2024-12-19 10:18:48,501][07135] Environment var CUDA_VISIBLE_DEVICES is 0
+
+[2024-12-19 10:18:50,770][07135] Starting experiment with the following configuration:
+help=False
+algo=APPO
+env=doom_health_gathering_supreme
+experiment=default_experiment
+train_dir=/content/train_dir
+restart_behavior=resume
+device=gpu
+seed=None
+num_policies=1
+async_rl=True
+serial_mode=False
+batched_sampling=False
+num_batches_to_accumulate=2
+worker_num_splits=2
+policy_workers_per_policy=1
+max_policy_lag=1000
+num_workers=8
+num_envs_per_worker=4
+batch_size=1024
+num_batches_per_epoch=1
+num_epochs=1
+rollout=32
+recurrence=32
+shuffle_minibatches=False
+gamma=0.99
+reward_scale=1.0
+reward_clip=1000.0
+value_bootstrap=False
+normalize_returns=True
+exploration_loss_coeff=0.001
+value_loss_coeff=0.5
+kl_loss_coeff=0.0
+exploration_loss=symmetric_kl
+gae_lambda=0.95
+ppo_clip_ratio=0.1
+ppo_clip_value=0.2
+with_vtrace=False
+vtrace_rho=1.0
+vtrace_c=1.0
+optimizer=adam
+adam_eps=1e-06
+adam_beta1=0.9
+adam_beta2=0.999
+max_grad_norm=4.0
+learning_rate=0.0001
+lr_schedule=constant
+lr_schedule_kl_threshold=0.008
+lr_adaptive_min=1e-06
+lr_adaptive_max=0.01
+obs_subtract_mean=0.0
+obs_scale=255.0
+normalize_input=True
+normalize_input_keys=None
+decorrelate_experience_max_seconds=0
+decorrelate_envs_on_one_worker=True
+actor_worker_gpus=[]
+set_workers_cpu_affinity=True
+force_envs_single_thread=False
+default_niceness=0
+log_to_file=True
+experiment_summaries_interval=10
+flush_summaries_interval=30
+stats_avg=100
+summaries_use_frameskip=True
+heartbeat_interval=20
+heartbeat_reporting_interval=600
+train_for_env_steps=10000000
+train_for_seconds=10000000000
+save_every_sec=120
+keep_checkpoints=2
+load_checkpoint_kind=latest
+save_milestones_sec=-1
+save_best_every_sec=5
+save_best_metric=reward
+save_best_after=100000
+benchmark=False
+encoder_mlp_layers=[512, 512]
+encoder_conv_architecture=convnet_simple
+encoder_conv_mlp_layers=[512]
+use_rnn=True
+rnn_size=512
+rnn_type=gru
+rnn_num_layers=1
+decoder_mlp_layers=[]
+nonlinearity=elu
+policy_initialization=orthogonal
+policy_init_gain=1.0
+actor_critic_share_weights=True
+adaptive_stddev=True
+continuous_tanh_scale=0.0
+initial_stddev=1.0
+use_env_info_cache=False
+env_gpu_actions=False
+env_gpu_observations=True
+env_frameskip=4
+env_framestack=1
+pixel_format=CHW
+use_record_episode_statistics=False
+with_wandb=False
+wandb_user=None
+wandb_project=sample_factory
+wandb_group=None
+wandb_job_type=SF
+wandb_tags=[]
+with_pbt=False
+pbt_mix_policies_in_one_env=True
+pbt_period_env_steps=5000000
+pbt_start_mutation=20000000
+pbt_replace_fraction=0.3
+pbt_mutation_rate=0.15
+pbt_replace_reward_gap=0.1
+pbt_replace_reward_gap_absolute=1e-06
+pbt_optimize_gamma=False
+pbt_target_objective=true_objective
+pbt_perturb_min=1.1
+pbt_perturb_max=1.5
+num_agents=-1
+num_humans=0
+num_bots=-1
+start_bot_difficulty=None
+timelimit=None
+res_w=128
+res_h=72
+wide_aspect_ratio=False
+eval_env_frameskip=1
+fps=35
+command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=10000000
+cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 10000000}
+git_hash=unknown
+git_repo_name=not a git repository
+[2024-12-19 10:18:50,771][07135] Saving configuration to /content/train_dir/default_experiment/config.json...
+[2024-12-19 10:18:50,776][07135] Rollout worker 0 uses device cpu
+[2024-12-19 10:18:50,778][07135] Rollout worker 1 uses device cpu
+[2024-12-19 10:18:50,780][07135] Rollout worker 2 uses device cpu
+[2024-12-19 10:18:50,781][07135] Rollout worker 3 uses device cpu
+[2024-12-19 10:18:50,783][07135] Rollout worker 4 uses device cpu
+[2024-12-19 10:18:50,784][07135] Rollout worker 5 uses device cpu
+[2024-12-19 10:18:50,785][07135] Rollout worker 6 uses device cpu
+[2024-12-19 10:18:50,786][07135] Rollout worker 7 uses device cpu
+[2024-12-19 10:18:50,884][07135] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-12-19 10:18:50,886][07135] InferenceWorker_p0-w0: min num requests: 2
+[2024-12-19 10:18:50,918][07135] Starting all processes...
+[2024-12-19 10:18:50,919][07135] Starting process learner_proc0
+[2024-12-19 10:18:50,968][07135] Starting all processes...
+[2024-12-19 10:18:50,975][07135] Starting process inference_proc0-0
+[2024-12-19 10:18:50,977][07135] Starting process rollout_proc0
+[2024-12-19 10:18:50,984][07135] Starting process rollout_proc1
+[2024-12-19 10:18:50,984][07135] Starting process rollout_proc2
+[2024-12-19 10:18:50,984][07135] Starting process rollout_proc3
+[2024-12-19 10:18:50,984][07135] Starting process rollout_proc4
+[2024-12-19 10:18:50,984][07135] Starting process rollout_proc5
+[2024-12-19 10:18:50,984][07135] Starting process rollout_proc6
+[2024-12-19 10:18:50,984][07135] Starting process rollout_proc7
+[2024-12-19 10:19:07,001][15948] Worker 5 uses CPU cores [1]
+[2024-12-19 10:19:07,500][15925] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-12-19 10:19:07,505][15925] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2024-12-19 10:19:07,574][15939] Worker 0 uses CPU cores [0]
+[2024-12-19 10:19:07,575][15925] Num visible devices: 1
+[2024-12-19 10:19:07,607][15925] Starting seed is not provided
+[2024-12-19 10:19:07,608][15925] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-12-19 10:19:07,609][15925] Initializing actor-critic model on device cuda:0
+[2024-12-19 10:19:07,610][15925] RunningMeanStd input shape: (3, 72, 128)
+[2024-12-19 10:19:07,611][15925] RunningMeanStd input shape: (1,)
+[2024-12-19 10:19:07,692][15925] ConvEncoder: input_channels=3
+[2024-12-19 10:19:07,787][15938] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-12-19 10:19:07,789][15938] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2024-12-19 10:19:07,865][15938] Num visible devices: 1
+[2024-12-19 10:19:07,918][15945] Worker 2 uses CPU cores [0]
+[2024-12-19 10:19:08,026][15949] Worker 7 uses CPU cores [1]
+[2024-12-19 10:19:08,040][15950] Worker 6 uses CPU cores [0]
+[2024-12-19 10:19:08,068][15946] Worker 4 uses CPU cores [0]
+[2024-12-19 10:19:08,124][15947] Worker 3 uses CPU cores [1]
+[2024-12-19 10:19:08,146][15925] Conv encoder output size: 512
+[2024-12-19 10:19:08,146][15925] Policy head output size: 512
+[2024-12-19 10:19:08,173][15925] Created Actor Critic model with architecture:
+[2024-12-19 10:19:08,174][15925] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2024-12-19 10:19:08,220][15944] Worker 1 uses CPU cores [1]
+[2024-12-19 10:19:08,304][15925] Using optimizer
+[2024-12-19 10:19:09,104][15925] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000560_2293760.pth...
+[2024-12-19 10:19:09,140][15925] Loading model from checkpoint
+[2024-12-19 10:19:09,142][15925] Loaded experiment state at self.train_step=560, self.env_steps=2293760
+[2024-12-19 10:19:09,142][15925] Initialized policy 0 weights for model version 560
+[2024-12-19 10:19:09,146][15925] LearnerWorker_p0 finished initialization!
+[2024-12-19 10:19:09,147][15925] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2024-12-19 10:19:09,251][15938] RunningMeanStd input shape: (3, 72, 128)
+[2024-12-19 10:19:09,252][15938] RunningMeanStd input shape: (1,)
+[2024-12-19 10:19:09,265][15938] ConvEncoder: input_channels=3
+[2024-12-19 10:19:09,368][15938] Conv encoder output size: 512
+[2024-12-19 10:19:09,368][15938] Policy head output size: 512
+[2024-12-19 10:19:09,422][07135] Inference worker 0-0 is ready!
+[2024-12-19 10:19:09,424][07135] All inference workers are ready! Signal rollout workers to start!
+[2024-12-19 10:19:09,621][15948] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 10:19:09,625][15944] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 10:19:09,627][15949] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 10:19:09,634][15946] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 10:19:09,636][15945] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 10:19:09,638][15939] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 10:19:09,639][15950] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 10:19:09,639][15947] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-12-19 10:19:10,627][15944] Decorrelating experience for 0 frames...
+[2024-12-19 10:19:10,635][15947] Decorrelating experience for 0 frames...
+[2024-12-19 10:19:10,877][07135] Heartbeat connected on Batcher_0
+[2024-12-19 10:19:10,882][07135] Heartbeat connected on LearnerWorker_p0
+[2024-12-19 10:19:10,925][07135] Heartbeat connected on InferenceWorker_p0-w0
+[2024-12-19 10:19:11,305][15950] Decorrelating experience for 0 frames...
+[2024-12-19 10:19:11,303][15945] Decorrelating experience for 0 frames...
+[2024-12-19 10:19:11,312][15946] Decorrelating experience for 0 frames...
+[2024-12-19 10:19:11,315][15939] Decorrelating experience for 0 frames...
+[2024-12-19 10:19:11,461][15944] Decorrelating experience for 32 frames...
+[2024-12-19 10:19:11,472][15947] Decorrelating experience for 32 frames...
+[2024-12-19 10:19:12,337][15945] Decorrelating experience for 32 frames...
+[2024-12-19 10:19:12,349][15939] Decorrelating experience for 32 frames...
+[2024-12-19 10:19:12,637][15950] Decorrelating experience for 32 frames...
+[2024-12-19 10:19:12,914][15948] Decorrelating experience for 0 frames...
+[2024-12-19 10:19:12,930][15949] Decorrelating experience for 0 frames...
+[2024-12-19 10:19:13,502][07135] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 2293760. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-12-19 10:19:13,537][15946] Decorrelating experience for 32 frames...
+[2024-12-19 10:19:13,630][15944] Decorrelating experience for 64 frames...
+[2024-12-19 10:19:13,715][15947] Decorrelating experience for 64 frames...
+[2024-12-19 10:19:14,292][15939] Decorrelating experience for 64 frames...
+[2024-12-19 10:19:14,617][15948] Decorrelating experience for 32 frames...
+[2024-12-19 10:19:14,629][15949] Decorrelating experience for 32 frames...
+[2024-12-19 10:19:15,745][15950] Decorrelating experience for 64 frames...
+[2024-12-19 10:19:15,874][15939] Decorrelating experience for 96 frames...
+[2024-12-19 10:19:16,133][07135] Heartbeat connected on RolloutWorker_w0
+[2024-12-19 10:19:16,457][15945] Decorrelating experience for 64 frames...
+[2024-12-19 10:19:16,708][15947] Decorrelating experience for 96 frames...
+[2024-12-19 10:19:17,078][07135] Heartbeat connected on RolloutWorker_w3
+[2024-12-19 10:19:17,460][15948] Decorrelating experience for 64 frames...
+[2024-12-19 10:19:17,466][15949] Decorrelating experience for 64 frames...
+[2024-12-19 10:19:18,502][07135] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 2293760. Throughput: 0: 84.0. Samples: 420. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-12-19 10:19:18,507][07135] Avg episode reward: [(0, '1.387')]
+[2024-12-19 10:19:18,663][15950] Decorrelating experience for 96 frames...
+[2024-12-19 10:19:19,005][07135] Heartbeat connected on RolloutWorker_w6
+[2024-12-19 10:19:19,505][15945] Decorrelating experience for 96 frames...
+[2024-12-19 10:19:19,637][15946] Decorrelating experience for 64 frames...
+[2024-12-19 10:19:19,841][07135] Heartbeat connected on RolloutWorker_w2
+[2024-12-19 10:19:19,870][15948] Decorrelating experience for 96 frames...
+[2024-12-19 10:19:20,195][07135] Heartbeat connected on RolloutWorker_w5
+[2024-12-19 10:19:22,233][15946] Decorrelating experience for 96 frames...
+[2024-12-19 10:19:22,752][07135] Heartbeat connected on RolloutWorker_w4
+[2024-12-19 10:19:22,996][15949] Decorrelating experience for 96 frames...
+[2024-12-19 10:19:23,146][15944] Decorrelating experience for 96 frames...
+[2024-12-19 10:19:23,257][15925] Signal inference workers to stop experience collection...
+[2024-12-19 10:19:23,275][15938] InferenceWorker_p0-w0: stopping experience collection
+[2024-12-19 10:19:23,363][07135] Heartbeat connected on RolloutWorker_w7
+[2024-12-19 10:19:23,393][07135] Heartbeat connected on RolloutWorker_w1
+[2024-12-19 10:19:23,502][07135] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 2293760. Throughput: 0: 175.4. Samples: 1754. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2024-12-19 10:19:23,506][07135] Avg episode reward: [(0, '3.433')]
+[2024-12-19 10:19:24,871][15925] Signal inference workers to resume experience collection...
+[2024-12-19 10:19:24,872][15938] InferenceWorker_p0-w0: resuming experience collection
+[2024-12-19 10:19:28,502][07135] Fps is (10 sec: 2048.0, 60 sec: 1365.4, 300 sec: 1365.4). Total num frames: 2314240. Throughput: 0: 348.7. Samples: 5230. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:19:28,506][07135] Avg episode reward: [(0, '3.644')]
+[2024-12-19 10:19:33,503][07135] Fps is (10 sec: 3686.0, 60 sec: 1843.1, 300 sec: 1843.1). Total num frames: 2330624. Throughput: 0: 513.9. Samples: 10278. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:19:33,508][07135] Avg episode reward: [(0, '4.164')]
+[2024-12-19 10:19:34,367][15938] Updated weights for policy 0, policy_version 570 (0.0142)
+[2024-12-19 10:19:38,502][07135] Fps is (10 sec: 3276.8, 60 sec: 2129.9, 300 sec: 2129.9). Total num frames: 2347008. Throughput: 0: 486.5. Samples: 12162. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:19:38,508][07135] Avg episode reward: [(0, '4.465')]
+[2024-12-19 10:19:43,502][07135] Fps is (10 sec: 3686.8, 60 sec: 2457.6, 300 sec: 2457.6). Total num frames: 2367488. Throughput: 0: 620.3. Samples: 18608. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:19:43,508][07135] Avg episode reward: [(0, '4.476')]
+[2024-12-19 10:19:44,567][15938] Updated weights for policy 0, policy_version 580 (0.0022)
+[2024-12-19 10:19:48,502][07135] Fps is (10 sec: 4505.6, 60 sec: 2808.7, 300 sec: 2808.7). Total num frames: 2392064. Throughput: 0: 724.2. Samples: 25346. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:19:48,506][07135] Avg episode reward: [(0, '4.448')]
+[2024-12-19 10:19:53,502][07135] Fps is (10 sec: 3686.4, 60 sec: 2764.8, 300 sec: 2764.8). Total num frames: 2404352. Throughput: 0: 686.1. Samples: 27442. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:19:53,503][07135] Avg episode reward: [(0, '4.438')]
+[2024-12-19 10:19:55,959][15938] Updated weights for policy 0, policy_version 590 (0.0023)
+[2024-12-19 10:19:58,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3003.7, 300 sec: 3003.7). Total num frames: 2428928. Throughput: 0: 735.1. Samples: 33080. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:19:58,504][07135] Avg episode reward: [(0, '4.520')]
+[2024-12-19 10:20:03,502][07135] Fps is (10 sec: 4505.7, 60 sec: 3113.0, 300 sec: 3113.0). Total num frames: 2449408. Throughput: 0: 883.3. Samples: 40170. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:20:03,509][07135] Avg episode reward: [(0, '4.567')]
+[2024-12-19 10:20:04,564][15938] Updated weights for policy 0, policy_version 600 (0.0028)
+[2024-12-19 10:20:08,502][07135] Fps is (10 sec: 3686.3, 60 sec: 3127.9, 300 sec: 3127.9). Total num frames: 2465792. Throughput: 0: 911.0. Samples: 42748. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:20:08,504][07135] Avg episode reward: [(0, '4.511')]
+[2024-12-19 10:20:13,507][07135] Fps is (10 sec: 2865.7, 60 sec: 3071.7, 300 sec: 3071.7). Total num frames: 2478080. Throughput: 0: 920.0. Samples: 46634. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-12-19 10:20:13,509][07135] Avg episode reward: [(0, '4.581')]
+[2024-12-19 10:20:18,502][07135] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3087.8). Total num frames: 2494464. Throughput: 0: 906.6. Samples: 51076. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-12-19 10:20:18,504][07135] Avg episode reward: [(0, '4.723')]
+[2024-12-19 10:20:18,884][15938] Updated weights for policy 0, policy_version 610 (0.0026)
+[2024-12-19 10:20:23,506][07135] Fps is (10 sec: 3686.6, 60 sec: 3686.1, 300 sec: 3159.6). Total num frames: 2514944. Throughput: 0: 942.4. Samples: 54576. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:20:23,508][07135] Avg episode reward: [(0, '4.824')]
+[2024-12-19 10:20:28,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3167.6). Total num frames: 2531328. Throughput: 0: 903.9. Samples: 59282. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:20:28,511][07135] Avg episode reward: [(0, '4.751')]
+[2024-12-19 10:20:30,517][15938] Updated weights for policy 0, policy_version 620 (0.0020)
+[2024-12-19 10:20:33,502][07135] Fps is (10 sec: 3688.1, 60 sec: 3686.5, 300 sec: 3225.6). Total num frames: 2551808. Throughput: 0: 892.1. Samples: 65490. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:20:33,506][07135] Avg episode reward: [(0, '4.653')]
+[2024-12-19 10:20:38,502][07135] Fps is (10 sec: 4505.5, 60 sec: 3822.9, 300 sec: 3325.0). Total num frames: 2576384. Throughput: 0: 919.2. Samples: 68808. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:20:38,504][07135] Avg episode reward: [(0, '4.574')]
+[2024-12-19 10:20:39,393][15938] Updated weights for policy 0, policy_version 630 (0.0028)
+[2024-12-19 10:20:43,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3276.8). Total num frames: 2588672. Throughput: 0: 917.0. Samples: 74346. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:20:43,507][07135] Avg episode reward: [(0, '4.621')]
+[2024-12-19 10:20:48,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3319.9). Total num frames: 2609152. Throughput: 0: 874.0. Samples: 79502. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:20:48,508][07135] Avg episode reward: [(0, '4.516')]
+[2024-12-19 10:20:48,517][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000637_2609152.pth...
+[2024-12-19 10:20:48,648][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000516_2113536.pth
+[2024-12-19 10:20:51,050][15938] Updated weights for policy 0, policy_version 640 (0.0039)
+[2024-12-19 10:20:53,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3358.7). Total num frames: 2629632. Throughput: 0: 891.6. Samples: 82870. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:20:53,506][07135] Avg episode reward: [(0, '4.581')]
+[2024-12-19 10:20:58,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3393.8). Total num frames: 2650112. Throughput: 0: 954.2. Samples: 89566. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:20:58,509][07135] Avg episode reward: [(0, '4.621')]
+[2024-12-19 10:21:01,795][15938] Updated weights for policy 0, policy_version 650 (0.0014)
+[2024-12-19 10:21:03,503][07135] Fps is (10 sec: 3685.8, 60 sec: 3618.0, 300 sec: 3388.5). Total num frames: 2666496. Throughput: 0: 950.7. Samples: 93860. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:21:03,508][07135] Avg episode reward: [(0, '4.475')]
+[2024-12-19 10:21:08,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3419.3). Total num frames: 2686976. Throughput: 0: 942.9. Samples: 97002. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:21:08,504][07135] Avg episode reward: [(0, '4.724')]
+[2024-12-19 10:21:11,510][15938] Updated weights for policy 0, policy_version 660 (0.0013)
+[2024-12-19 10:21:13,502][07135] Fps is (10 sec: 4506.3, 60 sec: 3891.5, 300 sec: 3481.6). Total num frames: 2711552. Throughput: 0: 994.8. Samples: 104050. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:21:13,506][07135] Avg episode reward: [(0, '4.740')]
+[2024-12-19 10:21:18,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3440.6). Total num frames: 2723840. Throughput: 0: 967.2. Samples: 109016. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:21:18,504][07135] Avg episode reward: [(0, '4.631')]
+[2024-12-19 10:21:23,106][15938] Updated weights for policy 0, policy_version 670 (0.0024)
+[2024-12-19 10:21:23,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3823.2, 300 sec: 3465.8). Total num frames: 2744320. Throughput: 0: 945.2. Samples: 111340. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:21:23,504][07135] Avg episode reward: [(0, '4.505')]
+[2024-12-19 10:21:28,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3519.5). Total num frames: 2768896. Throughput: 0: 974.6. Samples: 118204. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:21:28,504][07135] Avg episode reward: [(0, '4.650')]
+[2024-12-19 10:21:32,475][15938] Updated weights for policy 0, policy_version 680 (0.0013)
+[2024-12-19 10:21:33,502][07135] Fps is (10 sec: 4096.1, 60 sec: 3891.2, 300 sec: 3510.9). Total num frames: 2785280. Throughput: 0: 989.8. Samples: 124042. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:21:33,507][07135] Avg episode reward: [(0, '4.568')]
+[2024-12-19 10:21:38,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3502.8). Total num frames: 2801664. Throughput: 0: 962.2. Samples: 126170. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:21:38,509][07135] Avg episode reward: [(0, '4.570')]
+[2024-12-19 10:21:43,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3522.6). Total num frames: 2822144. Throughput: 0: 951.2. Samples: 132372. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 10:21:43,509][07135] Avg episode reward: [(0, '4.722')]
+[2024-12-19 10:21:43,723][15938] Updated weights for policy 0, policy_version 690 (0.0022)
+[2024-12-19 10:21:48,502][07135] Fps is (10 sec: 4505.7, 60 sec: 3959.5, 300 sec: 3567.5). Total num frames: 2846720. Throughput: 0: 1001.5. Samples: 138926. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:21:48,505][07135] Avg episode reward: [(0, '4.656')]
+[2024-12-19 10:21:53,504][07135] Fps is (10 sec: 3685.5, 60 sec: 3822.8, 300 sec: 3532.7). Total num frames: 2859008. Throughput: 0: 977.5. Samples: 140990. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:21:53,510][07135] Avg episode reward: [(0, '4.640')]
+[2024-12-19 10:21:55,395][15938] Updated weights for policy 0, policy_version 700 (0.0016)
+[2024-12-19 10:21:58,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3549.9). Total num frames: 2879488. Throughput: 0: 937.2. Samples: 146224. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:21:58,504][07135] Avg episode reward: [(0, '4.710')]
+[2024-12-19 10:22:03,502][07135] Fps is (10 sec: 4506.7, 60 sec: 3959.6, 300 sec: 3590.0). Total num frames: 2904064. Throughput: 0: 977.0. Samples: 152982. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:22:03,505][07135] Avg episode reward: [(0, '4.544')]
+[2024-12-19 10:22:04,321][15938] Updated weights for policy 0, policy_version 710 (0.0018)
+[2024-12-19 10:22:08,504][07135] Fps is (10 sec: 4094.9, 60 sec: 3891.0, 300 sec: 3581.0). Total num frames: 2920448. Throughput: 0: 989.7. Samples: 155880. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:22:08,507][07135] Avg episode reward: [(0, '4.637')]
+[2024-12-19 10:22:13,502][07135] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3549.9). Total num frames: 2932736. Throughput: 0: 927.3. Samples: 159932. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:22:13,505][07135] Avg episode reward: [(0, '4.579')]
+[2024-12-19 10:22:16,411][15938] Updated weights for policy 0, policy_version 720 (0.0018)
+[2024-12-19 10:22:18,502][07135] Fps is (10 sec: 3687.4, 60 sec: 3891.2, 300 sec: 3586.8). Total num frames: 2957312. Throughput: 0: 949.3. Samples: 166760. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:22:18,504][07135] Avg episode reward: [(0, '4.699')]
+[2024-12-19 10:22:23,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3600.2). Total num frames: 2977792. Throughput: 0: 981.1. Samples: 170320. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:22:23,508][07135] Avg episode reward: [(0, '4.798')]
+[2024-12-19 10:22:26,743][15938] Updated weights for policy 0, policy_version 730 (0.0023)
+[2024-12-19 10:22:28,507][07135] Fps is (10 sec: 3684.6, 60 sec: 3754.4, 300 sec: 3591.8). Total num frames: 2994176. Throughput: 0: 949.7. Samples: 175112. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:22:28,513][07135] Avg episode reward: [(0, '4.594')]
+[2024-12-19 10:22:33,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3604.5). Total num frames: 3014656. Throughput: 0: 940.6. Samples: 181254. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:22:33,509][07135] Avg episode reward: [(0, '4.366')]
+[2024-12-19 10:22:36,498][15938] Updated weights for policy 0, policy_version 740 (0.0024)
+[2024-12-19 10:22:38,502][07135] Fps is (10 sec: 4507.8, 60 sec: 3959.5, 300 sec: 3636.5). Total num frames: 3039232. Throughput: 0: 973.9. Samples: 184812. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:22:38,506][07135] Avg episode reward: [(0, '4.548')]
+[2024-12-19 10:22:43,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3627.9). Total num frames: 3055616. Throughput: 0: 981.5. Samples: 190390. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:22:43,507][07135] Avg episode reward: [(0, '4.763')]
+[2024-12-19 10:22:48,148][15938] Updated weights for policy 0, policy_version 750 (0.0028)
+[2024-12-19 10:22:48,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3619.7). Total num frames: 3072000. Throughput: 0: 945.2. Samples: 195516.
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:22:48,506][07135] Avg episode reward: [(0, '4.733')] +[2024-12-19 10:22:48,521][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000750_3072000.pth... +[2024-12-19 10:22:48,672][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000560_2293760.pth +[2024-12-19 10:22:53,503][07135] Fps is (10 sec: 3685.8, 60 sec: 3891.3, 300 sec: 3630.5). Total num frames: 3092480. Throughput: 0: 957.8. Samples: 198978. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:22:53,506][07135] Avg episode reward: [(0, '4.635')] +[2024-12-19 10:22:57,081][15938] Updated weights for policy 0, policy_version 760 (0.0026) +[2024-12-19 10:22:58,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3659.1). Total num frames: 3117056. Throughput: 0: 1017.8. Samples: 205732. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:22:58,504][07135] Avg episode reward: [(0, '4.691')] +[2024-12-19 10:23:03,503][07135] Fps is (10 sec: 3686.5, 60 sec: 3754.6, 300 sec: 3633.0). Total num frames: 3129344. Throughput: 0: 960.9. Samples: 210002. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:23:03,511][07135] Avg episode reward: [(0, '4.712')] +[2024-12-19 10:23:08,440][15938] Updated weights for policy 0, policy_version 770 (0.0026) +[2024-12-19 10:23:08,502][07135] Fps is (10 sec: 3686.3, 60 sec: 3891.4, 300 sec: 3660.3). Total num frames: 3153920. Throughput: 0: 955.9. Samples: 213334. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:23:08,506][07135] Avg episode reward: [(0, '4.590')] +[2024-12-19 10:23:13,502][07135] Fps is (10 sec: 4506.2, 60 sec: 4027.7, 300 sec: 3669.3). Total num frames: 3174400. Throughput: 0: 1006.0. Samples: 220378. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:23:13,504][07135] Avg episode reward: [(0, '4.632')] +[2024-12-19 10:23:18,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3661.3). Total num frames: 3190784. Throughput: 0: 978.7. Samples: 225296. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:23:18,509][07135] Avg episode reward: [(0, '4.633')] +[2024-12-19 10:23:19,247][15938] Updated weights for policy 0, policy_version 780 (0.0033) +[2024-12-19 10:23:23,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3670.0). Total num frames: 3211264. Throughput: 0: 952.3. Samples: 227666. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:23:23,505][07135] Avg episode reward: [(0, '4.489')] +[2024-12-19 10:23:28,502][07135] Fps is (10 sec: 4095.7, 60 sec: 3959.8, 300 sec: 3678.4). Total num frames: 3231744. Throughput: 0: 986.3. Samples: 234776. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:23:28,512][07135] Avg episode reward: [(0, '4.491')] +[2024-12-19 10:23:28,545][15938] Updated weights for policy 0, policy_version 790 (0.0024) +[2024-12-19 10:23:33,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3686.4). Total num frames: 3252224. Throughput: 0: 1005.6. Samples: 240766. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:23:33,507][07135] Avg episode reward: [(0, '4.471')] +[2024-12-19 10:23:38,502][07135] Fps is (10 sec: 3686.6, 60 sec: 3822.9, 300 sec: 3678.7). Total num frames: 3268608. Throughput: 0: 973.9. Samples: 242804. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:23:38,506][07135] Avg episode reward: [(0, '4.419')] +[2024-12-19 10:23:40,274][15938] Updated weights for policy 0, policy_version 800 (0.0014) +[2024-12-19 10:23:43,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3686.4). Total num frames: 3289088. Throughput: 0: 963.4. Samples: 249086. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:23:43,504][07135] Avg episode reward: [(0, '4.607')] +[2024-12-19 10:23:48,502][07135] Fps is (10 sec: 4505.3, 60 sec: 4027.7, 300 sec: 3708.7). Total num frames: 3313664. Throughput: 0: 1022.7. Samples: 256022. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:23:48,505][07135] Avg episode reward: [(0, '4.542')] +[2024-12-19 10:23:49,508][15938] Updated weights for policy 0, policy_version 810 (0.0015) +[2024-12-19 10:23:53,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.3, 300 sec: 3686.4). Total num frames: 3325952. Throughput: 0: 995.6. Samples: 258138. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:23:53,504][07135] Avg episode reward: [(0, '4.610')] +[2024-12-19 10:23:58,502][07135] Fps is (10 sec: 3277.0, 60 sec: 3822.9, 300 sec: 3693.6). Total num frames: 3346432. Throughput: 0: 958.7. Samples: 263520. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:23:58,504][07135] Avg episode reward: [(0, '4.731')] +[2024-12-19 10:24:00,369][15938] Updated weights for policy 0, policy_version 820 (0.0034) +[2024-12-19 10:24:03,506][07135] Fps is (10 sec: 4503.4, 60 sec: 4027.5, 300 sec: 3714.6). Total num frames: 3371008. Throughput: 0: 1003.3. Samples: 270450. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:24:03,509][07135] Avg episode reward: [(0, '4.809')] +[2024-12-19 10:24:08,502][07135] Fps is (10 sec: 4095.9, 60 sec: 3891.2, 300 sec: 3707.2). Total num frames: 3387392. Throughput: 0: 1018.8. Samples: 273512. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:24:08,511][07135] Avg episode reward: [(0, '4.750')] +[2024-12-19 10:24:11,431][15938] Updated weights for policy 0, policy_version 830 (0.0013) +[2024-12-19 10:24:13,502][07135] Fps is (10 sec: 3278.4, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 3403776. Throughput: 0: 955.7. Samples: 277784. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:24:13,508][07135] Avg episode reward: [(0, '4.597')] +[2024-12-19 10:24:18,502][07135] Fps is (10 sec: 4096.1, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 3428352. Throughput: 0: 975.1. Samples: 284646. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-12-19 10:24:18,507][07135] Avg episode reward: [(0, '4.729')] +[2024-12-19 10:24:20,823][15938] Updated weights for policy 0, policy_version 840 (0.0020) +[2024-12-19 10:24:23,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 3448832. Throughput: 0: 1007.5. Samples: 288140. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-12-19 10:24:23,505][07135] Avg episode reward: [(0, '4.883')] +[2024-12-19 10:24:28,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 3465216. Throughput: 0: 973.2. Samples: 292882. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-12-19 10:24:28,507][07135] Avg episode reward: [(0, '4.650')] +[2024-12-19 10:24:32,414][15938] Updated weights for policy 0, policy_version 850 (0.0021) +[2024-12-19 10:24:33,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 3485696. Throughput: 0: 953.2. 
Samples: 298916. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:24:33,508][07135] Avg episode reward: [(0, '4.403')] +[2024-12-19 10:24:38,503][07135] Fps is (10 sec: 4505.2, 60 sec: 4027.7, 300 sec: 3873.8). Total num frames: 3510272. Throughput: 0: 982.7. Samples: 302362. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:24:38,507][07135] Avg episode reward: [(0, '4.568')] +[2024-12-19 10:24:42,055][15938] Updated weights for policy 0, policy_version 860 (0.0021) +[2024-12-19 10:24:43,503][07135] Fps is (10 sec: 3685.9, 60 sec: 3891.1, 300 sec: 3832.2). Total num frames: 3522560. Throughput: 0: 986.9. Samples: 307932. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:24:43,508][07135] Avg episode reward: [(0, '4.698')] +[2024-12-19 10:24:48,502][07135] Fps is (10 sec: 3277.1, 60 sec: 3823.0, 300 sec: 3860.0). Total num frames: 3543040. Throughput: 0: 943.3. Samples: 312894. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:24:48,507][07135] Avg episode reward: [(0, '4.633')] +[2024-12-19 10:24:48,514][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000865_3543040.pth... +[2024-12-19 10:24:48,654][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000637_2609152.pth +[2024-12-19 10:24:53,502][07135] Fps is (10 sec: 3277.2, 60 sec: 3822.9, 300 sec: 3818.3). Total num frames: 3555328. Throughput: 0: 928.7. Samples: 315302. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:24:53,504][07135] Avg episode reward: [(0, '4.491')] +[2024-12-19 10:24:55,177][15938] Updated weights for policy 0, policy_version 870 (0.0019) +[2024-12-19 10:24:58,502][07135] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3804.4). Total num frames: 3571712. Throughput: 0: 929.4. Samples: 319608. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:24:58,506][07135] Avg episode reward: [(0, '4.692')] +[2024-12-19 10:25:03,502][07135] Fps is (10 sec: 2867.2, 60 sec: 3550.2, 300 sec: 3790.5). Total num frames: 3584000. Throughput: 0: 867.2. Samples: 323668. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:25:03,507][07135] Avg episode reward: [(0, '4.747')] +[2024-12-19 10:25:07,593][15938] Updated weights for policy 0, policy_version 880 (0.0020) +[2024-12-19 10:25:08,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3832.3). Total num frames: 3608576. Throughput: 0: 864.6. Samples: 327046. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:25:08,510][07135] Avg episode reward: [(0, '4.956')] +[2024-12-19 10:25:13,502][07135] Fps is (10 sec: 4505.5, 60 sec: 3754.6, 300 sec: 3846.1). Total num frames: 3629056. Throughput: 0: 910.4. Samples: 333850. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:25:13,504][07135] Avg episode reward: [(0, '5.239')] +[2024-12-19 10:25:13,510][15925] Saving new best policy, reward=5.239! +[2024-12-19 10:25:17,929][15938] Updated weights for policy 0, policy_version 890 (0.0032) +[2024-12-19 10:25:18,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3832.3). Total num frames: 3645440. Throughput: 0: 885.7. Samples: 338774. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:25:18,510][07135] Avg episode reward: [(0, '5.385')] +[2024-12-19 10:25:18,523][15925] Saving new best policy, reward=5.385! +[2024-12-19 10:25:23,502][07135] Fps is (10 sec: 3276.9, 60 sec: 3549.9, 300 sec: 3832.2). Total num frames: 3661824. Throughput: 0: 861.2. Samples: 341116. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:25:23,508][07135] Avg episode reward: [(0, '5.147')] +[2024-12-19 10:25:27,989][15938] Updated weights for policy 0, policy_version 900 (0.0025) +[2024-12-19 10:25:28,502][07135] Fps is (10 sec: 4095.9, 60 sec: 3686.4, 300 sec: 3846.1). Total num frames: 3686400. Throughput: 0: 891.5. Samples: 348050. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:25:28,504][07135] Avg episode reward: [(0, '5.007')] +[2024-12-19 10:25:33,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3832.2). Total num frames: 3706880. Throughput: 0: 914.6. Samples: 354052. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:25:33,504][07135] Avg episode reward: [(0, '4.992')] +[2024-12-19 10:25:38,502][07135] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3846.1). Total num frames: 3723264. Throughput: 0: 907.4. Samples: 356134. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:25:38,509][07135] Avg episode reward: [(0, '5.332')] +[2024-12-19 10:25:39,482][15938] Updated weights for policy 0, policy_version 910 (0.0022) +[2024-12-19 10:25:43,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.5, 300 sec: 3846.1). Total num frames: 3743744. Throughput: 0: 949.9. Samples: 362352. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:25:43,509][07135] Avg episode reward: [(0, '5.171')] +[2024-12-19 10:25:48,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3846.1). Total num frames: 3764224. Throughput: 0: 1011.6. Samples: 369190. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:25:48,506][07135] Avg episode reward: [(0, '4.697')] +[2024-12-19 10:25:48,620][15938] Updated weights for policy 0, policy_version 920 (0.0025) +[2024-12-19 10:25:53,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 3780608. Throughput: 0: 982.5. Samples: 371260. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:25:53,508][07135] Avg episode reward: [(0, '4.698')] +[2024-12-19 10:25:58,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 3801088. Throughput: 0: 950.1. Samples: 376604. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:25:58,504][07135] Avg episode reward: [(0, '4.555')] +[2024-12-19 10:25:59,809][15938] Updated weights for policy 0, policy_version 930 (0.0024) +[2024-12-19 10:26:03,502][07135] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3860.0). Total num frames: 3825664. Throughput: 0: 998.9. Samples: 383726. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:26:03,508][07135] Avg episode reward: [(0, '4.456')] +[2024-12-19 10:26:08,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 3842048. Throughput: 0: 1010.6. Samples: 386592. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:26:08,506][07135] Avg episode reward: [(0, '4.648')] +[2024-12-19 10:26:10,848][15938] Updated weights for policy 0, policy_version 940 (0.0040) +[2024-12-19 10:26:13,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3823.0, 300 sec: 3846.1). Total num frames: 3858432. Throughput: 0: 953.1. Samples: 390940. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:26:13,504][07135] Avg episode reward: [(0, '4.520')] +[2024-12-19 10:26:18,502][07135] Fps is (10 sec: 4095.9, 60 sec: 3959.4, 300 sec: 3860.0). Total num frames: 3883008. Throughput: 0: 976.1. Samples: 397978. 
Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:26:18,505][07135] Avg episode reward: [(0, '4.514')] +[2024-12-19 10:26:20,097][15938] Updated weights for policy 0, policy_version 950 (0.0034) +[2024-12-19 10:26:23,502][07135] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3846.1). Total num frames: 3903488. Throughput: 0: 1007.4. Samples: 401468. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:26:23,505][07135] Avg episode reward: [(0, '4.505')] +[2024-12-19 10:26:28,507][07135] Fps is (10 sec: 3275.1, 60 sec: 3822.6, 300 sec: 3832.1). Total num frames: 3915776. Throughput: 0: 973.3. Samples: 406154. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:26:28,510][07135] Avg episode reward: [(0, '4.470')] +[2024-12-19 10:26:31,639][15938] Updated weights for policy 0, policy_version 960 (0.0018) +[2024-12-19 10:26:33,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 3940352. Throughput: 0: 957.1. Samples: 412260. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:26:33,506][07135] Avg episode reward: [(0, '4.458')] +[2024-12-19 10:26:38,502][07135] Fps is (10 sec: 4508.2, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 3960832. Throughput: 0: 991.3. Samples: 415870. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:26:38,508][07135] Avg episode reward: [(0, '4.886')] +[2024-12-19 10:26:40,839][15938] Updated weights for policy 0, policy_version 970 (0.0018) +[2024-12-19 10:26:43,503][07135] Fps is (10 sec: 3686.0, 60 sec: 3891.1, 300 sec: 3832.2). Total num frames: 3977216. Throughput: 0: 998.4. Samples: 421534. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:26:43,509][07135] Avg episode reward: [(0, '4.938')] +[2024-12-19 10:26:48,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 3997696. Throughput: 0: 951.5. Samples: 426542. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:26:48,504][07135] Avg episode reward: [(0, '4.788')] +[2024-12-19 10:26:48,514][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000976_3997696.pth... +[2024-12-19 10:26:48,632][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000750_3072000.pth +[2024-12-19 10:26:51,954][15938] Updated weights for policy 0, policy_version 980 (0.0034) +[2024-12-19 10:26:53,502][07135] Fps is (10 sec: 4096.4, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 4018176. Throughput: 0: 965.2. Samples: 430028. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:26:53,512][07135] Avg episode reward: [(0, '4.962')] +[2024-12-19 10:26:58,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 4038656. Throughput: 0: 1018.7. Samples: 436780. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:26:58,508][07135] Avg episode reward: [(0, '4.909')] +[2024-12-19 10:27:03,339][15938] Updated weights for policy 0, policy_version 990 (0.0017) +[2024-12-19 10:27:03,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 4055040. Throughput: 0: 956.9. Samples: 441038. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:27:03,507][07135] Avg episode reward: [(0, '4.653')] +[2024-12-19 10:27:08,502][07135] Fps is (10 sec: 3686.3, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 4075520. Throughput: 0: 956.0. Samples: 444488. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:27:08,508][07135] Avg episode reward: [(0, '4.875')] +[2024-12-19 10:27:12,062][15938] Updated weights for policy 0, policy_version 1000 (0.0018) +[2024-12-19 10:27:13,502][07135] Fps is (10 sec: 4505.3, 60 sec: 4027.7, 300 sec: 3873.8). Total num frames: 4100096. Throughput: 0: 1007.8. Samples: 451502. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:27:13,505][07135] Avg episode reward: [(0, '4.983')] +[2024-12-19 10:27:18,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 4116480. Throughput: 0: 978.3. Samples: 456286. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:27:18,507][07135] Avg episode reward: [(0, '4.918')] +[2024-12-19 10:27:23,503][07135] Fps is (10 sec: 3276.7, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 4132864. Throughput: 0: 955.3. Samples: 458860. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:27:23,510][07135] Avg episode reward: [(0, '4.909')] +[2024-12-19 10:27:23,597][15938] Updated weights for policy 0, policy_version 1010 (0.0014) +[2024-12-19 10:27:28,502][07135] Fps is (10 sec: 4096.1, 60 sec: 4028.1, 300 sec: 3873.8). Total num frames: 4157440. Throughput: 0: 986.1. Samples: 465906. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:27:28,505][07135] Avg episode reward: [(0, '4.777')] +[2024-12-19 10:27:33,293][15938] Updated weights for policy 0, policy_version 1020 (0.0029) +[2024-12-19 10:27:33,502][07135] Fps is (10 sec: 4505.9, 60 sec: 3959.4, 300 sec: 3860.0). Total num frames: 4177920. Throughput: 0: 1001.7. Samples: 471618. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:27:33,508][07135] Avg episode reward: [(0, '4.593')] +[2024-12-19 10:27:38,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 4190208. Throughput: 0: 971.7. Samples: 473756. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:27:38,505][07135] Avg episode reward: [(0, '4.490')] +[2024-12-19 10:27:43,502][07135] Fps is (10 sec: 3686.5, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 4214784. Throughput: 0: 960.4. Samples: 480000. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:27:43,508][07135] Avg episode reward: [(0, '4.556')] +[2024-12-19 10:27:44,080][15938] Updated weights for policy 0, policy_version 1030 (0.0042) +[2024-12-19 10:27:48,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3873.9). Total num frames: 4235264. Throughput: 0: 1015.3. Samples: 486726. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:27:48,504][07135] Avg episode reward: [(0, '4.647')] +[2024-12-19 10:27:53,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 4251648. Throughput: 0: 987.2. Samples: 488914. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:27:53,507][07135] Avg episode reward: [(0, '4.567')] +[2024-12-19 10:27:55,661][15938] Updated weights for policy 0, policy_version 1040 (0.0015) +[2024-12-19 10:27:58,502][07135] Fps is (10 sec: 3686.3, 60 sec: 3891.2, 300 sec: 3873.9). Total num frames: 4272128. Throughput: 0: 949.2. Samples: 494216. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:27:58,510][07135] Avg episode reward: [(0, '4.412')] +[2024-12-19 10:28:03,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 4292608. Throughput: 0: 1000.2. Samples: 501296. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:28:03,509][07135] Avg episode reward: [(0, '4.665')] +[2024-12-19 10:28:04,369][15938] Updated weights for policy 0, policy_version 1050 (0.0020) +[2024-12-19 10:28:08,503][07135] Fps is (10 sec: 4095.6, 60 sec: 3959.4, 300 sec: 3859.9). Total num frames: 4313088. Throughput: 0: 1006.9. Samples: 504170. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:28:08,508][07135] Avg episode reward: [(0, '4.837')] +[2024-12-19 10:28:13,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3860.0). Total num frames: 4329472. Throughput: 0: 945.0. Samples: 508432. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:28:13,507][07135] Avg episode reward: [(0, '4.877')] +[2024-12-19 10:28:16,044][15938] Updated weights for policy 0, policy_version 1060 (0.0029) +[2024-12-19 10:28:18,502][07135] Fps is (10 sec: 3686.8, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 4349952. Throughput: 0: 973.7. Samples: 515436. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:28:18,504][07135] Avg episode reward: [(0, '4.625')] +[2024-12-19 10:28:23,508][07135] Fps is (10 sec: 4502.8, 60 sec: 4027.4, 300 sec: 3873.8). Total num frames: 4374528. Throughput: 0: 1001.3. Samples: 518822. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:28:23,517][07135] Avg episode reward: [(0, '4.914')] +[2024-12-19 10:28:26,483][15938] Updated weights for policy 0, policy_version 1070 (0.0028) +[2024-12-19 10:28:28,502][07135] Fps is (10 sec: 3686.1, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 4386816. Throughput: 0: 966.4. Samples: 523488. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:28:28,505][07135] Avg episode reward: [(0, '4.922')] +[2024-12-19 10:28:33,502][07135] Fps is (10 sec: 3278.8, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 4407296. Throughput: 0: 955.0. Samples: 529700. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:28:33,508][07135] Avg episode reward: [(0, '4.559')] +[2024-12-19 10:28:36,442][15938] Updated weights for policy 0, policy_version 1080 (0.0025) +[2024-12-19 10:28:38,502][07135] Fps is (10 sec: 4505.8, 60 sec: 4027.7, 300 sec: 3873.8). Total num frames: 4431872. Throughput: 0: 982.5. Samples: 533128. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:28:38,504][07135] Avg episode reward: [(0, '4.355')] +[2024-12-19 10:28:43,503][07135] Fps is (10 sec: 4095.5, 60 sec: 3891.1, 300 sec: 3846.1). Total num frames: 4448256. Throughput: 0: 989.2. Samples: 538732. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:28:43,509][07135] Avg episode reward: [(0, '4.223')] +[2024-12-19 10:28:48,107][15938] Updated weights for policy 0, policy_version 1090 (0.0013) +[2024-12-19 10:28:48,502][07135] Fps is (10 sec: 3276.9, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 4464640. Throughput: 0: 942.5. Samples: 543708. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:28:48,509][07135] Avg episode reward: [(0, '4.511')] +[2024-12-19 10:28:48,525][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001090_4464640.pth... +[2024-12-19 10:28:48,650][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000865_3543040.pth +[2024-12-19 10:28:53,502][07135] Fps is (10 sec: 4096.5, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 4489216. Throughput: 0: 955.0. Samples: 547146. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:28:53,506][07135] Avg episode reward: [(0, '4.728')] +[2024-12-19 10:28:56,828][15938] Updated weights for policy 0, policy_version 1100 (0.0027) +[2024-12-19 10:28:58,502][07135] Fps is (10 sec: 4505.7, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 4509696. Throughput: 0: 1009.8. Samples: 553872. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:28:58,507][07135] Avg episode reward: [(0, '4.685')] +[2024-12-19 10:29:03,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 4521984. Throughput: 0: 947.9. Samples: 558090. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:29:03,504][07135] Avg episode reward: [(0, '4.748')] +[2024-12-19 10:29:08,262][15938] Updated weights for policy 0, policy_version 1110 (0.0025) +[2024-12-19 10:29:08,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.3, 300 sec: 3873.8). Total num frames: 4546560. Throughput: 0: 949.0. Samples: 561520. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:29:08,503][07135] Avg episode reward: [(0, '4.607')] +[2024-12-19 10:29:13,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 4567040. Throughput: 0: 999.8. Samples: 568480. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:29:13,506][07135] Avg episode reward: [(0, '4.524')] +[2024-12-19 10:29:18,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 4583424. Throughput: 0: 969.5. Samples: 573328. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:29:18,504][07135] Avg episode reward: [(0, '4.618')] +[2024-12-19 10:29:19,204][15938] Updated weights for policy 0, policy_version 1120 (0.0028) +[2024-12-19 10:29:23,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3823.3, 300 sec: 3860.0). Total num frames: 4603904. Throughput: 0: 949.6. Samples: 575858. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:29:23,508][07135] Avg episode reward: [(0, '4.728')] +[2024-12-19 10:29:28,502][07135] Fps is (10 sec: 4095.9, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 4624384. Throughput: 0: 978.9. Samples: 582782. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:29:28,512][07135] Avg episode reward: [(0, '4.640')] +[2024-12-19 10:29:29,091][15938] Updated weights for policy 0, policy_version 1130 (0.0026) +[2024-12-19 10:29:33,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3818.3). Total num frames: 4636672. Throughput: 0: 962.2. Samples: 587008. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:29:33,503][07135] Avg episode reward: [(0, '4.709')] +[2024-12-19 10:29:38,504][07135] Fps is (10 sec: 2456.9, 60 sec: 3618.0, 300 sec: 3818.3). Total num frames: 4648960. Throughput: 0: 923.2. Samples: 588692. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:29:38,510][07135] Avg episode reward: [(0, '4.666')] +[2024-12-19 10:29:43,306][15938] Updated weights for policy 0, policy_version 1140 (0.0021) +[2024-12-19 10:29:43,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3686.5, 300 sec: 3818.3). Total num frames: 4669440. Throughput: 0: 881.6. Samples: 593542. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:29:43,507][07135] Avg episode reward: [(0, '4.591')] +[2024-12-19 10:29:48,502][07135] Fps is (10 sec: 4097.0, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 4689920. Throughput: 0: 942.7. Samples: 600514. 
Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:29:48,504][07135] Avg episode reward: [(0, '4.567')] +[2024-12-19 10:29:53,388][15938] Updated weights for policy 0, policy_version 1150 (0.0023) +[2024-12-19 10:29:53,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3860.0). Total num frames: 4710400. Throughput: 0: 931.8. Samples: 603450. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:29:53,506][07135] Avg episode reward: [(0, '4.565')] +[2024-12-19 10:29:58,502][07135] Fps is (10 sec: 3686.5, 60 sec: 3618.1, 300 sec: 3873.8). Total num frames: 4726784. Throughput: 0: 874.0. Samples: 607810. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:29:58,506][07135] Avg episode reward: [(0, '4.773')] +[2024-12-19 10:30:03,503][07135] Fps is (10 sec: 3686.1, 60 sec: 3754.6, 300 sec: 3859.9). Total num frames: 4747264. Throughput: 0: 920.6. Samples: 614756. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:30:03,505][07135] Avg episode reward: [(0, '4.762')] +[2024-12-19 10:30:03,702][15938] Updated weights for policy 0, policy_version 1160 (0.0020) +[2024-12-19 10:30:08,502][07135] Fps is (10 sec: 4505.7, 60 sec: 3754.7, 300 sec: 3873.8). Total num frames: 4771840. Throughput: 0: 941.4. Samples: 618220. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:30:08,504][07135] Avg episode reward: [(0, '4.648')] +[2024-12-19 10:30:13,502][07135] Fps is (10 sec: 3686.6, 60 sec: 3618.1, 300 sec: 3860.0). Total num frames: 4784128. Throughput: 0: 889.9. Samples: 622828. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:30:13,510][07135] Avg episode reward: [(0, '4.702')] +[2024-12-19 10:30:15,345][15938] Updated weights for policy 0, policy_version 1170 (0.0017) +[2024-12-19 10:30:18,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3873.8). Total num frames: 4804608. Throughput: 0: 930.9. Samples: 628898. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:30:18,505][07135] Avg episode reward: [(0, '4.625')] +[2024-12-19 10:30:23,502][07135] Fps is (10 sec: 4505.7, 60 sec: 3754.7, 300 sec: 3873.8). Total num frames: 4829184. Throughput: 0: 971.2. Samples: 632394. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:30:23,504][07135] Avg episode reward: [(0, '4.611')] +[2024-12-19 10:30:24,054][15938] Updated weights for policy 0, policy_version 1180 (0.0015) +[2024-12-19 10:30:28,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3860.0). Total num frames: 4845568. Throughput: 0: 991.2. Samples: 638146. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:30:28,506][07135] Avg episode reward: [(0, '4.614')] +[2024-12-19 10:30:33,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 4861952. Throughput: 0: 947.7. Samples: 643160. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:30:33,508][07135] Avg episode reward: [(0, '4.532')] +[2024-12-19 10:30:35,585][15938] Updated weights for policy 0, policy_version 1190 (0.0021) +[2024-12-19 10:30:38,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.7, 300 sec: 3873.8). Total num frames: 4886528. Throughput: 0: 961.4. Samples: 646714. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:30:38,508][07135] Avg episode reward: [(0, '4.661')] +[2024-12-19 10:30:43,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 4907008. Throughput: 0: 1010.7. Samples: 653292. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:30:43,508][07135] Avg episode reward: [(0, '4.762')] +[2024-12-19 10:30:46,126][15938] Updated weights for policy 0, policy_version 1200 (0.0023) +[2024-12-19 10:30:48,503][07135] Fps is (10 sec: 3276.2, 60 sec: 3822.8, 300 sec: 3859.9). Total num frames: 4919296. Throughput: 0: 952.6. Samples: 657624. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:30:48,508][07135] Avg episode reward: [(0, '4.656')] +[2024-12-19 10:30:48,518][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001201_4919296.pth... +[2024-12-19 10:30:48,711][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000976_3997696.pth +[2024-12-19 10:30:53,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 4943872. Throughput: 0: 945.6. Samples: 660772. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:30:53,506][07135] Avg episode reward: [(0, '4.770')] +[2024-12-19 10:30:56,179][15938] Updated weights for policy 0, policy_version 1210 (0.0022) +[2024-12-19 10:30:58,502][07135] Fps is (10 sec: 4506.4, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 4964352. Throughput: 0: 997.8. Samples: 667730. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:30:58,505][07135] Avg episode reward: [(0, '4.645')] +[2024-12-19 10:31:03,502][07135] Fps is (10 sec: 3686.1, 60 sec: 3891.2, 300 sec: 3859.9). Total num frames: 4980736. Throughput: 0: 971.6. Samples: 672622. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:31:03,508][07135] Avg episode reward: [(0, '4.666')] +[2024-12-19 10:31:07,700][15938] Updated weights for policy 0, policy_version 1220 (0.0025) +[2024-12-19 10:31:08,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 4997120. Throughput: 0: 946.4. Samples: 674980. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:31:08,506][07135] Avg episode reward: [(0, '4.843')] +[2024-12-19 10:31:13,505][07135] Fps is (10 sec: 4094.8, 60 sec: 3959.2, 300 sec: 3859.9). Total num frames: 5021696. Throughput: 0: 973.7. Samples: 681968. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-12-19 10:31:13,511][07135] Avg episode reward: [(0, '4.993')] +[2024-12-19 10:31:16,839][15938] Updated weights for policy 0, policy_version 1230 (0.0017) +[2024-12-19 10:31:18,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 5042176. Throughput: 0: 992.6. Samples: 687826. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:31:18,504][07135] Avg episode reward: [(0, '4.934')] +[2024-12-19 10:31:23,502][07135] Fps is (10 sec: 3278.0, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 5054464. Throughput: 0: 958.7. Samples: 689856. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-12-19 10:31:23,503][07135] Avg episode reward: [(0, '4.917')] +[2024-12-19 10:31:28,112][15938] Updated weights for policy 0, policy_version 1240 (0.0023) +[2024-12-19 10:31:28,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 5079040. Throughput: 0: 952.0. Samples: 696130. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:31:28,507][07135] Avg episode reward: [(0, '4.963')] +[2024-12-19 10:31:33,502][07135] Fps is (10 sec: 4505.3, 60 sec: 3959.4, 300 sec: 3860.0). Total num frames: 5099520. Throughput: 0: 1009.4. Samples: 703046. 
Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:31:33,506][07135] Avg episode reward: [(0, '4.955')] +[2024-12-19 10:31:38,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 5115904. Throughput: 0: 984.0. Samples: 705050. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:31:38,504][07135] Avg episode reward: [(0, '4.668')] +[2024-12-19 10:31:39,327][15938] Updated weights for policy 0, policy_version 1250 (0.0031) +[2024-12-19 10:31:43,502][07135] Fps is (10 sec: 3686.6, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 5136384. Throughput: 0: 947.3. Samples: 710358. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:31:43,504][07135] Avg episode reward: [(0, '4.616')] +[2024-12-19 10:31:48,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.6, 300 sec: 3860.0). Total num frames: 5156864. Throughput: 0: 989.7. Samples: 717156. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:31:48,507][07135] Avg episode reward: [(0, '4.672')] +[2024-12-19 10:31:48,603][15938] Updated weights for policy 0, policy_version 1260 (0.0022) +[2024-12-19 10:31:53,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 5173248. Throughput: 0: 1001.8. Samples: 720060. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:31:53,506][07135] Avg episode reward: [(0, '4.689')] +[2024-12-19 10:31:58,502][07135] Fps is (10 sec: 3686.2, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 5193728. Throughput: 0: 945.0. Samples: 724490. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:31:58,511][07135] Avg episode reward: [(0, '4.609')] +[2024-12-19 10:32:00,267][15938] Updated weights for policy 0, policy_version 1270 (0.0024) +[2024-12-19 10:32:03,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 5214208. Throughput: 0: 967.8. Samples: 731376. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:32:03,504][07135] Avg episode reward: [(0, '4.744')] +[2024-12-19 10:32:08,509][07135] Fps is (10 sec: 4093.0, 60 sec: 3959.0, 300 sec: 3846.0). Total num frames: 5234688. Throughput: 0: 999.4. Samples: 734838. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:32:08,513][07135] Avg episode reward: [(0, '4.537')] +[2024-12-19 10:32:10,108][15938] Updated weights for policy 0, policy_version 1280 (0.0027) +[2024-12-19 10:32:13,504][07135] Fps is (10 sec: 3685.7, 60 sec: 3823.0, 300 sec: 3846.1). Total num frames: 5251072. Throughput: 0: 963.5. Samples: 739488. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:32:13,505][07135] Avg episode reward: [(0, '4.396')] +[2024-12-19 10:32:18,502][07135] Fps is (10 sec: 3689.2, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 5271552. Throughput: 0: 943.4. Samples: 745500. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:32:18,503][07135] Avg episode reward: [(0, '4.494')] +[2024-12-19 10:32:20,757][15938] Updated weights for policy 0, policy_version 1290 (0.0031) +[2024-12-19 10:32:23,502][07135] Fps is (10 sec: 4506.5, 60 sec: 4027.7, 300 sec: 3860.0). Total num frames: 5296128. Throughput: 0: 976.6. Samples: 748998. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:32:23,507][07135] Avg episode reward: [(0, '4.603')] +[2024-12-19 10:32:28,505][07135] Fps is (10 sec: 4094.5, 60 sec: 3891.0, 300 sec: 3846.0). Total num frames: 5312512. Throughput: 0: 982.7. Samples: 754584. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:32:28,508][07135] Avg episode reward: [(0, '4.675')] +[2024-12-19 10:32:32,226][15938] Updated weights for policy 0, policy_version 1300 (0.0013) +[2024-12-19 10:32:33,502][07135] Fps is (10 sec: 3276.6, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 5328896. Throughput: 0: 945.8. Samples: 759716. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:32:33,507][07135] Avg episode reward: [(0, '4.647')] +[2024-12-19 10:32:38,502][07135] Fps is (10 sec: 4097.5, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 5353472. Throughput: 0: 959.5. Samples: 763236. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:32:38,504][07135] Avg episode reward: [(0, '4.542')] +[2024-12-19 10:32:40,953][15938] Updated weights for policy 0, policy_version 1310 (0.0028) +[2024-12-19 10:32:43,502][07135] Fps is (10 sec: 4096.2, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 5369856. Throughput: 0: 1008.1. Samples: 769854. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:32:43,510][07135] Avg episode reward: [(0, '4.710')] +[2024-12-19 10:32:48,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 5386240. Throughput: 0: 948.7. Samples: 774068. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:32:48,506][07135] Avg episode reward: [(0, '4.644')] +[2024-12-19 10:32:48,523][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001315_5386240.pth... +[2024-12-19 10:32:48,712][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001090_4464640.pth +[2024-12-19 10:32:52,673][15938] Updated weights for policy 0, policy_version 1320 (0.0029) +[2024-12-19 10:32:53,502][07135] Fps is (10 sec: 3686.5, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 5406720. Throughput: 0: 945.6. Samples: 777382. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:32:53,507][07135] Avg episode reward: [(0, '4.515')] +[2024-12-19 10:32:58,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 5431296. Throughput: 0: 996.3. Samples: 784320. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:32:58,507][07135] Avg episode reward: [(0, '4.630')] +[2024-12-19 10:33:03,217][15938] Updated weights for policy 0, policy_version 1330 (0.0014) +[2024-12-19 10:33:03,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 5447680. Throughput: 0: 971.3. Samples: 789208. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:33:03,506][07135] Avg episode reward: [(0, '4.667')] +[2024-12-19 10:33:08,502][07135] Fps is (10 sec: 3276.7, 60 sec: 3823.4, 300 sec: 3846.1). Total num frames: 5464064. Throughput: 0: 946.4. Samples: 791586. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:33:08,510][07135] Avg episode reward: [(0, '4.856')] +[2024-12-19 10:33:13,183][15938] Updated weights for policy 0, policy_version 1340 (0.0020) +[2024-12-19 10:33:13,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.6, 300 sec: 3860.0). Total num frames: 5488640. Throughput: 0: 974.4. Samples: 798430. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:33:13,505][07135] Avg episode reward: [(0, '4.830')] +[2024-12-19 10:33:18,502][07135] Fps is (10 sec: 4096.1, 60 sec: 3891.2, 300 sec: 3832.3). Total num frames: 5505024. Throughput: 0: 989.4. Samples: 804238. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:33:18,504][07135] Avg episode reward: [(0, '4.824')] +[2024-12-19 10:33:23,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 5521408. Throughput: 0: 955.6. Samples: 806238. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:33:23,505][07135] Avg episode reward: [(0, '4.826')] +[2024-12-19 10:33:24,760][15938] Updated weights for policy 0, policy_version 1350 (0.0033) +[2024-12-19 10:33:28,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.4, 300 sec: 3860.0). Total num frames: 5545984. Throughput: 0: 949.0. Samples: 812560. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:33:28,512][07135] Avg episode reward: [(0, '4.666')] +[2024-12-19 10:33:33,503][07135] Fps is (10 sec: 4504.9, 60 sec: 3959.4, 300 sec: 3846.1). Total num frames: 5566464. Throughput: 0: 1008.4. Samples: 819446. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:33:33,510][07135] Avg episode reward: [(0, '4.782')] +[2024-12-19 10:33:33,747][15938] Updated weights for policy 0, policy_version 1360 (0.0020) +[2024-12-19 10:33:38,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 5582848. Throughput: 0: 981.6. Samples: 821552. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:33:38,504][07135] Avg episode reward: [(0, '4.573')] +[2024-12-19 10:33:43,502][07135] Fps is (10 sec: 3687.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 5603328. Throughput: 0: 945.9. Samples: 826886. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:33:43,514][07135] Avg episode reward: [(0, '4.268')] +[2024-12-19 10:33:45,242][15938] Updated weights for policy 0, policy_version 1370 (0.0024) +[2024-12-19 10:33:48,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 5623808. Throughput: 0: 989.6. Samples: 833740. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:33:48,508][07135] Avg episode reward: [(0, '4.651')] +[2024-12-19 10:33:53,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 5640192. Throughput: 0: 1000.7. Samples: 836616. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:33:53,510][07135] Avg episode reward: [(0, '4.738')] +[2024-12-19 10:33:56,573][15938] Updated weights for policy 0, policy_version 1380 (0.0035) +[2024-12-19 10:33:58,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 5656576. Throughput: 0: 946.7. Samples: 841030. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:33:58,504][07135] Avg episode reward: [(0, '4.653')] +[2024-12-19 10:34:03,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 5681152. Throughput: 0: 975.6. Samples: 848142. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:34:03,509][07135] Avg episode reward: [(0, '4.806')] +[2024-12-19 10:34:05,580][15938] Updated weights for policy 0, policy_version 1390 (0.0030) +[2024-12-19 10:34:08,504][07135] Fps is (10 sec: 4504.4, 60 sec: 3959.3, 300 sec: 3846.0). Total num frames: 5701632. Throughput: 0: 1006.2. Samples: 851520. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:34:08,512][07135] Avg episode reward: [(0, '4.705')] +[2024-12-19 10:34:13,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 5713920. Throughput: 0: 957.7. Samples: 855656. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:34:13,508][07135] Avg episode reward: [(0, '4.837')] +[2024-12-19 10:34:18,502][07135] Fps is (10 sec: 2458.2, 60 sec: 3686.4, 300 sec: 3804.4). Total num frames: 5726208. Throughput: 0: 880.8. Samples: 859080. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:34:18,508][07135] Avg episode reward: [(0, '4.803')] +[2024-12-19 10:34:19,871][15938] Updated weights for policy 0, policy_version 1400 (0.0035) +[2024-12-19 10:34:23,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3818.3). Total num frames: 5750784. Throughput: 0: 911.0. Samples: 862546. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:34:23,508][07135] Avg episode reward: [(0, '4.942')] +[2024-12-19 10:34:28,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 5771264. Throughput: 0: 947.6. Samples: 869528. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:34:28,505][07135] Avg episode reward: [(0, '4.858')] +[2024-12-19 10:34:29,317][15938] Updated weights for policy 0, policy_version 1410 (0.0015) +[2024-12-19 10:34:33,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.5, 300 sec: 3860.0). Total num frames: 5787648. Throughput: 0: 893.3. Samples: 873938. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:34:33,506][07135] Avg episode reward: [(0, '4.964')] +[2024-12-19 10:34:38,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 5808128. Throughput: 0: 894.0. Samples: 876846. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:34:38,509][07135] Avg episode reward: [(0, '4.859')] +[2024-12-19 10:34:40,288][15938] Updated weights for policy 0, policy_version 1420 (0.0025) +[2024-12-19 10:34:43,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 5828608. Throughput: 0: 947.8. Samples: 883680. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:34:43,504][07135] Avg episode reward: [(0, '4.666')] +[2024-12-19 10:34:48,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3846.1). Total num frames: 5844992. Throughput: 0: 907.5. Samples: 888980. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:34:48,504][07135] Avg episode reward: [(0, '4.726')] +[2024-12-19 10:34:48,521][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001427_5844992.pth... +[2024-12-19 10:34:48,708][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001201_4919296.pth +[2024-12-19 10:34:52,007][15938] Updated weights for policy 0, policy_version 1430 (0.0042) +[2024-12-19 10:34:53,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3846.1). Total num frames: 5861376. Throughput: 0: 878.8. Samples: 891062. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:34:53,509][07135] Avg episode reward: [(0, '4.904')] +[2024-12-19 10:34:58,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 5885952. Throughput: 0: 937.1. Samples: 897824. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:34:58,504][07135] Avg episode reward: [(0, '4.671')] +[2024-12-19 10:35:00,815][15938] Updated weights for policy 0, policy_version 1440 (0.0020) +[2024-12-19 10:35:03,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 5906432. Throughput: 0: 1002.7. Samples: 904200. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:35:03,504][07135] Avg episode reward: [(0, '4.624')] +[2024-12-19 10:35:08,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3618.3, 300 sec: 3846.1). Total num frames: 5918720. Throughput: 0: 971.4. Samples: 906258. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:35:08,507][07135] Avg episode reward: [(0, '4.564')] +[2024-12-19 10:35:12,309][15938] Updated weights for policy 0, policy_version 1450 (0.0027) +[2024-12-19 10:35:13,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 5943296. Throughput: 0: 947.9. Samples: 912182. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:35:13,503][07135] Avg episode reward: [(0, '4.302')] +[2024-12-19 10:35:18,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 5963776. Throughput: 0: 998.1. Samples: 918854. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-12-19 10:35:18,507][07135] Avg episode reward: [(0, '4.376')] +[2024-12-19 10:35:22,822][15938] Updated weights for policy 0, policy_version 1460 (0.0020) +[2024-12-19 10:35:23,504][07135] Fps is (10 sec: 3685.5, 60 sec: 3822.8, 300 sec: 3846.0). Total num frames: 5980160. Throughput: 0: 987.2. Samples: 921272. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:35:23,507][07135] Avg episode reward: [(0, '4.500')] +[2024-12-19 10:35:28,502][07135] Fps is (10 sec: 3276.5, 60 sec: 3754.6, 300 sec: 3846.1). Total num frames: 5996544. Throughput: 0: 942.8. Samples: 926108. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:35:28,508][07135] Avg episode reward: [(0, '4.603')] +[2024-12-19 10:35:33,050][15938] Updated weights for policy 0, policy_version 1470 (0.0017) +[2024-12-19 10:35:33,502][07135] Fps is (10 sec: 4097.0, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 6021120. Throughput: 0: 975.6. Samples: 932884. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:35:33,505][07135] Avg episode reward: [(0, '4.738')] +[2024-12-19 10:35:38,502][07135] Fps is (10 sec: 4505.9, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 6041600. Throughput: 0: 1003.4. Samples: 936214. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:35:38,506][07135] Avg episode reward: [(0, '4.636')] +[2024-12-19 10:35:43,504][07135] Fps is (10 sec: 3275.9, 60 sec: 3754.5, 300 sec: 3846.1). Total num frames: 6053888. Throughput: 0: 947.1. Samples: 940448. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:35:43,510][07135] Avg episode reward: [(0, '4.399')] +[2024-12-19 10:35:44,719][15938] Updated weights for policy 0, policy_version 1480 (0.0037) +[2024-12-19 10:35:48,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 6078464. Throughput: 0: 949.0. Samples: 946906. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:35:48,504][07135] Avg episode reward: [(0, '4.386')] +[2024-12-19 10:35:53,502][07135] Fps is (10 sec: 4506.8, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 6098944. Throughput: 0: 978.9. Samples: 950308. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:35:53,507][07135] Avg episode reward: [(0, '4.439')] +[2024-12-19 10:35:54,029][15938] Updated weights for policy 0, policy_version 1490 (0.0024) +[2024-12-19 10:35:58,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 6111232. Throughput: 0: 952.6. Samples: 955048. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:35:58,507][07135] Avg episode reward: [(0, '4.466')] +[2024-12-19 10:36:03,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 6131712. Throughput: 0: 926.2. Samples: 960534. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:36:03,508][07135] Avg episode reward: [(0, '4.493')] +[2024-12-19 10:36:05,735][15938] Updated weights for policy 0, policy_version 1500 (0.0025) +[2024-12-19 10:36:08,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 6152192. Throughput: 0: 946.5. Samples: 963862. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:36:08,514][07135] Avg episode reward: [(0, '4.557')] +[2024-12-19 10:36:13,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 6172672. Throughput: 0: 966.1. Samples: 969580. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:36:13,506][07135] Avg episode reward: [(0, '4.492')] +[2024-12-19 10:36:17,746][15938] Updated weights for policy 0, policy_version 1510 (0.0020) +[2024-12-19 10:36:18,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3832.2). Total num frames: 6184960. Throughput: 0: 917.9. Samples: 974188. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:36:18,507][07135] Avg episode reward: [(0, '4.407')] +[2024-12-19 10:36:23,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3823.1, 300 sec: 3832.2). Total num frames: 6209536. Throughput: 0: 922.2. Samples: 977712. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:36:23,504][07135] Avg episode reward: [(0, '4.647')] +[2024-12-19 10:36:26,253][15938] Updated weights for policy 0, policy_version 1520 (0.0021) +[2024-12-19 10:36:28,502][07135] Fps is (10 sec: 4915.2, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 6234112. Throughput: 0: 986.3. Samples: 984828. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:36:28,505][07135] Avg episode reward: [(0, '4.842')] +[2024-12-19 10:36:33,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 6246400. Throughput: 0: 937.7. Samples: 989104. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-12-19 10:36:33,504][07135] Avg episode reward: [(0, '4.667')] +[2024-12-19 10:36:37,786][15938] Updated weights for policy 0, policy_version 1530 (0.0030) +[2024-12-19 10:36:38,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 6266880. Throughput: 0: 929.2. Samples: 992124. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:36:38,504][07135] Avg episode reward: [(0, '4.615')] +[2024-12-19 10:36:43,502][07135] Fps is (10 sec: 4505.5, 60 sec: 3959.6, 300 sec: 3846.1). Total num frames: 6291456. Throughput: 0: 982.1. Samples: 999242. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:36:43,508][07135] Avg episode reward: [(0, '4.737')] +[2024-12-19 10:36:47,945][15938] Updated weights for policy 0, policy_version 1540 (0.0021) +[2024-12-19 10:36:48,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 6307840. Throughput: 0: 976.3. Samples: 1004468. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:36:48,506][07135] Avg episode reward: [(0, '4.524')] +[2024-12-19 10:36:48,517][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001540_6307840.pth... 
+[2024-12-19 10:36:48,679][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001315_5386240.pth +[2024-12-19 10:36:53,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 6324224. Throughput: 0: 949.3. Samples: 1006582. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:36:53,507][07135] Avg episode reward: [(0, '4.445')] +[2024-12-19 10:36:58,080][15938] Updated weights for policy 0, policy_version 1550 (0.0020) +[2024-12-19 10:36:58,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 6348800. Throughput: 0: 973.9. Samples: 1013404. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:36:58,503][07135] Avg episode reward: [(0, '4.542')] +[2024-12-19 10:37:03,502][07135] Fps is (10 sec: 4505.8, 60 sec: 3959.5, 300 sec: 3846.2). Total num frames: 6369280. Throughput: 0: 1012.5. Samples: 1019750. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:37:03,506][07135] Avg episode reward: [(0, '4.514')] +[2024-12-19 10:37:08,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 6381568. Throughput: 0: 980.7. Samples: 1021842. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:37:08,508][07135] Avg episode reward: [(0, '4.552')] +[2024-12-19 10:37:09,774][15938] Updated weights for policy 0, policy_version 1560 (0.0041) +[2024-12-19 10:37:13,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 6406144. Throughput: 0: 954.5. Samples: 1027780. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:37:13,508][07135] Avg episode reward: [(0, '4.579')] +[2024-12-19 10:37:18,351][15938] Updated weights for policy 0, policy_version 1570 (0.0038) +[2024-12-19 10:37:18,502][07135] Fps is (10 sec: 4915.2, 60 sec: 4096.0, 300 sec: 3846.1). Total num frames: 6430720. Throughput: 0: 1013.4. Samples: 1034708. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:37:18,504][07135] Avg episode reward: [(0, '4.551')] +[2024-12-19 10:37:23,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 6443008. Throughput: 0: 999.2. Samples: 1037086. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:37:23,505][07135] Avg episode reward: [(0, '4.563')] +[2024-12-19 10:37:28,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 6463488. Throughput: 0: 949.1. Samples: 1041952. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:37:28,505][07135] Avg episode reward: [(0, '4.418')] +[2024-12-19 10:37:30,089][15938] Updated weights for policy 0, policy_version 1580 (0.0041) +[2024-12-19 10:37:33,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3832.2). Total num frames: 6483968. Throughput: 0: 988.7. Samples: 1048960. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:37:33,504][07135] Avg episode reward: [(0, '4.613')] +[2024-12-19 10:37:38,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 6504448. Throughput: 0: 1018.1. Samples: 1052396. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:37:38,507][07135] Avg episode reward: [(0, '4.848')] +[2024-12-19 10:37:40,268][15938] Updated weights for policy 0, policy_version 1590 (0.0020) +[2024-12-19 10:37:43,502][07135] Fps is (10 sec: 3686.3, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 6520832. Throughput: 0: 962.3. Samples: 1056708. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:37:43,507][07135] Avg episode reward: [(0, '4.589')] +[2024-12-19 10:37:48,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 6541312. Throughput: 0: 967.2. Samples: 1063276. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:37:48,508][07135] Avg episode reward: [(0, '4.535')] +[2024-12-19 10:37:50,413][15938] Updated weights for policy 0, policy_version 1600 (0.0016) +[2024-12-19 10:37:53,502][07135] Fps is (10 sec: 4505.7, 60 sec: 4027.8, 300 sec: 3846.1). Total num frames: 6565888. Throughput: 0: 998.1. Samples: 1066758. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:37:53,509][07135] Avg episode reward: [(0, '4.564')] +[2024-12-19 10:37:58,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 6582272. Throughput: 0: 982.7. Samples: 1072000. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:37:58,507][07135] Avg episode reward: [(0, '4.605')] +[2024-12-19 10:38:01,866][15938] Updated weights for policy 0, policy_version 1610 (0.0018) +[2024-12-19 10:38:03,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 6598656. Throughput: 0: 953.7. Samples: 1077626. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:38:03,503][07135] Avg episode reward: [(0, '4.439')] +[2024-12-19 10:38:08,502][07135] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3846.1). Total num frames: 6623232. Throughput: 0: 978.2. Samples: 1081104. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:38:08,508][07135] Avg episode reward: [(0, '4.685')] +[2024-12-19 10:38:10,564][15938] Updated weights for policy 0, policy_version 1620 (0.0016) +[2024-12-19 10:38:13,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 6643712. Throughput: 0: 1008.7. Samples: 1087344. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:38:13,506][07135] Avg episode reward: [(0, '4.895')] +[2024-12-19 10:38:18,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 6656000. Throughput: 0: 954.4. Samples: 1091910. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:38:18,504][07135] Avg episode reward: [(0, '4.740')] +[2024-12-19 10:38:22,108][15938] Updated weights for policy 0, policy_version 1630 (0.0027) +[2024-12-19 10:38:23,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 6680576. Throughput: 0: 955.9. Samples: 1095412. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:38:23,509][07135] Avg episode reward: [(0, '4.579')] +[2024-12-19 10:38:28,502][07135] Fps is (10 sec: 4915.2, 60 sec: 4027.7, 300 sec: 3860.0). Total num frames: 6705152. Throughput: 0: 1017.7. Samples: 1102506. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:38:28,504][07135] Avg episode reward: [(0, '4.554')] +[2024-12-19 10:38:32,614][15938] Updated weights for policy 0, policy_version 1640 (0.0025) +[2024-12-19 10:38:33,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 6717440. Throughput: 0: 972.5. Samples: 1107038. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:38:33,510][07135] Avg episode reward: [(0, '4.442')] +[2024-12-19 10:38:38,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 6737920. Throughput: 0: 959.2. Samples: 1109920. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:38:38,503][07135] Avg episode reward: [(0, '4.430')] +[2024-12-19 10:38:42,199][15938] Updated weights for policy 0, policy_version 1650 (0.0014) +[2024-12-19 10:38:43,502][07135] Fps is (10 sec: 4505.6, 60 sec: 4027.8, 300 sec: 3860.0). Total num frames: 6762496. Throughput: 0: 1002.5. Samples: 1117112. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:38:43,508][07135] Avg episode reward: [(0, '4.710')] +[2024-12-19 10:38:48,503][07135] Fps is (10 sec: 4095.6, 60 sec: 3959.4, 300 sec: 3859.9). Total num frames: 6778880. Throughput: 0: 987.8. Samples: 1122080. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:38:48,505][07135] Avg episode reward: [(0, '4.741')] +[2024-12-19 10:38:48,529][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001655_6778880.pth... +[2024-12-19 10:38:48,790][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001427_5844992.pth +[2024-12-19 10:38:53,503][07135] Fps is (10 sec: 2866.9, 60 sec: 3754.6, 300 sec: 3846.1). Total num frames: 6791168. Throughput: 0: 948.1. Samples: 1123770. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:38:53,509][07135] Avg episode reward: [(0, '4.675')] +[2024-12-19 10:38:56,514][15938] Updated weights for policy 0, policy_version 1660 (0.0019) +[2024-12-19 10:38:58,502][07135] Fps is (10 sec: 2867.4, 60 sec: 3754.6, 300 sec: 3818.3). Total num frames: 6807552. Throughput: 0: 903.9. Samples: 1128020. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:38:58,511][07135] Avg episode reward: [(0, '4.485')] +[2024-12-19 10:39:03,502][07135] Fps is (10 sec: 3686.8, 60 sec: 3822.9, 300 sec: 3818.3). Total num frames: 6828032. Throughput: 0: 960.6. Samples: 1135138. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:39:03,505][07135] Avg episode reward: [(0, '4.479')] +[2024-12-19 10:39:05,207][15938] Updated weights for policy 0, policy_version 1670 (0.0017) +[2024-12-19 10:39:08,502][07135] Fps is (10 sec: 4096.1, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 6848512. Throughput: 0: 947.5. Samples: 1138048. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:39:08,504][07135] Avg episode reward: [(0, '4.441')] +[2024-12-19 10:39:13,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3860.0). Total num frames: 6864896. Throughput: 0: 887.8. Samples: 1142456. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:39:13,504][07135] Avg episode reward: [(0, '4.760')] +[2024-12-19 10:39:16,683][15938] Updated weights for policy 0, policy_version 1680 (0.0019) +[2024-12-19 10:39:18,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 6889472. Throughput: 0: 943.5. Samples: 1149494. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:39:18,508][07135] Avg episode reward: [(0, '5.061')] +[2024-12-19 10:39:23,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 6909952. Throughput: 0: 957.1. Samples: 1152988. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:39:23,504][07135] Avg episode reward: [(0, '5.036')] +[2024-12-19 10:39:27,157][15938] Updated weights for policy 0, policy_version 1690 (0.0028) +[2024-12-19 10:39:28,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3846.1). Total num frames: 6922240. Throughput: 0: 902.6. Samples: 1157728. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:39:28,507][07135] Avg episode reward: [(0, '4.919')] +[2024-12-19 10:39:33,502][07135] Fps is (10 sec: 3686.3, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 6946816. Throughput: 0: 931.2. Samples: 1163982. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:39:33,505][07135] Avg episode reward: [(0, '4.644')] +[2024-12-19 10:39:36,970][15938] Updated weights for policy 0, policy_version 1700 (0.0033) +[2024-12-19 10:39:38,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 6967296. Throughput: 0: 970.1. Samples: 1167422. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:39:38,508][07135] Avg episode reward: [(0, '4.577')] +[2024-12-19 10:39:43,506][07135] Fps is (10 sec: 3684.8, 60 sec: 3686.1, 300 sec: 3859.9). Total num frames: 6983680. Throughput: 0: 1002.0. Samples: 1173116. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:39:43,509][07135] Avg episode reward: [(0, '4.562')] +[2024-12-19 10:39:48,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3686.5, 300 sec: 3860.0). Total num frames: 7000064. Throughput: 0: 953.1. Samples: 1178028. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:39:48,506][07135] Avg episode reward: [(0, '4.446')] +[2024-12-19 10:39:48,738][15938] Updated weights for policy 0, policy_version 1710 (0.0014) +[2024-12-19 10:39:53,502][07135] Fps is (10 sec: 4097.9, 60 sec: 3891.3, 300 sec: 3860.0). Total num frames: 7024640. Throughput: 0: 959.6. Samples: 1181232. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:39:53,504][07135] Avg episode reward: [(0, '4.648')] +[2024-12-19 10:39:58,502][07135] Fps is (10 sec: 4095.7, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 7041024. Throughput: 0: 1005.3. Samples: 1187694. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:39:58,511][07135] Avg episode reward: [(0, '4.611')] +[2024-12-19 10:39:58,514][15938] Updated weights for policy 0, policy_version 1720 (0.0015) +[2024-12-19 10:40:03,503][07135] Fps is (10 sec: 3276.3, 60 sec: 3822.8, 300 sec: 3859.9). Total num frames: 7057408. Throughput: 0: 943.1. Samples: 1191936. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:40:03,509][07135] Avg episode reward: [(0, '4.681')] +[2024-12-19 10:40:08,502][07135] Fps is (10 sec: 4096.3, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7081984. Throughput: 0: 941.0. Samples: 1195334. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:40:08,504][07135] Avg episode reward: [(0, '4.924')] +[2024-12-19 10:40:09,229][15938] Updated weights for policy 0, policy_version 1730 (0.0025) +[2024-12-19 10:40:13,502][07135] Fps is (10 sec: 4506.3, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 7102464. Throughput: 0: 991.2. Samples: 1202332. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:40:13,504][07135] Avg episode reward: [(0, '4.835')] +[2024-12-19 10:40:18,505][07135] Fps is (10 sec: 3685.1, 60 sec: 3822.7, 300 sec: 3859.9). Total num frames: 7118848. Throughput: 0: 959.1. Samples: 1207146. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-12-19 10:40:18,512][07135] Avg episode reward: [(0, '4.766')] +[2024-12-19 10:40:20,897][15938] Updated weights for policy 0, policy_version 1740 (0.0023) +[2024-12-19 10:40:23,505][07135] Fps is (10 sec: 3275.6, 60 sec: 3754.4, 300 sec: 3859.9). Total num frames: 7135232. Throughput: 0: 931.1. Samples: 1209326. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:40:23,512][07135] Avg episode reward: [(0, '4.622')] +[2024-12-19 10:40:28,502][07135] Fps is (10 sec: 4097.5, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 7159808. Throughput: 0: 959.8. Samples: 1216302. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:40:28,506][07135] Avg episode reward: [(0, '4.626')] +[2024-12-19 10:40:29,717][15938] Updated weights for policy 0, policy_version 1750 (0.0020) +[2024-12-19 10:40:33,502][07135] Fps is (10 sec: 4507.2, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7180288. Throughput: 0: 979.3. Samples: 1222098. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-12-19 10:40:33,506][07135] Avg episode reward: [(0, '4.728')] +[2024-12-19 10:40:38,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 7192576. Throughput: 0: 954.8. Samples: 1224198. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:40:38,504][07135] Avg episode reward: [(0, '4.724')] +[2024-12-19 10:40:41,516][15938] Updated weights for policy 0, policy_version 1760 (0.0021) +[2024-12-19 10:40:43,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.5, 300 sec: 3860.0). Total num frames: 7217152. Throughput: 0: 950.6. Samples: 1230470. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-12-19 10:40:43,504][07135] Avg episode reward: [(0, '4.762')] +[2024-12-19 10:40:48,502][07135] Fps is (10 sec: 4505.7, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 7237632. Throughput: 0: 1000.6. Samples: 1236960. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:40:48,506][07135] Avg episode reward: [(0, '4.707')] +[2024-12-19 10:40:48,525][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001767_7237632.pth... +[2024-12-19 10:40:48,733][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001540_6307840.pth +[2024-12-19 10:40:52,447][15938] Updated weights for policy 0, policy_version 1770 (0.0023) +[2024-12-19 10:40:53,502][07135] Fps is (10 sec: 3276.7, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 7249920. Throughput: 0: 968.7. Samples: 1238926. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:40:53,511][07135] Avg episode reward: [(0, '4.631')] +[2024-12-19 10:40:58,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3823.0, 300 sec: 3860.0). Total num frames: 7270400. Throughput: 0: 925.3. Samples: 1243972. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:40:58,505][07135] Avg episode reward: [(0, '4.846')] +[2024-12-19 10:41:02,833][15938] Updated weights for policy 0, policy_version 1780 (0.0014) +[2024-12-19 10:41:03,502][07135] Fps is (10 sec: 4096.1, 60 sec: 3891.3, 300 sec: 3860.0). Total num frames: 7290880. Throughput: 0: 965.1. Samples: 1250574. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:41:03,503][07135] Avg episode reward: [(0, '5.026')] +[2024-12-19 10:41:08,502][07135] Fps is (10 sec: 3686.2, 60 sec: 3754.6, 300 sec: 3846.1). Total num frames: 7307264. Throughput: 0: 978.1. Samples: 1253338. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:41:08,506][07135] Avg episode reward: [(0, '4.908')] +[2024-12-19 10:41:13,507][07135] Fps is (10 sec: 3275.0, 60 sec: 3686.1, 300 sec: 3859.9). Total num frames: 7323648. Throughput: 0: 914.8. Samples: 1257472. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:41:13,513][07135] Avg episode reward: [(0, '4.830')] +[2024-12-19 10:41:14,971][15938] Updated weights for policy 0, policy_version 1790 (0.0028) +[2024-12-19 10:41:18,502][07135] Fps is (10 sec: 3686.6, 60 sec: 3754.9, 300 sec: 3846.1). Total num frames: 7344128. Throughput: 0: 932.0. Samples: 1264036. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:41:18,507][07135] Avg episode reward: [(0, '4.980')] +[2024-12-19 10:41:23,503][07135] Fps is (10 sec: 4097.5, 60 sec: 3823.1, 300 sec: 3832.2). Total num frames: 7364608. Throughput: 0: 954.2. Samples: 1267138. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:41:23,506][07135] Avg episode reward: [(0, '5.035')] +[2024-12-19 10:41:25,301][15938] Updated weights for policy 0, policy_version 1800 (0.0030) +[2024-12-19 10:41:28,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3846.1). Total num frames: 7380992. Throughput: 0: 916.6. Samples: 1271716. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:41:28,508][07135] Avg episode reward: [(0, '4.938')] +[2024-12-19 10:41:33,502][07135] Fps is (10 sec: 3687.0, 60 sec: 3686.4, 300 sec: 3846.1). Total num frames: 7401472. Throughput: 0: 901.6. Samples: 1277532. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:41:33,509][07135] Avg episode reward: [(0, '4.708')] +[2024-12-19 10:41:36,128][15938] Updated weights for policy 0, policy_version 1810 (0.0013) +[2024-12-19 10:41:38,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 7421952. Throughput: 0: 933.2. Samples: 1280920. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:41:38,503][07135] Avg episode reward: [(0, '4.635')] +[2024-12-19 10:41:43,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3832.2). Total num frames: 7438336. Throughput: 0: 946.8. Samples: 1286580. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:41:43,507][07135] Avg episode reward: [(0, '4.728')] +[2024-12-19 10:41:47,811][15938] Updated weights for policy 0, policy_version 1820 (0.0024) +[2024-12-19 10:41:48,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3832.2). Total num frames: 7454720. Throughput: 0: 911.2. Samples: 1291580. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:41:48,504][07135] Avg episode reward: [(0, '4.879')] +[2024-12-19 10:41:53,502][07135] Fps is (10 sec: 4095.9, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 7479296. Throughput: 0: 923.8. Samples: 1294910. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:41:53,504][07135] Avg episode reward: [(0, '4.921')] +[2024-12-19 10:41:57,063][15938] Updated weights for policy 0, policy_version 1830 (0.0015) +[2024-12-19 10:41:58,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 7499776. Throughput: 0: 975.4. Samples: 1301362. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:41:58,505][07135] Avg episode reward: [(0, '4.785')] +[2024-12-19 10:42:03,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3832.2). Total num frames: 7512064. Throughput: 0: 923.3. Samples: 1305584. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:42:03,509][07135] Avg episode reward: [(0, '4.683')] +[2024-12-19 10:42:08,415][15938] Updated weights for policy 0, policy_version 1840 (0.0021) +[2024-12-19 10:42:08,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3832.2). Total num frames: 7536640. Throughput: 0: 931.4. 
Samples: 1309050. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:42:08,503][07135] Avg episode reward: [(0, '4.599')] +[2024-12-19 10:42:13,504][07135] Fps is (10 sec: 4504.4, 60 sec: 3891.4, 300 sec: 3818.3). Total num frames: 7557120. Throughput: 0: 983.5. Samples: 1315974. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:42:13,507][07135] Avg episode reward: [(0, '4.816')] +[2024-12-19 10:42:18,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 7573504. Throughput: 0: 956.8. Samples: 1320586. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:42:18,508][07135] Avg episode reward: [(0, '4.813')] +[2024-12-19 10:42:19,848][15938] Updated weights for policy 0, policy_version 1850 (0.0018) +[2024-12-19 10:42:23,502][07135] Fps is (10 sec: 3277.6, 60 sec: 3754.8, 300 sec: 3818.3). Total num frames: 7589888. Throughput: 0: 939.5. Samples: 1323196. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:42:23,510][07135] Avg episode reward: [(0, '4.691')] +[2024-12-19 10:42:28,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 7614464. Throughput: 0: 965.2. Samples: 1330016. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:42:28,505][07135] Avg episode reward: [(0, '4.682')] +[2024-12-19 10:42:28,926][15938] Updated weights for policy 0, policy_version 1860 (0.0031) +[2024-12-19 10:42:33,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3818.3). Total num frames: 7630848. Throughput: 0: 972.5. Samples: 1335342. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:42:33,508][07135] Avg episode reward: [(0, '4.660')] +[2024-12-19 10:42:38,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 7647232. Throughput: 0: 942.7. Samples: 1337330. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:42:38,510][07135] Avg episode reward: [(0, '4.630')] +[2024-12-19 10:42:41,044][15938] Updated weights for policy 0, policy_version 1870 (0.0035) +[2024-12-19 10:42:43,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3818.3). Total num frames: 7667712. Throughput: 0: 940.9. Samples: 1343704. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:42:43,508][07135] Avg episode reward: [(0, '4.525')] +[2024-12-19 10:42:48,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3804.4). Total num frames: 7688192. Throughput: 0: 984.1. Samples: 1349870. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:42:48,508][07135] Avg episode reward: [(0, '4.690')] +[2024-12-19 10:42:48,521][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001877_7688192.pth... +[2024-12-19 10:42:48,718][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001655_6778880.pth +[2024-12-19 10:42:52,144][15938] Updated weights for policy 0, policy_version 1880 (0.0022) +[2024-12-19 10:42:53,505][07135] Fps is (10 sec: 3275.6, 60 sec: 3686.2, 300 sec: 3790.5). Total num frames: 7700480. Throughput: 0: 949.3. Samples: 1351772. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:42:53,510][07135] Avg episode reward: [(0, '4.846')] +[2024-12-19 10:42:58,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 7725056. Throughput: 0: 920.2. Samples: 1357382. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:42:58,509][07135] Avg episode reward: [(0, '4.724')] +[2024-12-19 10:43:01,933][15938] Updated weights for policy 0, policy_version 1890 (0.0018) +[2024-12-19 10:43:03,502][07135] Fps is (10 sec: 4507.3, 60 sec: 3891.2, 300 sec: 3804.4). Total num frames: 7745536. Throughput: 0: 969.5. Samples: 1364214. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:43:03,504][07135] Avg episode reward: [(0, '4.650')] +[2024-12-19 10:43:08,505][07135] Fps is (10 sec: 3685.1, 60 sec: 3754.4, 300 sec: 3790.5). Total num frames: 7761920. Throughput: 0: 966.3. Samples: 1366682. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:43:08,508][07135] Avg episode reward: [(0, '4.632')] +[2024-12-19 10:43:13,315][15938] Updated weights for policy 0, policy_version 1900 (0.0022) +[2024-12-19 10:43:13,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3754.8, 300 sec: 3818.3). Total num frames: 7782400. Throughput: 0: 926.7. Samples: 1371716. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:43:13,509][07135] Avg episode reward: [(0, '4.711')] +[2024-12-19 10:43:18,502][07135] Fps is (10 sec: 4507.3, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 7806976. Throughput: 0: 969.7. Samples: 1378980. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:43:18,504][07135] Avg episode reward: [(0, '4.904')] +[2024-12-19 10:43:22,512][15938] Updated weights for policy 0, policy_version 1910 (0.0020) +[2024-12-19 10:43:23,505][07135] Fps is (10 sec: 4095.5, 60 sec: 3891.1, 300 sec: 3790.5). Total num frames: 7823360. Throughput: 0: 996.3. Samples: 1382166. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:43:23,507][07135] Avg episode reward: [(0, '4.870')] +[2024-12-19 10:43:28,502][07135] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3790.5). Total num frames: 7835648. Throughput: 0: 945.3. Samples: 1386242. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:43:28,506][07135] Avg episode reward: [(0, '4.610')] +[2024-12-19 10:43:33,502][07135] Fps is (10 sec: 2867.6, 60 sec: 3686.4, 300 sec: 3776.7). Total num frames: 7852032. Throughput: 0: 904.6. Samples: 1390576. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:43:33,508][07135] Avg episode reward: [(0, '4.553')] +[2024-12-19 10:43:35,723][15938] Updated weights for policy 0, policy_version 1920 (0.0027) +[2024-12-19 10:43:38,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 7876608. Throughput: 0: 939.8. Samples: 1394060. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:43:38,508][07135] Avg episode reward: [(0, '4.495')] +[2024-12-19 10:43:43,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 7888896. Throughput: 0: 932.3. Samples: 1399336. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:43:43,508][07135] Avg episode reward: [(0, '4.428')] +[2024-12-19 10:43:47,057][15938] Updated weights for policy 0, policy_version 1930 (0.0020) +[2024-12-19 10:43:48,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3790.6). Total num frames: 7909376. Throughput: 0: 910.7. Samples: 1405194. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:43:48,508][07135] Avg episode reward: [(0, '4.599')] +[2024-12-19 10:43:53,502][07135] Fps is (10 sec: 4505.3, 60 sec: 3891.4, 300 sec: 3818.3). Total num frames: 7933952. Throughput: 0: 933.7. Samples: 1408696. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:43:53,507][07135] Avg episode reward: [(0, '4.625')] +[2024-12-19 10:43:55,710][15938] Updated weights for policy 0, policy_version 1940 (0.0015) +[2024-12-19 10:43:58,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3818.3). Total num frames: 7954432. Throughput: 0: 959.8. Samples: 1414906. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:43:58,512][07135] Avg episode reward: [(0, '4.639')] +[2024-12-19 10:44:03,502][07135] Fps is (10 sec: 3686.6, 60 sec: 3754.7, 300 sec: 3804.4). Total num frames: 7970816. Throughput: 0: 910.8. Samples: 1419968. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:44:03,511][07135] Avg episode reward: [(0, '4.738')] +[2024-12-19 10:44:06,703][15938] Updated weights for policy 0, policy_version 1950 (0.0015) +[2024-12-19 10:44:08,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3823.2, 300 sec: 3818.3). Total num frames: 7991296. Throughput: 0: 919.8. Samples: 1423556. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:44:08,508][07135] Avg episode reward: [(0, '4.868')] +[2024-12-19 10:44:13,502][07135] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 8015872. Throughput: 0: 982.9. Samples: 1430474. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:44:13,507][07135] Avg episode reward: [(0, '4.506')] +[2024-12-19 10:44:17,421][15938] Updated weights for policy 0, policy_version 1960 (0.0024) +[2024-12-19 10:44:18,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3790.5). Total num frames: 8028160. Throughput: 0: 984.3. Samples: 1434870. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:44:18,504][07135] Avg episode reward: [(0, '4.510')] +[2024-12-19 10:44:23,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3832.2). Total num frames: 8052736. Throughput: 0: 978.8. Samples: 1438104. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:44:23,504][07135] Avg episode reward: [(0, '4.340')] +[2024-12-19 10:44:26,693][15938] Updated weights for policy 0, policy_version 1970 (0.0023) +[2024-12-19 10:44:28,502][07135] Fps is (10 sec: 4915.3, 60 sec: 4027.7, 300 sec: 3832.2). Total num frames: 8077312. Throughput: 0: 1021.1. Samples: 1445286. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:44:28,508][07135] Avg episode reward: [(0, '4.378')] +[2024-12-19 10:44:33,506][07135] Fps is (10 sec: 3684.7, 60 sec: 3959.2, 300 sec: 3804.4). Total num frames: 8089600. Throughput: 0: 999.0. Samples: 1450152. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:44:33,509][07135] Avg episode reward: [(0, '4.582')] +[2024-12-19 10:44:37,949][15938] Updated weights for policy 0, policy_version 1980 (0.0013) +[2024-12-19 10:44:38,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3818.4). Total num frames: 8110080. Throughput: 0: 979.9. Samples: 1452792. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:44:38,511][07135] Avg episode reward: [(0, '4.723')] +[2024-12-19 10:44:43,502][07135] Fps is (10 sec: 4507.7, 60 sec: 4096.0, 300 sec: 3846.1). Total num frames: 8134656. Throughput: 0: 1002.1. Samples: 1460002. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:44:43,510][07135] Avg episode reward: [(0, '4.524')] +[2024-12-19 10:44:47,460][15938] Updated weights for policy 0, policy_version 1990 (0.0027) +[2024-12-19 10:44:48,504][07135] Fps is (10 sec: 4094.9, 60 sec: 4027.6, 300 sec: 3818.3). Total num frames: 8151040. 
Throughput: 0: 1014.4. Samples: 1465618. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:44:48,507][07135] Avg episode reward: [(0, '4.380')] +[2024-12-19 10:44:48,515][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001990_8151040.pth... +[2024-12-19 10:44:48,674][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001767_7237632.pth +[2024-12-19 10:44:53,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 8167424. Throughput: 0: 982.2. Samples: 1467754. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:44:53,510][07135] Avg episode reward: [(0, '4.542')] +[2024-12-19 10:44:58,008][15938] Updated weights for policy 0, policy_version 2000 (0.0022) +[2024-12-19 10:44:58,502][07135] Fps is (10 sec: 4096.9, 60 sec: 3959.4, 300 sec: 3846.1). Total num frames: 8192000. Throughput: 0: 977.5. Samples: 1474464. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:44:58,504][07135] Avg episode reward: [(0, '4.635')] +[2024-12-19 10:45:03,504][07135] Fps is (10 sec: 4504.4, 60 sec: 4027.6, 300 sec: 3832.2). Total num frames: 8212480. Throughput: 0: 1026.7. Samples: 1481072. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:45:03,510][07135] Avg episode reward: [(0, '4.654')] +[2024-12-19 10:45:08,502][07135] Fps is (10 sec: 3686.6, 60 sec: 3959.5, 300 sec: 3818.3). Total num frames: 8228864. Throughput: 0: 1001.5. Samples: 1483172. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:45:08,511][07135] Avg episode reward: [(0, '4.692')] +[2024-12-19 10:45:09,270][15938] Updated weights for policy 0, policy_version 2010 (0.0015) +[2024-12-19 10:45:13,502][07135] Fps is (10 sec: 4097.0, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 8253440. Throughput: 0: 977.7. Samples: 1489282. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:45:13,503][07135] Avg episode reward: [(0, '4.632')] +[2024-12-19 10:45:17,761][15938] Updated weights for policy 0, policy_version 2020 (0.0024) +[2024-12-19 10:45:18,502][07135] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3860.0). Total num frames: 8273920. Throughput: 0: 1028.7. Samples: 1496438. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:45:18,504][07135] Avg episode reward: [(0, '4.677')] +[2024-12-19 10:45:23,502][07135] Fps is (10 sec: 3686.5, 60 sec: 3959.5, 300 sec: 3832.2). Total num frames: 8290304. Throughput: 0: 1020.6. Samples: 1498720. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:45:23,507][07135] Avg episode reward: [(0, '4.750')] +[2024-12-19 10:45:28,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 8310784. Throughput: 0: 977.0. Samples: 1503966. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:45:28,508][07135] Avg episode reward: [(0, '4.576')] +[2024-12-19 10:45:29,105][15938] Updated weights for policy 0, policy_version 2030 (0.0024) +[2024-12-19 10:45:33,502][07135] Fps is (10 sec: 4505.6, 60 sec: 4096.3, 300 sec: 3873.8). Total num frames: 8335360. Throughput: 0: 1010.1. Samples: 1511072. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:45:33,504][07135] Avg episode reward: [(0, '4.478')] +[2024-12-19 10:45:38,502][07135] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3846.1). Total num frames: 8351744. Throughput: 0: 1034.8. Samples: 1514322. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:45:38,507][07135] Avg episode reward: [(0, '4.623')] +[2024-12-19 10:45:38,876][15938] Updated weights for policy 0, policy_version 2040 (0.0023) +[2024-12-19 10:45:43,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 8368128. Throughput: 0: 982.9. Samples: 1518694. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:45:43,505][07135] Avg episode reward: [(0, '4.685')] +[2024-12-19 10:45:48,502][07135] Fps is (10 sec: 4096.0, 60 sec: 4027.9, 300 sec: 3873.8). Total num frames: 8392704. Throughput: 0: 994.1. Samples: 1525806. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:45:48,509][07135] Avg episode reward: [(0, '4.738')] +[2024-12-19 10:45:48,905][15938] Updated weights for policy 0, policy_version 2050 (0.0024) +[2024-12-19 10:45:53,502][07135] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3873.8). Total num frames: 8413184. Throughput: 0: 1025.1. Samples: 1529302. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:45:53,504][07135] Avg episode reward: [(0, '4.629')] +[2024-12-19 10:45:58,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 8429568. Throughput: 0: 993.7. Samples: 1533998. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:45:58,506][07135] Avg episode reward: [(0, '4.468')] +[2024-12-19 10:46:00,357][15938] Updated weights for policy 0, policy_version 2060 (0.0023) +[2024-12-19 10:46:03,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3959.6, 300 sec: 3873.9). Total num frames: 8450048. Throughput: 0: 976.3. Samples: 1540372. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:46:03,510][07135] Avg episode reward: [(0, '4.633')] +[2024-12-19 10:46:08,502][07135] Fps is (10 sec: 4505.5, 60 sec: 4096.0, 300 sec: 3901.7). Total num frames: 8474624. Throughput: 0: 1004.0. Samples: 1543900. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:46:08,510][07135] Avg episode reward: [(0, '5.012')] +[2024-12-19 10:46:08,881][15938] Updated weights for policy 0, policy_version 2070 (0.0014) +[2024-12-19 10:46:13,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 8491008. Throughput: 0: 1015.4. Samples: 1549660. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:46:13,506][07135] Avg episode reward: [(0, '5.131')] +[2024-12-19 10:46:18,502][07135] Fps is (10 sec: 3686.5, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 8511488. Throughput: 0: 979.7. Samples: 1555160. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:46:18,505][07135] Avg episode reward: [(0, '4.950')] +[2024-12-19 10:46:19,991][15938] Updated weights for policy 0, policy_version 2080 (0.0019) +[2024-12-19 10:46:23,502][07135] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3915.5). Total num frames: 8536064. Throughput: 0: 988.6. Samples: 1558810. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:46:23,504][07135] Avg episode reward: [(0, '4.990')] +[2024-12-19 10:46:28,506][07135] Fps is (10 sec: 4094.2, 60 sec: 4027.4, 300 sec: 3901.6). Total num frames: 8552448. Throughput: 0: 1033.3. Samples: 1565196. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:46:28,509][07135] Avg episode reward: [(0, '4.823')] +[2024-12-19 10:46:30,382][15938] Updated weights for policy 0, policy_version 2090 (0.0026) +[2024-12-19 10:46:33,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 8568832. 
Throughput: 0: 981.2. Samples: 1569958. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:46:33,504][07135] Avg episode reward: [(0, '4.575')] +[2024-12-19 10:46:38,502][07135] Fps is (10 sec: 4097.7, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 8593408. Throughput: 0: 982.6. Samples: 1573518. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:46:38,504][07135] Avg episode reward: [(0, '4.562')] +[2024-12-19 10:46:39,800][15938] Updated weights for policy 0, policy_version 2100 (0.0020) +[2024-12-19 10:46:43,505][07135] Fps is (10 sec: 4504.0, 60 sec: 4095.8, 300 sec: 3929.3). Total num frames: 8613888. Throughput: 0: 1036.3. Samples: 1580636. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:46:43,508][07135] Avg episode reward: [(0, '4.534')] +[2024-12-19 10:46:48,503][07135] Fps is (10 sec: 3685.9, 60 sec: 3959.4, 300 sec: 3901.6). Total num frames: 8630272. Throughput: 0: 994.4. Samples: 1585120. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:46:48,506][07135] Avg episode reward: [(0, '4.705')] +[2024-12-19 10:46:48,518][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002107_8630272.pth... +[2024-12-19 10:46:48,684][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001877_7688192.pth +[2024-12-19 10:46:51,206][15938] Updated weights for policy 0, policy_version 2110 (0.0022) +[2024-12-19 10:46:53,502][07135] Fps is (10 sec: 3687.7, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 8650752. Throughput: 0: 984.3. Samples: 1588192. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:46:53,506][07135] Avg episode reward: [(0, '4.713')] +[2024-12-19 10:46:58,502][07135] Fps is (10 sec: 4506.3, 60 sec: 4096.0, 300 sec: 3943.3). Total num frames: 8675328. Throughput: 0: 1011.8. Samples: 1595192. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:46:58,504][07135] Avg episode reward: [(0, '4.535')] +[2024-12-19 10:47:00,162][15938] Updated weights for policy 0, policy_version 2120 (0.0034) +[2024-12-19 10:47:03,502][07135] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 8691712. Throughput: 0: 1006.3. Samples: 1600442. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:47:03,508][07135] Avg episode reward: [(0, '4.468')] +[2024-12-19 10:47:08,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 8712192. Throughput: 0: 979.1. Samples: 1602868. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:47:08,510][07135] Avg episode reward: [(0, '4.656')] +[2024-12-19 10:47:10,946][15938] Updated weights for policy 0, policy_version 2130 (0.0021) +[2024-12-19 10:47:13,502][07135] Fps is (10 sec: 4505.5, 60 sec: 4096.0, 300 sec: 3943.3). Total num frames: 8736768. Throughput: 0: 997.7. Samples: 1610088. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:47:13,509][07135] Avg episode reward: [(0, '4.555')] +[2024-12-19 10:47:18,502][07135] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3943.3). Total num frames: 8753152. Throughput: 0: 1025.6. Samples: 1616108. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:47:18,504][07135] Avg episode reward: [(0, '4.509')] +[2024-12-19 10:47:21,728][15938] Updated weights for policy 0, policy_version 2140 (0.0022) +[2024-12-19 10:47:23,502][07135] Fps is (10 sec: 3276.9, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 8769536. Throughput: 0: 995.5. Samples: 1618316. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:47:23,504][07135] Avg episode reward: [(0, '4.780')] +[2024-12-19 10:47:28,502][07135] Fps is (10 sec: 4096.0, 60 sec: 4028.0, 300 sec: 3943.3). Total num frames: 8794112. Throughput: 0: 980.9. Samples: 1624772. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:47:28,504][07135] Avg episode reward: [(0, '4.896')] +[2024-12-19 10:47:30,678][15938] Updated weights for policy 0, policy_version 2150 (0.0027) +[2024-12-19 10:47:33,502][07135] Fps is (10 sec: 4505.4, 60 sec: 4096.0, 300 sec: 3957.1). Total num frames: 8814592. Throughput: 0: 1031.4. Samples: 1631532. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-19 10:47:33,505][07135] Avg episode reward: [(0, '4.442')] +[2024-12-19 10:47:38,502][07135] Fps is (10 sec: 3686.3, 60 sec: 3959.5, 300 sec: 3943.3). Total num frames: 8830976. Throughput: 0: 1011.6. Samples: 1633716. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:47:38,509][07135] Avg episode reward: [(0, '4.580')] +[2024-12-19 10:47:41,816][15938] Updated weights for policy 0, policy_version 2160 (0.0015) +[2024-12-19 10:47:43,502][07135] Fps is (10 sec: 4096.2, 60 sec: 4028.0, 300 sec: 3957.2). Total num frames: 8855552. Throughput: 0: 988.2. Samples: 1639660. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:47:43,508][07135] Avg episode reward: [(0, '4.725')] +[2024-12-19 10:47:48,502][07135] Fps is (10 sec: 4505.7, 60 sec: 4096.1, 300 sec: 3985.0). Total num frames: 8876032. Throughput: 0: 1029.9. Samples: 1646786. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:47:48,509][07135] Avg episode reward: [(0, '4.612')] +[2024-12-19 10:47:51,619][15938] Updated weights for policy 0, policy_version 2170 (0.0016) +[2024-12-19 10:47:53,502][07135] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 3957.2). Total num frames: 8892416. Throughput: 0: 1029.1. Samples: 1649178. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:47:53,504][07135] Avg episode reward: [(0, '4.595')] +[2024-12-19 10:47:58,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3957.2). Total num frames: 8912896. Throughput: 0: 982.1. Samples: 1654282. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:47:58,511][07135] Avg episode reward: [(0, '4.694')] +[2024-12-19 10:48:01,861][15938] Updated weights for policy 0, policy_version 2180 (0.0019) +[2024-12-19 10:48:03,503][07135] Fps is (10 sec: 4095.3, 60 sec: 4027.6, 300 sec: 3971.1). Total num frames: 8933376. Throughput: 0: 990.0. Samples: 1660658. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:48:03,511][07135] Avg episode reward: [(0, '4.707')] +[2024-12-19 10:48:08,504][07135] Fps is (10 sec: 3275.9, 60 sec: 3891.0, 300 sec: 3943.2). Total num frames: 8945664. Throughput: 0: 987.1. Samples: 1662740. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:48:08,507][07135] Avg episode reward: [(0, '4.570')] +[2024-12-19 10:48:13,502][07135] Fps is (10 sec: 2458.0, 60 sec: 3686.4, 300 sec: 3901.6). Total num frames: 8957952. Throughput: 0: 929.2. Samples: 1666588. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:48:13,503][07135] Avg episode reward: [(0, '4.402')] +[2024-12-19 10:48:15,795][15938] Updated weights for policy 0, policy_version 2190 (0.0030) +[2024-12-19 10:48:18,502][07135] Fps is (10 sec: 3687.4, 60 sec: 3822.9, 300 sec: 3929.4). Total num frames: 8982528. Throughput: 0: 917.4. Samples: 1672816. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:48:18,505][07135] Avg episode reward: [(0, '4.447')] +[2024-12-19 10:48:23,502][07135] Fps is (10 sec: 4915.2, 60 sec: 3959.5, 300 sec: 3971.0). Total num frames: 9007104. Throughput: 0: 949.9. Samples: 1676460. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:48:23,504][07135] Avg episode reward: [(0, '4.499')] +[2024-12-19 10:48:24,589][15938] Updated weights for policy 0, policy_version 2200 (0.0015) +[2024-12-19 10:48:28,504][07135] Fps is (10 sec: 3685.5, 60 sec: 3754.5, 300 sec: 3957.1). Total num frames: 9019392. Throughput: 0: 941.0. Samples: 1682006. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:48:28,509][07135] Avg episode reward: [(0, '4.643')] +[2024-12-19 10:48:33,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3943.3). Total num frames: 9039872. Throughput: 0: 906.0. Samples: 1687554. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:48:33,509][07135] Avg episode reward: [(0, '4.772')] +[2024-12-19 10:48:35,588][15938] Updated weights for policy 0, policy_version 2210 (0.0022) +[2024-12-19 10:48:38,502][07135] Fps is (10 sec: 4506.8, 60 sec: 3891.2, 300 sec: 3984.9). Total num frames: 9064448. Throughput: 0: 932.5. Samples: 1691140. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:48:38,509][07135] Avg episode reward: [(0, '4.819')] +[2024-12-19 10:48:43,503][07135] Fps is (10 sec: 4505.1, 60 sec: 3822.9, 300 sec: 3984.9). Total num frames: 9084928. Throughput: 0: 961.0. Samples: 1697530. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-19 10:48:43,511][07135] Avg episode reward: [(0, '4.690')] +[2024-12-19 10:48:46,148][15938] Updated weights for policy 0, policy_version 2220 (0.0019) +[2024-12-19 10:48:48,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3957.2). Total num frames: 9101312. Throughput: 0: 927.6. Samples: 1702400. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:48:48,507][07135] Avg episode reward: [(0, '4.623')] +[2024-12-19 10:48:48,522][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002222_9101312.pth... +[2024-12-19 10:48:48,654][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001990_8151040.pth +[2024-12-19 10:48:53,502][07135] Fps is (10 sec: 3686.8, 60 sec: 3822.9, 300 sec: 3957.2). Total num frames: 9121792. Throughput: 0: 958.3. Samples: 1705860. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:48:53,504][07135] Avg episode reward: [(0, '4.726')] +[2024-12-19 10:48:55,606][15938] Updated weights for policy 0, policy_version 2230 (0.0015) +[2024-12-19 10:48:58,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3971.0). Total num frames: 9142272. Throughput: 0: 1027.3. Samples: 1712816. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:48:58,507][07135] Avg episode reward: [(0, '4.757')] +[2024-12-19 10:49:03,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3754.8, 300 sec: 3957.2). Total num frames: 9158656. Throughput: 0: 986.5. Samples: 1717210. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:49:03,507][07135] Avg episode reward: [(0, '4.642')] +[2024-12-19 10:49:06,869][15938] Updated weights for policy 0, policy_version 2240 (0.0017) +[2024-12-19 10:49:08,502][07135] Fps is (10 sec: 3686.3, 60 sec: 3891.4, 300 sec: 3943.3). Total num frames: 9179136. Throughput: 0: 977.3. Samples: 1720440. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:49:08,510][07135] Avg episode reward: [(0, '4.672')] +[2024-12-19 10:49:13,502][07135] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3984.9). Total num frames: 9203712. Throughput: 0: 1007.7. Samples: 1727352. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:49:13,503][07135] Avg episode reward: [(0, '4.449')] +[2024-12-19 10:49:16,683][15938] Updated weights for policy 0, policy_version 2250 (0.0024) +[2024-12-19 10:49:18,504][07135] Fps is (10 sec: 4095.2, 60 sec: 3959.3, 300 sec: 3957.1). Total num frames: 9220096. Throughput: 0: 995.1. Samples: 1732336. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:49:18,511][07135] Avg episode reward: [(0, '4.604')] +[2024-12-19 10:49:23,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3929.4). Total num frames: 9236480. Throughput: 0: 967.7. Samples: 1734688. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-19 10:49:23,504][07135] Avg episode reward: [(0, '4.674')] +[2024-12-19 10:49:27,202][15938] Updated weights for policy 0, policy_version 2260 (0.0014) +[2024-12-19 10:49:28,502][07135] Fps is (10 sec: 4096.9, 60 sec: 4027.9, 300 sec: 3971.1). Total num frames: 9261056. Throughput: 0: 980.4. Samples: 1741646. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:49:28,505][07135] Avg episode reward: [(0, '4.900')] +[2024-12-19 10:49:33,502][07135] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3971.0). Total num frames: 9281536. Throughput: 0: 1004.3. Samples: 1747594. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:49:33,505][07135] Avg episode reward: [(0, '4.864')] +[2024-12-19 10:49:38,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3929.4). Total num frames: 9293824. Throughput: 0: 973.2. Samples: 1749654. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-19 10:49:38,504][07135] Avg episode reward: [(0, '4.946')] +[2024-12-19 10:49:38,592][15938] Updated weights for policy 0, policy_version 2270 (0.0021) +[2024-12-19 10:49:43,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.3, 300 sec: 3957.2). Total num frames: 9318400. Throughput: 0: 960.6. Samples: 1756042. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:49:43,504][07135] Avg episode reward: [(0, '4.789')] +[2024-12-19 10:49:47,447][15938] Updated weights for policy 0, policy_version 2280 (0.0017) +[2024-12-19 10:49:48,504][07135] Fps is (10 sec: 4504.4, 60 sec: 3959.3, 300 sec: 3971.0). Total num frames: 9338880. Throughput: 0: 1006.5. Samples: 1762506. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-19 10:49:48,508][07135] Avg episode reward: [(0, '4.785')] +[2024-12-19 10:49:53,505][07135] Fps is (10 sec: 3685.1, 60 sec: 3891.0, 300 sec: 3943.2). Total num frames: 9355264. Throughput: 0: 979.5. Samples: 1764520. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-19 10:49:53,510][07135] Avg episode reward: [(0, '4.804')] +[2024-12-19 10:49:58,502][07135] Fps is (10 sec: 3687.4, 60 sec: 3891.2, 300 sec: 3943.3). Total num frames: 9375744. Throughput: 0: 948.4. Samples: 1770030. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-19 10:49:58,510][07135] Avg episode reward: [(0, '4.695')] +[2024-12-19 10:49:59,162][15938] Updated weights for policy 0, policy_version 2290 (0.0029) +[2024-12-19 10:50:03,502][07135] Fps is (10 sec: 4097.4, 60 sec: 3959.4, 300 sec: 3957.1). Total num frames: 9396224. Throughput: 0: 990.7. Samples: 1776916. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:50:03,504][07135] Avg episode reward: [(0, '4.499')]
+[2024-12-19 10:50:08,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3929.4). Total num frames: 9412608. Throughput: 0: 990.6. Samples: 1779266. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-12-19 10:50:08,503][07135] Avg episode reward: [(0, '4.589')]
+[2024-12-19 10:50:10,673][15938] Updated weights for policy 0, policy_version 2300 (0.0036)
+[2024-12-19 10:50:13,502][07135] Fps is (10 sec: 3686.5, 60 sec: 3822.9, 300 sec: 3929.4). Total num frames: 9433088. Throughput: 0: 943.8. Samples: 1784116. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-12-19 10:50:13,503][07135] Avg episode reward: [(0, '4.576')]
+[2024-12-19 10:50:18,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3891.3, 300 sec: 3943.3). Total num frames: 9453568. Throughput: 0: 968.8. Samples: 1791188. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:50:18,511][07135] Avg episode reward: [(0, '4.588')]
+[2024-12-19 10:50:19,518][15938] Updated weights for policy 0, policy_version 2310 (0.0021)
+[2024-12-19 10:50:23,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3943.3). Total num frames: 9474048. Throughput: 0: 995.9. Samples: 1794470. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-12-19 10:50:23,508][07135] Avg episode reward: [(0, '4.688')]
+[2024-12-19 10:50:28,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3915.5). Total num frames: 9490432. Throughput: 0: 950.4. Samples: 1798808. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:50:28,509][07135] Avg episode reward: [(0, '4.938')]
+[2024-12-19 10:50:30,932][15938] Updated weights for policy 0, policy_version 2320 (0.0014)
+[2024-12-19 10:50:33,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3929.4). Total num frames: 9510912. Throughput: 0: 963.2. Samples: 1805846. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:50:33,504][07135] Avg episode reward: [(0, '4.833')]
+[2024-12-19 10:50:38,502][07135] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3957.2). Total num frames: 9535488. Throughput: 0: 995.1. Samples: 1809294. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:50:38,504][07135] Avg episode reward: [(0, '4.666')]
+[2024-12-19 10:50:40,719][15938] Updated weights for policy 0, policy_version 2330 (0.0016)
+[2024-12-19 10:50:43,502][07135] Fps is (10 sec: 4095.9, 60 sec: 3891.2, 300 sec: 3929.4). Total num frames: 9551872. Throughput: 0: 982.1. Samples: 1814224. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:50:43,507][07135] Avg episode reward: [(0, '4.755')]
+[2024-12-19 10:50:48,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.4, 300 sec: 3929.4). Total num frames: 9572352. Throughput: 0: 964.8. Samples: 1820334. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-19 10:50:48,511][07135] Avg episode reward: [(0, '4.526')]
+[2024-12-19 10:50:48,523][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002337_9572352.pth...
+[2024-12-19 10:50:48,645][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002107_8630272.pth
+[2024-12-19 10:50:51,094][15938] Updated weights for policy 0, policy_version 2340 (0.0017)
+[2024-12-19 10:50:53,502][07135] Fps is (10 sec: 4096.1, 60 sec: 3959.7, 300 sec: 3943.3). Total num frames: 9592832. Throughput: 0: 989.8. Samples: 1823806. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 10:50:53,503][07135] Avg episode reward: [(0, '4.631')]
+[2024-12-19 10:50:58,502][07135] Fps is (10 sec: 3686.1, 60 sec: 3891.1, 300 sec: 3929.4). Total num frames: 9609216. Throughput: 0: 1008.1. Samples: 1829480. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:50:58,507][07135] Avg episode reward: [(0, '4.732')]
+[2024-12-19 10:51:02,455][15938] Updated weights for policy 0, policy_version 2350 (0.0017)
+[2024-12-19 10:51:03,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 9629696. Throughput: 0: 966.3. Samples: 1834670. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:51:03,506][07135] Avg episode reward: [(0, '4.849')]
+[2024-12-19 10:51:08,504][07135] Fps is (10 sec: 4095.2, 60 sec: 3959.3, 300 sec: 3929.3). Total num frames: 9650176. Throughput: 0: 973.6. Samples: 1838284. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:51:08,506][07135] Avg episode reward: [(0, '4.752')]
+[2024-12-19 10:51:11,138][15938] Updated weights for policy 0, policy_version 2360 (0.0021)
+[2024-12-19 10:51:13,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3929.4). Total num frames: 9670656. Throughput: 0: 1024.7. Samples: 1844918. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:51:13,504][07135] Avg episode reward: [(0, '4.715')]
+[2024-12-19 10:51:18,502][07135] Fps is (10 sec: 3687.4, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 9687040. Throughput: 0: 968.5. Samples: 1849430. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-12-19 10:51:18,504][07135] Avg episode reward: [(0, '4.872')]
+[2024-12-19 10:51:22,505][15938] Updated weights for policy 0, policy_version 2370 (0.0033)
+[2024-12-19 10:51:23,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3929.4). Total num frames: 9711616. Throughput: 0: 968.8. Samples: 1852888. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:51:23,503][07135] Avg episode reward: [(0, '4.686')]
+[2024-12-19 10:51:28,502][07135] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3943.3). Total num frames: 9732096. Throughput: 0: 1014.5. Samples: 1859874. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:51:28,508][07135] Avg episode reward: [(0, '4.701')]
+[2024-12-19 10:51:33,044][15938] Updated weights for policy 0, policy_version 2380 (0.0029)
+[2024-12-19 10:51:33,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 9748480. Throughput: 0: 982.6. Samples: 1864552. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:51:33,506][07135] Avg episode reward: [(0, '4.878')]
+[2024-12-19 10:51:38,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 9768960. Throughput: 0: 968.1. Samples: 1867370. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:51:38,510][07135] Avg episode reward: [(0, '4.727')]
+[2024-12-19 10:51:42,614][15938] Updated weights for policy 0, policy_version 2390 (0.0015)
+[2024-12-19 10:51:43,502][07135] Fps is (10 sec: 4505.6, 60 sec: 4027.8, 300 sec: 3943.3). Total num frames: 9793536. Throughput: 0: 999.8. Samples: 1874470. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2024-12-19 10:51:43,503][07135] Avg episode reward: [(0, '4.721')]
+[2024-12-19 10:51:48,502][07135] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3929.4). Total num frames: 9809920. Throughput: 0: 1000.7. Samples: 1879702. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:51:48,504][07135] Avg episode reward: [(0, '4.718')]
+[2024-12-19 10:51:53,502][07135] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 9826304. Throughput: 0: 968.9. Samples: 1881880. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:51:53,507][07135] Avg episode reward: [(0, '4.726')]
+[2024-12-19 10:51:54,017][15938] Updated weights for policy 0, policy_version 2400 (0.0016)
+[2024-12-19 10:51:58,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 9846784. Throughput: 0: 973.0. Samples: 1888704. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:51:58,504][07135] Avg episode reward: [(0, '4.644')]
+[2024-12-19 10:52:03,502][07135] Fps is (10 sec: 4505.5, 60 sec: 4027.7, 300 sec: 3929.4). Total num frames: 9871360. Throughput: 0: 1011.5. Samples: 1894950. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:52:03,509][15938] Updated weights for policy 0, policy_version 2410 (0.0022)
+[2024-12-19 10:52:03,509][07135] Avg episode reward: [(0, '4.663')]
+[2024-12-19 10:52:08,502][07135] Fps is (10 sec: 3686.4, 60 sec: 3891.4, 300 sec: 3887.7). Total num frames: 9883648. Throughput: 0: 979.8. Samples: 1896980. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-12-19 10:52:08,509][07135] Avg episode reward: [(0, '4.684')]
+[2024-12-19 10:52:13,502][07135] Fps is (10 sec: 3686.5, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 9908224. Throughput: 0: 961.7. Samples: 1903150. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:52:13,511][07135] Avg episode reward: [(0, '5.198')]
+[2024-12-19 10:52:14,371][15938] Updated weights for policy 0, policy_version 2420 (0.0016)
+[2024-12-19 10:52:18,502][07135] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3929.4). Total num frames: 9928704. Throughput: 0: 1009.3. Samples: 1909972. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-12-19 10:52:18,504][07135] Avg episode reward: [(0, '5.091')]
+[2024-12-19 10:52:23,505][07135] Fps is (10 sec: 3685.0, 60 sec: 3890.9, 300 sec: 3901.6). Total num frames: 9945088. Throughput: 0: 992.8. Samples: 1912048. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:52:23,510][07135] Avg episode reward: [(0, '4.750')]
+[2024-12-19 10:52:25,808][15938] Updated weights for policy 0, policy_version 2430 (0.0013)
+[2024-12-19 10:52:28,502][07135] Fps is (10 sec: 3276.7, 60 sec: 3822.9, 300 sec: 3887.7). Total num frames: 9961472. Throughput: 0: 951.0. Samples: 1917264. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:52:28,504][07135] Avg episode reward: [(0, '4.453')]
+[2024-12-19 10:52:33,502][07135] Fps is (10 sec: 4097.6, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 9986048. Throughput: 0: 986.7. Samples: 1924104. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:52:33,506][07135] Avg episode reward: [(0, '4.646')]
+[2024-12-19 10:52:34,878][15938] Updated weights for policy 0, policy_version 2440 (0.0029)
+[2024-12-19 10:52:38,502][07135] Fps is (10 sec: 4096.1, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 10002432. Throughput: 0: 1001.3. Samples: 1926938. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-12-19 10:52:38,504][07135] Avg episode reward: [(0, '4.720')]
+[2024-12-19 10:52:39,015][07135] Component Batcher_0 stopped!
+[2024-12-19 10:52:39,012][15925] Stopping Batcher_0...
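Training stops just above once 'Total num frames' crosses the 10M mark. Each periodic status line reports throughput averaged over three trailing windows (10 s, 60 s, 300 s) plus the policy-lag spread across rollout workers. A minimal sketch of such a windowed FPS meter follows (illustrative only; the class and method names are invented here, this is not Sample Factory's actual implementation):

    import time
    from collections import deque

    class WindowedFpsMeter:
        """Average frames/sec over several trailing time windows."""

        def __init__(self, windows=(10, 60, 300)):
            self.windows = windows
            self.samples = deque()  # (timestamp, total_frames) pairs

        def record(self, total_frames):
            now = time.time()
            self.samples.append((now, total_frames))
            # keep only as much history as the largest window needs
            while now - self.samples[0][0] > max(self.windows):
                self.samples.popleft()

        def fps(self, window):
            if len(self.samples) < 2:
                return 0.0
            now, frames_now = self.samples[-1]
            # oldest retained sample that is still inside the window
            t0, f0 = next(s for s in self.samples if now - s[0] <= window)
            return (frames_now - f0) / (now - t0) if now > t0 else 0.0

Called once per report interval with the running frame total, fps(10), fps(60) and fps(300) would yield the three numbers printed in the 'Fps is (...)' lines.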
+[2024-12-19 10:52:39,013][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth...
+[2024-12-19 10:52:39,020][15925] Loop batcher_evt_loop terminating...
+[2024-12-19 10:52:39,126][15938] Weights refcount: 2 0
+[2024-12-19 10:52:39,141][15925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002222_9101312.pth
+[2024-12-19 10:52:39,144][15938] Stopping InferenceWorker_p0-w0...
+[2024-12-19 10:52:39,145][15938] Loop inference_proc0-0_evt_loop terminating...
+[2024-12-19 10:52:39,145][07135] Component InferenceWorker_p0-w0 stopped!
+[2024-12-19 10:52:39,161][15925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth...
+[2024-12-19 10:52:39,423][07135] Component LearnerWorker_p0 stopped!
+[2024-12-19 10:52:39,426][15925] Stopping LearnerWorker_p0...
+[2024-12-19 10:52:39,427][15925] Loop learner_proc0_evt_loop terminating...
+[2024-12-19 10:52:39,759][15947] Stopping RolloutWorker_w3...
+[2024-12-19 10:52:39,759][07135] Component RolloutWorker_w3 stopped!
+[2024-12-19 10:52:39,760][15947] Loop rollout_proc3_evt_loop terminating...
+[2024-12-19 10:52:39,786][15946] Stopping RolloutWorker_w4...
+[2024-12-19 10:52:39,791][07135] Component RolloutWorker_w4 stopped!
+[2024-12-19 10:52:39,791][15946] Loop rollout_proc4_evt_loop terminating...
+[2024-12-19 10:52:39,801][15945] Stopping RolloutWorker_w2...
+[2024-12-19 10:52:39,802][15945] Loop rollout_proc2_evt_loop terminating...
+[2024-12-19 10:52:39,801][07135] Component RolloutWorker_w2 stopped!
+[2024-12-19 10:52:39,813][15950] Stopping RolloutWorker_w6...
+[2024-12-19 10:52:39,814][15950] Loop rollout_proc6_evt_loop terminating...
+[2024-12-19 10:52:39,810][07135] Component RolloutWorker_w7 stopped!
+[2024-12-19 10:52:39,817][07135] Component RolloutWorker_w6 stopped!
+[2024-12-19 10:52:39,819][15949] Stopping RolloutWorker_w7...
+[2024-12-19 10:52:39,827][15949] Loop rollout_proc7_evt_loop terminating...
+[2024-12-19 10:52:39,831][07135] Component RolloutWorker_w5 stopped!
+[2024-12-19 10:52:39,832][15948] Stopping RolloutWorker_w5...
+[2024-12-19 10:52:39,836][15948] Loop rollout_proc5_evt_loop terminating...
+[2024-12-19 10:52:39,842][15939] Stopping RolloutWorker_w0...
+[2024-12-19 10:52:39,843][15939] Loop rollout_proc0_evt_loop terminating...
+[2024-12-19 10:52:39,843][07135] Component RolloutWorker_w0 stopped!
+[2024-12-19 10:52:39,850][07135] Component RolloutWorker_w1 stopped!
+[2024-12-19 10:52:39,852][07135] Waiting for process learner_proc0 to stop...
+[2024-12-19 10:52:39,860][15944] Stopping RolloutWorker_w1...
+[2024-12-19 10:52:39,861][15944] Loop rollout_proc1_evt_loop terminating...
+[2024-12-19 10:52:41,770][07135] Waiting for process inference_proc0-0 to join...
+[2024-12-19 10:52:41,903][07135] Waiting for process rollout_proc0 to join...
+[2024-12-19 10:52:44,931][07135] Waiting for process rollout_proc1 to join...
+[2024-12-19 10:52:44,934][07135] Waiting for process rollout_proc2 to join...
+[2024-12-19 10:52:44,938][07135] Waiting for process rollout_proc3 to join...
+[2024-12-19 10:52:44,943][07135] Waiting for process rollout_proc4 to join...
+[2024-12-19 10:52:44,947][07135] Waiting for process rollout_proc5 to join...
+[2024-12-19 10:52:44,951][07135] Waiting for process rollout_proc6 to join...
+[2024-12-19 10:52:44,955][07135] Waiting for process rollout_proc7 to join...
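The paired 'Saving .../checkpoint_000002443_10006528.pth' and 'Removing .../checkpoint_000002222_9101312.pth' lines above show rolling checkpointing: each file is named after the policy version and the env-frame count, and once a new checkpoint is written the oldest one beyond the retention limit is deleted. A hedged sketch of that pattern (the filename scheme matches this log; the keep_latest_n pruning logic is an assumption, not Sample Factory's exact code):

    import os
    from pathlib import Path

    import torch

    def save_rolling_checkpoint(state, ckpt_dir, policy_version, env_frames, keep_latest_n=2):
        """Write checkpoint_<version>_<frames>.pth, then prune older files."""
        ckpt_dir = Path(ckpt_dir)
        ckpt_dir.mkdir(parents=True, exist_ok=True)
        path = ckpt_dir / f"checkpoint_{policy_version:09d}_{env_frames}.pth"
        torch.save(state, path)
        # zero-padded version numbers make lexicographic order match version order
        checkpoints = sorted(ckpt_dir.glob("checkpoint_*.pth"))
        for old in checkpoints[:-keep_latest_n]:
            os.remove(old)
        return path

Encoding the version in the filename is what lets the enjoy runs later in this log pick the newest checkpoint (checkpoint_000002443_10006528.pth) without any extra metadata.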
+[2024-12-19 10:52:44,958][07135] Batcher 0 profile tree view:
+batching: 53.5451, releasing_batches: 0.0503
+[2024-12-19 10:52:44,961][07135] InferenceWorker_p0-w0 profile tree view:
 wait_policy: 0.0000
- wait_policy_total: 857.3821
-update_model: 17.3992
- weight_update: 0.0028
-one_step: 0.0217
- handle_policy_step: 1216.1592
- deserialize: 30.2266, stack: 6.4708, obs_to_device_normalize: 255.8477, forward: 614.5762, send_messages: 60.8620
- prepare_outputs: 187.5784
- to_cpu: 112.7828
-[2024-12-18 23:58:49,476][00179] Learner 0 profile tree view:
-misc: 0.0117, prepare_batch: 24.0494
-train: 144.5285
- epoch_init: 0.0115, minibatch_init: 0.0147, losses_postprocess: 1.1773, kl_divergence: 1.2991, after_optimizer: 68.8153
- calculate_losses: 50.2202
- losses_init: 0.0075, forward_head: 2.0726, bptt_initial: 33.1901, tail: 2.2609, advantages_returns: 0.5434, losses: 7.4606
- bptt: 4.0179
- bptt_forward_core: 3.8082
- update: 21.7951
- clip: 1.7085
-[2024-12-18 23:58:49,478][00179] RolloutWorker_w0 profile tree view:
-wait_for_trajectories: 0.7593, enqueue_policy_requests: 212.8441, env_step: 1704.2607, overhead: 29.1434, complete_rollouts: 15.3297
-save_policy_outputs: 45.8964
- split_output_tensors: 18.2226
-[2024-12-18 23:58:49,479][00179] RolloutWorker_w7 profile tree view:
-wait_for_trajectories: 0.6932, enqueue_policy_requests: 224.7196, env_step: 1696.3918, overhead: 29.0218, complete_rollouts: 14.4014
-save_policy_outputs: 45.1897
- split_output_tensors: 18.1468
-[2024-12-18 23:58:49,481][00179] Loop Runner_EvtLoop terminating...
-[2024-12-18 23:58:49,483][00179] Runner profile tree view:
-main_loop: 2210.5337
-[2024-12-18 23:58:49,485][00179] Collected {0: 8007680}, FPS: 3622.5
-[2024-12-19 00:04:16,624][00179] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
-[2024-12-19 00:04:16,626][00179] Overriding arg 'num_workers' with value 1 passed from command line
-[2024-12-19 00:04:16,628][00179] Adding new argument 'no_render'=True that is not in the saved config file!
-[2024-12-19 00:04:16,630][00179] Adding new argument 'save_video'=True that is not in the saved config file!
-[2024-12-19 00:04:16,633][00179] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
-[2024-12-19 00:04:16,634][00179] Adding new argument 'video_name'=None that is not in the saved config file!
-[2024-12-19 00:04:16,636][00179] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
-[2024-12-19 00:04:16,638][00179] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
-[2024-12-19 00:04:16,640][00179] Adding new argument 'push_to_hub'=False that is not in the saved config file!
-[2024-12-19 00:04:16,641][00179] Adding new argument 'hf_repository'=None that is not in the saved config file!
-[2024-12-19 00:04:16,646][00179] Adding new argument 'policy_index'=0 that is not in the saved config file!
-[2024-12-19 00:04:16,646][00179] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
-[2024-12-19 00:04:16,648][00179] Adding new argument 'train_script'=None that is not in the saved config file!
-[2024-12-19 00:04:16,650][00179] Adding new argument 'enjoy_script'=None that is not in the saved config file!
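The 'profile tree view' blocks above report cumulative wall-clock seconds per nested timing scope: indented children (e.g. to_cpu under prepare_outputs under handle_policy_step) are sub-scopes, so a parent's total includes its children. A toy nested timer in the same spirit (an illustration of the idea only, not the profiler Sample Factory actually ships):

    import time
    from collections import defaultdict
    from contextlib import contextmanager

    class TreeTimer:
        def __init__(self):
            self.totals = defaultdict(float)  # "a/b/c" scope path -> seconds
            self._stack = []

        @contextmanager
        def timeit(self, name):
            self._stack.append(name)
            key = "/".join(self._stack)
            start = time.monotonic()
            try:
                yield
            finally:
                self.totals[key] += time.monotonic() - start
                self._stack.pop()

        def tree_view(self):
            # indent each scope by its depth, like the log output above
            for key in sorted(self.totals):
                depth = key.count("/")
                print(f"{'  ' * depth}{key.rsplit('/', 1)[-1]}: {self.totals[key]:.4f}")

With nested blocks such as 'with timer.timeit("handle_policy_step"): ... with timer.timeit("prepare_outputs"): ...', tree_view() prints an indented summary of the same shape as the ones in this log.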
-[2024-12-19 00:04:16,652][00179] Using frameskip 1 and render_action_repeat=4 for evaluation
-[2024-12-19 00:04:16,681][00179] Doom resolution: 160x120, resize resolution: (128, 72)
-[2024-12-19 00:04:16,684][00179] RunningMeanStd input shape: (3, 72, 128)
-[2024-12-19 00:04:16,687][00179] RunningMeanStd input shape: (1,)
-[2024-12-19 00:04:16,703][00179] ConvEncoder: input_channels=3
-[2024-12-19 00:04:16,805][00179] Conv encoder output size: 512
-[2024-12-19 00:04:16,807][00179] Policy head output size: 512
-[2024-12-19 00:04:16,984][00179] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth...
-[2024-12-19 00:04:17,777][00179] Num frames 100...
-[2024-12-19 00:04:17,899][00179] Num frames 200...
-[2024-12-19 00:04:18,029][00179] Num frames 300...
-[2024-12-19 00:04:18,155][00179] Num frames 400...
-[2024-12-19 00:04:18,275][00179] Num frames 500...
-[2024-12-19 00:04:18,395][00179] Num frames 600...
-[2024-12-19 00:04:18,519][00179] Num frames 700...
-[2024-12-19 00:04:18,698][00179] Avg episode rewards: #0: 17.680, true rewards: #0: 7.680
-[2024-12-19 00:04:18,700][00179] Avg episode reward: 17.680, avg true_objective: 7.680
-[2024-12-19 00:04:18,760][00179] Num frames 800...
-[2024-12-19 00:04:18,931][00179] Num frames 900...
-[2024-12-19 00:04:19,114][00179] Num frames 1000...
-[2024-12-19 00:04:19,285][00179] Num frames 1100...
-[2024-12-19 00:04:19,455][00179] Num frames 1200...
-[2024-12-19 00:04:19,624][00179] Num frames 1300...
-[2024-12-19 00:04:19,791][00179] Num frames 1400...
-[2024-12-19 00:04:19,964][00179] Num frames 1500...
-[2024-12-19 00:04:20,142][00179] Num frames 1600...
-[2024-12-19 00:04:20,321][00179] Num frames 1700...
-[2024-12-19 00:04:20,499][00179] Num frames 1800...
-[2024-12-19 00:04:20,694][00179] Num frames 1900...
-[2024-12-19 00:04:20,866][00179] Num frames 2000...
-[2024-12-19 00:04:21,045][00179] Num frames 2100...
-[2024-12-19 00:04:21,190][00179] Num frames 2200...
-[2024-12-19 00:04:21,316][00179] Num frames 2300...
-[2024-12-19 00:04:21,434][00179] Num frames 2400...
-[2024-12-19 00:04:21,561][00179] Num frames 2500...
-[2024-12-19 00:04:21,691][00179] Num frames 2600...
-[2024-12-19 00:04:21,815][00179] Num frames 2700...
-[2024-12-19 00:04:21,934][00179] Num frames 2800...
-[2024-12-19 00:04:22,071][00179] Avg episode rewards: #0: 36.340, true rewards: #0: 14.340
-[2024-12-19 00:04:22,072][00179] Avg episode reward: 36.340, avg true_objective: 14.340
-[2024-12-19 00:04:22,115][00179] Num frames 2900...
-[2024-12-19 00:04:22,259][00179] Num frames 3000...
-[2024-12-19 00:04:22,382][00179] Num frames 3100...
-[2024-12-19 00:04:22,509][00179] Num frames 3200...
-[2024-12-19 00:04:22,647][00179] Num frames 3300...
-[2024-12-19 00:04:22,769][00179] Num frames 3400...
-[2024-12-19 00:04:22,894][00179] Num frames 3500...
-[2024-12-19 00:04:23,050][00179] Num frames 3600...
-[2024-12-19 00:04:23,219][00179] Avg episode rewards: #0: 29.226, true rewards: #0: 12.227
-[2024-12-19 00:04:23,221][00179] Avg episode reward: 29.226, avg true_objective: 12.227
-[2024-12-19 00:04:23,281][00179] Num frames 3700...
-[2024-12-19 00:04:23,460][00179] Num frames 3800...
-[2024-12-19 00:04:23,644][00179] Num frames 3900...
-[2024-12-19 00:04:23,811][00179] Num frames 4000...
-[2024-12-19 00:04:23,974][00179] Num frames 4100...
-[2024-12-19 00:04:24,134][00179] Num frames 4200...
-[2024-12-19 00:04:24,304][00179] Num frames 4300...
-[2024-12-19 00:04:24,481][00179] Num frames 4400...
-[2024-12-19 00:04:24,686][00179] Num frames 4500...
-[2024-12-19 00:04:24,862][00179] Num frames 4600...
-[2024-12-19 00:04:25,043][00179] Num frames 4700...
-[2024-12-19 00:04:25,232][00179] Num frames 4800...
-[2024-12-19 00:04:25,324][00179] Avg episode rewards: #0: 28.047, true rewards: #0: 12.048
-[2024-12-19 00:04:25,327][00179] Avg episode reward: 28.047, avg true_objective: 12.048
-[2024-12-19 00:04:25,473][00179] Num frames 4900...
-[2024-12-19 00:04:25,627][00179] Num frames 5000...
-[2024-12-19 00:04:25,752][00179] Num frames 5100...
-[2024-12-19 00:04:25,873][00179] Num frames 5200...
-[2024-12-19 00:04:25,997][00179] Num frames 5300...
-[2024-12-19 00:04:26,120][00179] Num frames 5400...
-[2024-12-19 00:04:26,239][00179] Num frames 5500...
-[2024-12-19 00:04:26,360][00179] Num frames 5600...
-[2024-12-19 00:04:26,478][00179] Num frames 5700...
-[2024-12-19 00:04:26,610][00179] Num frames 5800...
-[2024-12-19 00:04:26,743][00179] Num frames 5900...
-[2024-12-19 00:04:26,867][00179] Num frames 6000...
-[2024-12-19 00:04:26,987][00179] Num frames 6100...
-[2024-12-19 00:04:27,107][00179] Num frames 6200...
-[2024-12-19 00:04:27,228][00179] Num frames 6300...
-[2024-12-19 00:04:27,361][00179] Num frames 6400...
-[2024-12-19 00:04:27,481][00179] Num frames 6500...
-[2024-12-19 00:04:27,614][00179] Num frames 6600...
-[2024-12-19 00:04:27,746][00179] Num frames 6700...
-[2024-12-19 00:04:27,915][00179] Avg episode rewards: #0: 33.990, true rewards: #0: 13.590
-[2024-12-19 00:04:27,917][00179] Avg episode reward: 33.990, avg true_objective: 13.590
-[2024-12-19 00:04:27,928][00179] Num frames 6800...
-[2024-12-19 00:04:28,047][00179] Num frames 6900...
-[2024-12-19 00:04:28,163][00179] Num frames 7000...
-[2024-12-19 00:04:28,287][00179] Num frames 7100...
-[2024-12-19 00:04:28,412][00179] Num frames 7200...
-[2024-12-19 00:04:28,535][00179] Num frames 7300...
-[2024-12-19 00:04:28,672][00179] Num frames 7400...
-[2024-12-19 00:04:28,837][00179] Num frames 7500...
-[2024-12-19 00:04:29,086][00179] Num frames 7600...
-[2024-12-19 00:04:29,332][00179] Num frames 7700...
-[2024-12-19 00:04:29,713][00179] Num frames 7800...
-[2024-12-19 00:04:29,960][00179] Num frames 7900...
-[2024-12-19 00:04:30,182][00179] Num frames 8000...
-[2024-12-19 00:04:30,302][00179] Num frames 8100...
-[2024-12-19 00:04:30,430][00179] Num frames 8200...
-[2024-12-19 00:04:30,554][00179] Num frames 8300...
-[2024-12-19 00:04:30,678][00179] Num frames 8400...
-[2024-12-19 00:04:30,744][00179] Avg episode rewards: #0: 35.346, true rewards: #0: 14.013
-[2024-12-19 00:04:30,746][00179] Avg episode reward: 35.346, avg true_objective: 14.013
-[2024-12-19 00:04:30,867][00179] Num frames 8500...
-[2024-12-19 00:04:30,990][00179] Num frames 8600...
-[2024-12-19 00:04:31,121][00179] Num frames 8700...
-[2024-12-19 00:04:31,301][00179] Num frames 8800...
-[2024-12-19 00:04:31,477][00179] Num frames 8900...
-[2024-12-19 00:04:31,647][00179] Num frames 9000...
-[2024-12-19 00:04:31,825][00179] Num frames 9100...
-[2024-12-19 00:04:31,991][00179] Num frames 9200...
-[2024-12-19 00:04:32,157][00179] Num frames 9300...
-[2024-12-19 00:04:32,325][00179] Num frames 9400...
-[2024-12-19 00:04:32,380][00179] Avg episode rewards: #0: 33.571, true rewards: #0: 13.429
-[2024-12-19 00:04:32,382][00179] Avg episode reward: 33.571, avg true_objective: 13.429
-[2024-12-19 00:04:32,571][00179] Num frames 9500...
-[2024-12-19 00:04:32,748][00179] Num frames 9600...
-[2024-12-19 00:04:32,920][00179] Num frames 9700...
-[2024-12-19 00:04:33,099][00179] Num frames 9800...
-[2024-12-19 00:04:33,279][00179] Num frames 9900...
-[2024-12-19 00:04:33,458][00179] Num frames 10000...
-[2024-12-19 00:04:33,642][00179] Num frames 10100...
-[2024-12-19 00:04:33,769][00179] Num frames 10200...
-[2024-12-19 00:04:33,899][00179] Num frames 10300...
-[2024-12-19 00:04:34,020][00179] Num frames 10400...
-[2024-12-19 00:04:34,148][00179] Num frames 10500...
-[2024-12-19 00:04:34,271][00179] Num frames 10600...
-[2024-12-19 00:04:34,396][00179] Num frames 10700...
-[2024-12-19 00:04:34,519][00179] Num frames 10800...
-[2024-12-19 00:04:34,650][00179] Num frames 10900...
-[2024-12-19 00:04:34,779][00179] Num frames 11000...
-[2024-12-19 00:04:34,831][00179] Avg episode rewards: #0: 33.750, true rewards: #0: 13.750
-[2024-12-19 00:04:34,833][00179] Avg episode reward: 33.750, avg true_objective: 13.750
-[2024-12-19 00:04:34,962][00179] Num frames 11100...
-[2024-12-19 00:04:35,083][00179] Num frames 11200...
-[2024-12-19 00:04:35,209][00179] Num frames 11300...
-[2024-12-19 00:04:35,339][00179] Num frames 11400...
-[2024-12-19 00:04:35,459][00179] Num frames 11500...
-[2024-12-19 00:04:35,589][00179] Num frames 11600...
-[2024-12-19 00:04:35,713][00179] Num frames 11700...
-[2024-12-19 00:04:35,836][00179] Num frames 11800...
-[2024-12-19 00:04:35,966][00179] Num frames 11900...
-[2024-12-19 00:04:36,092][00179] Num frames 12000...
-[2024-12-19 00:04:36,214][00179] Num frames 12100...
-[2024-12-19 00:04:36,332][00179] Avg episode rewards: #0: 32.946, true rewards: #0: 13.502
-[2024-12-19 00:04:36,333][00179] Avg episode reward: 32.946, avg true_objective: 13.502
-[2024-12-19 00:04:36,399][00179] Num frames 12200...
-[2024-12-19 00:04:36,519][00179] Num frames 12300...
-[2024-12-19 00:04:36,652][00179] Num frames 12400...
-[2024-12-19 00:04:36,772][00179] Num frames 12500...
-[2024-12-19 00:04:36,894][00179] Num frames 12600...
-[2024-12-19 00:04:37,027][00179] Num frames 12700...
-[2024-12-19 00:04:37,147][00179] Num frames 12800...
-[2024-12-19 00:04:37,271][00179] Num frames 12900...
-[2024-12-19 00:04:37,351][00179] Avg episode rewards: #0: 31.420, true rewards: #0: 12.920
-[2024-12-19 00:04:37,354][00179] Avg episode reward: 31.420, avg true_objective: 12.920
-[2024-12-19 00:05:55,879][00179] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
-[2024-12-19 00:14:06,778][00179] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
-[2024-12-19 00:14:06,779][00179] Overriding arg 'num_workers' with value 1 passed from command line
-[2024-12-19 00:14:06,781][00179] Adding new argument 'no_render'=True that is not in the saved config file!
-[2024-12-19 00:14:06,783][00179] Adding new argument 'save_video'=True that is not in the saved config file!
-[2024-12-19 00:14:06,785][00179] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
-[2024-12-19 00:14:06,787][00179] Adding new argument 'video_name'=None that is not in the saved config file!
-[2024-12-19 00:14:06,788][00179] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
-[2024-12-19 00:14:06,789][00179] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
-[2024-12-19 00:14:06,790][00179] Adding new argument 'push_to_hub'=True that is not in the saved config file!
-[2024-12-19 00:14:06,792][00179] Adding new argument 'hf_repository'='Esteban00007/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
-[2024-12-19 00:14:06,793][00179] Adding new argument 'policy_index'=0 that is not in the saved config file!
-[2024-12-19 00:14:06,794][00179] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
-[2024-12-19 00:14:06,795][00179] Adding new argument 'train_script'=None that is not in the saved config file!
-[2024-12-19 00:14:06,796][00179] Adding new argument 'enjoy_script'=None that is not in the saved config file!
-[2024-12-19 00:14:06,797][00179] Using frameskip 1 and render_action_repeat=4 for evaluation
-[2024-12-19 00:14:06,826][00179] RunningMeanStd input shape: (3, 72, 128)
-[2024-12-19 00:14:06,829][00179] RunningMeanStd input shape: (1,)
-[2024-12-19 00:14:06,842][00179] ConvEncoder: input_channels=3
-[2024-12-19 00:14:06,883][00179] Conv encoder output size: 512
-[2024-12-19 00:14:06,885][00179] Policy head output size: 512
-[2024-12-19 00:14:06,904][00179] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth...
-[2024-12-19 00:14:07,321][00179] Num frames 100...
-[2024-12-19 00:14:07,447][00179] Num frames 200...
-[2024-12-19 00:14:07,592][00179] Num frames 300...
-[2024-12-19 00:14:07,732][00179] Num frames 400...
-[2024-12-19 00:14:07,854][00179] Num frames 500...
-[2024-12-19 00:14:07,987][00179] Num frames 600...
-[2024-12-19 00:14:08,114][00179] Num frames 700...
-[2024-12-19 00:14:08,287][00179] Avg episode rewards: #0: 16.980, true rewards: #0: 7.980
-[2024-12-19 00:14:08,289][00179] Avg episode reward: 16.980, avg true_objective: 7.980
-[2024-12-19 00:14:08,294][00179] Num frames 800...
-[2024-12-19 00:14:08,415][00179] Num frames 900...
-[2024-12-19 00:14:08,535][00179] Num frames 1000...
-[2024-12-19 00:14:08,661][00179] Num frames 1100...
-[2024-12-19 00:14:08,786][00179] Num frames 1200...
-[2024-12-19 00:14:08,907][00179] Num frames 1300...
-[2024-12-19 00:14:09,012][00179] Avg episode rewards: #0: 12.210, true rewards: #0: 6.710
-[2024-12-19 00:14:09,013][00179] Avg episode reward: 12.210, avg true_objective: 6.710
-[2024-12-19 00:14:09,084][00179] Num frames 1400...
-[2024-12-19 00:14:09,201][00179] Num frames 1500...
-[2024-12-19 00:14:09,317][00179] Num frames 1600...
-[2024-12-19 00:14:09,440][00179] Num frames 1700...
-[2024-12-19 00:14:09,575][00179] Num frames 1800...
-[2024-12-19 00:14:09,698][00179] Num frames 1900...
-[2024-12-19 00:14:09,868][00179] Avg episode rewards: #0: 12.623, true rewards: #0: 6.623
-[2024-12-19 00:14:09,870][00179] Avg episode reward: 12.623, avg true_objective: 6.623
-[2024-12-19 00:14:09,891][00179] Num frames 2000...
-[2024-12-19 00:14:10,013][00179] Num frames 2100...
-[2024-12-19 00:14:10,131][00179] Num frames 2200...
-[2024-12-19 00:14:10,252][00179] Num frames 2300...
-[2024-12-19 00:14:10,376][00179] Num frames 2400...
-[2024-12-19 00:14:10,494][00179] Num frames 2500...
-[2024-12-19 00:14:10,632][00179] Num frames 2600...
-[2024-12-19 00:14:10,756][00179] Num frames 2700...
-[2024-12-19 00:14:10,879][00179] Num frames 2800...
-[2024-12-19 00:14:10,996][00179] Num frames 2900...
-[2024-12-19 00:14:11,071][00179] Avg episode rewards: #0: 14.038, true rewards: #0: 7.287
-[2024-12-19 00:14:11,074][00179] Avg episode reward: 14.038, avg true_objective: 7.287
-[2024-12-19 00:14:11,175][00179] Num frames 3000...
-[2024-12-19 00:14:11,293][00179] Num frames 3100...
-[2024-12-19 00:14:11,410][00179] Num frames 3200...
-[2024-12-19 00:14:11,544][00179] Avg episode rewards: #0: 12.134, true rewards: #0: 6.534
-[2024-12-19 00:14:11,546][00179] Avg episode reward: 12.134, avg true_objective: 6.534
-[2024-12-19 00:14:11,593][00179] Num frames 3300...
-[2024-12-19 00:14:11,709][00179] Num frames 3400...
-[2024-12-19 00:14:11,837][00179] Num frames 3500...
-[2024-12-19 00:14:11,958][00179] Num frames 3600...
-[2024-12-19 00:14:12,076][00179] Num frames 3700...
-[2024-12-19 00:14:12,197][00179] Num frames 3800...
-[2024-12-19 00:14:12,314][00179] Num frames 3900...
-[2024-12-19 00:14:12,436][00179] Num frames 4000...
-[2024-12-19 00:14:12,566][00179] Num frames 4100...
-[2024-12-19 00:14:12,696][00179] Avg episode rewards: #0: 13.438, true rewards: #0: 6.938
-[2024-12-19 00:14:12,698][00179] Avg episode reward: 13.438, avg true_objective: 6.938
-[2024-12-19 00:14:12,742][00179] Num frames 4200...
-[2024-12-19 00:14:12,869][00179] Num frames 4300...
-[2024-12-19 00:14:12,989][00179] Num frames 4400...
-[2024-12-19 00:14:13,104][00179] Num frames 4500...
-[2024-12-19 00:14:13,224][00179] Num frames 4600...
-[2024-12-19 00:14:13,343][00179] Num frames 4700...
-[2024-12-19 00:14:13,462][00179] Num frames 4800...
-[2024-12-19 00:14:13,593][00179] Num frames 4900...
-[2024-12-19 00:14:13,711][00179] Num frames 5000...
-[2024-12-19 00:14:13,839][00179] Num frames 5100...
-[2024-12-19 00:14:13,960][00179] Num frames 5200...
-[2024-12-19 00:14:14,080][00179] Num frames 5300...
-[2024-12-19 00:14:14,198][00179] Num frames 5400...
-[2024-12-19 00:14:14,318][00179] Num frames 5500...
-[2024-12-19 00:14:14,384][00179] Avg episode rewards: #0: 16.296, true rewards: #0: 7.867
-[2024-12-19 00:14:14,385][00179] Avg episode reward: 16.296, avg true_objective: 7.867
-[2024-12-19 00:14:14,499][00179] Num frames 5600...
-[2024-12-19 00:14:14,633][00179] Num frames 5700...
-[2024-12-19 00:14:14,752][00179] Num frames 5800...
-[2024-12-19 00:14:14,881][00179] Num frames 5900...
-[2024-12-19 00:14:15,002][00179] Num frames 6000...
-[2024-12-19 00:14:15,121][00179] Num frames 6100...
-[2024-12-19 00:14:15,243][00179] Num frames 6200...
-[2024-12-19 00:14:15,373][00179] Num frames 6300...
-[2024-12-19 00:14:15,499][00179] Num frames 6400...
-[2024-12-19 00:14:15,628][00179] Num frames 6500...
-[2024-12-19 00:14:15,749][00179] Num frames 6600...
-[2024-12-19 00:14:15,871][00179] Num frames 6700...
-[2024-12-19 00:14:15,999][00179] Num frames 6800...
-[2024-12-19 00:14:16,117][00179] Avg episode rewards: #0: 17.939, true rewards: #0: 8.564
-[2024-12-19 00:14:16,119][00179] Avg episode reward: 17.939, avg true_objective: 8.564
-[2024-12-19 00:14:16,180][00179] Num frames 6900...
-[2024-12-19 00:14:16,299][00179] Num frames 7000...
-[2024-12-19 00:14:16,422][00179] Num frames 7100...
-[2024-12-19 00:14:16,545][00179] Num frames 7200...
-[2024-12-19 00:14:16,683][00179] Num frames 7300...
-[2024-12-19 00:14:16,849][00179] Avg episode rewards: #0: 17.515, true rewards: #0: 8.181
-[2024-12-19 00:14:16,851][00179] Avg episode reward: 17.515, avg true_objective: 8.181
-[2024-12-19 00:14:16,929][00179] Num frames 7400...
-[2024-12-19 00:14:17,097][00179] Num frames 7500...
-[2024-12-19 00:14:17,262][00179] Num frames 7600...
-[2024-12-19 00:14:17,430][00179] Num frames 7700...
-[2024-12-19 00:14:17,602][00179] Num frames 7800...
-[2024-12-19 00:14:17,759][00179] Num frames 7900...
-[2024-12-19 00:14:17,919][00179] Num frames 8000...
-[2024-12-19 00:14:18,092][00179] Num frames 8100...
-[2024-12-19 00:14:18,213][00179] Avg episode rewards: #0: 17.337, true rewards: #0: 8.137
-[2024-12-19 00:14:18,215][00179] Avg episode reward: 17.337, avg true_objective: 8.137
-[2024-12-19 00:15:07,796][00179] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+ wait_policy_total: 792.7970
+update_model: 16.6535
+ weight_update: 0.0059
+one_step: 0.0145
+ handle_policy_step: 1117.5331
+ deserialize: 27.8088, stack: 6.3569, obs_to_device_normalize: 238.7164, forward: 559.1798, send_messages: 55.3780
+ prepare_outputs: 172.9525
+ to_cpu: 104.8202
+[2024-12-19 10:52:44,963][07135] Learner 0 profile tree view:
+misc: 0.0094, prepare_batch: 23.0470
+train: 146.1731
+ epoch_init: 0.0199, minibatch_init: 0.0261, losses_postprocess: 1.2230, kl_divergence: 1.1753, after_optimizer: 6.1288
+ calculate_losses: 56.0662
+ losses_init: 0.0107, forward_head: 2.2799, bptt_initial: 39.3433, tail: 2.1034, advantages_returns: 0.5457, losses: 7.3361
+ bptt: 3.8160
+ bptt_forward_core: 3.6135
+ update: 80.2685
+ clip: 1.5897
+[2024-12-19 10:52:44,965][07135] RolloutWorker_w0 profile tree view:
+wait_for_trajectories: 0.6654, enqueue_policy_requests: 187.2612, env_step: 1591.3768, overhead: 24.4357, complete_rollouts: 14.0090
+save_policy_outputs: 38.7890
+ split_output_tensors: 15.6078
+[2024-12-19 10:52:44,966][07135] RolloutWorker_w7 profile tree view:
+wait_for_trajectories: 0.6041, enqueue_policy_requests: 190.3086, env_step: 1581.1314, overhead: 25.4398, complete_rollouts: 12.5946
+save_policy_outputs: 39.4126
+ split_output_tensors: 15.9723
+[2024-12-19 10:52:44,968][07135] Loop Runner_EvtLoop terminating...
+[2024-12-19 10:52:44,969][07135] Runner profile tree view:
+main_loop: 2034.0511
+[2024-12-19 10:52:44,971][07135] Collected {0: 10006528}, FPS: 3791.8
+[2024-12-19 10:57:08,070][07135] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2024-12-19 10:57:08,071][07135] Overriding arg 'num_workers' with value 1 passed from command line
+[2024-12-19 10:57:08,073][07135] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-12-19 10:57:08,075][07135] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-12-19 10:57:08,076][07135] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-12-19 10:57:08,077][07135] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-12-19 10:57:08,080][07135] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+[2024-12-19 10:57:08,080][07135] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-12-19 10:57:08,083][07135] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+[2024-12-19 10:57:08,084][07135] Adding new argument 'hf_repository'=None that is not in the saved config file!
+[2024-12-19 10:57:08,085][07135] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-12-19 10:57:08,086][07135] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-12-19 10:57:08,088][07135] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-12-19 10:57:08,089][07135] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-12-19 10:57:08,090][07135] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-12-19 10:57:08,125][07135] RunningMeanStd input shape: (3, 72, 128)
+[2024-12-19 10:57:08,127][07135] RunningMeanStd input shape: (1,)
+[2024-12-19 10:57:08,142][07135] ConvEncoder: input_channels=3
+[2024-12-19 10:57:08,179][07135] Conv encoder output size: 512
+[2024-12-19 10:57:08,182][07135] Policy head output size: 512
+[2024-12-19 10:57:08,202][07135] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth...
+[2024-12-19 10:57:08,630][07135] Num frames 100...
+[2024-12-19 10:57:08,749][07135] Num frames 200...
+[2024-12-19 10:57:08,873][07135] Num frames 300...
+[2024-12-19 10:57:08,992][07135] Avg episode rewards: #0: 4.520, true rewards: #0: 3.520
+[2024-12-19 10:57:08,993][07135] Avg episode reward: 4.520, avg true_objective: 3.520
+[2024-12-19 10:57:09,053][07135] Num frames 400...
+[2024-12-19 10:57:09,176][07135] Num frames 500...
+[2024-12-19 10:57:09,300][07135] Num frames 600...
+[2024-12-19 10:57:09,427][07135] Num frames 700...
+[2024-12-19 10:57:09,562][07135] Num frames 800...
+[2024-12-19 10:57:09,615][07135] Avg episode rewards: #0: 6.000, true rewards: #0: 4.000
+[2024-12-19 10:57:09,616][07135] Avg episode reward: 6.000, avg true_objective: 4.000
+[2024-12-19 10:57:09,738][07135] Num frames 900...
+[2024-12-19 10:57:09,859][07135] Num frames 1000...
+[2024-12-19 10:57:09,978][07135] Num frames 1100...
+[2024-12-19 10:57:10,096][07135] Num frames 1200...
+[2024-12-19 10:57:10,208][07135] Avg episode rewards: #0: 5.827, true rewards: #0: 4.160
+[2024-12-19 10:57:10,209][07135] Avg episode reward: 5.827, avg true_objective: 4.160
+[2024-12-19 10:57:10,277][07135] Num frames 1300...
+[2024-12-19 10:57:10,394][07135] Num frames 1400...
+[2024-12-19 10:57:10,534][07135] Num frames 1500...
+[2024-12-19 10:57:10,656][07135] Num frames 1600...
+[2024-12-19 10:57:10,750][07135] Avg episode rewards: #0: 5.330, true rewards: #0: 4.080
+[2024-12-19 10:57:10,752][07135] Avg episode reward: 5.330, avg true_objective: 4.080
+[2024-12-19 10:57:10,838][07135] Num frames 1700...
+[2024-12-19 10:57:10,960][07135] Num frames 1800...
+[2024-12-19 10:57:11,082][07135] Num frames 1900...
+[2024-12-19 10:57:11,202][07135] Num frames 2000...
+[2024-12-19 10:57:11,315][07135] Avg episode rewards: #0: 5.296, true rewards: #0: 4.096
+[2024-12-19 10:57:11,317][07135] Avg episode reward: 5.296, avg true_objective: 4.096
+[2024-12-19 10:57:11,382][07135] Num frames 2100...
+[2024-12-19 10:57:11,514][07135] Num frames 2200...
+[2024-12-19 10:57:11,642][07135] Num frames 2300...
+[2024-12-19 10:57:11,764][07135] Num frames 2400...
+[2024-12-19 10:57:11,899][07135] Avg episode rewards: #0: 5.107, true rewards: #0: 4.107
+[2024-12-19 10:57:11,900][07135] Avg episode reward: 5.107, avg true_objective: 4.107
+[2024-12-19 10:57:11,948][07135] Num frames 2500...
+[2024-12-19 10:57:12,071][07135] Num frames 2600...
+[2024-12-19 10:57:12,192][07135] Num frames 2700...
+[2024-12-19 10:57:12,314][07135] Num frames 2800...
+[2024-12-19 10:57:12,437][07135] Num frames 2900...
+[2024-12-19 10:57:12,507][07135] Avg episode rewards: #0: 5.160, true rewards: #0: 4.160
+[2024-12-19 10:57:12,510][07135] Avg episode reward: 5.160, avg true_objective: 4.160
+[2024-12-19 10:57:12,623][07135] Num frames 3000...
+[2024-12-19 10:57:12,744][07135] Num frames 3100...
+[2024-12-19 10:57:12,868][07135] Num frames 3200...
+[2024-12-19 10:57:13,037][07135] Avg episode rewards: #0: 4.995, true rewards: #0: 4.120
+[2024-12-19 10:57:13,038][07135] Avg episode reward: 4.995, avg true_objective: 4.120
+[2024-12-19 10:57:13,048][07135] Num frames 3300...
+[2024-12-19 10:57:13,167][07135] Num frames 3400...
+[2024-12-19 10:57:13,288][07135] Num frames 3500...
+[2024-12-19 10:57:13,413][07135] Num frames 3600...
+[2024-12-19 10:57:13,607][07135] Avg episode rewards: #0: 4.867, true rewards: #0: 4.089
+[2024-12-19 10:57:13,609][07135] Avg episode reward: 4.867, avg true_objective: 4.089
+[2024-12-19 10:57:13,636][07135] Num frames 3700...
+[2024-12-19 10:57:13,763][07135] Num frames 3800...
+[2024-12-19 10:57:13,934][07135] Num frames 3900...
+[2024-12-19 10:57:14,104][07135] Num frames 4000...
+[2024-12-19 10:57:14,269][07135] Num frames 4100...
+[2024-12-19 10:57:14,372][07135] Avg episode rewards: #0: 4.928, true rewards: #0: 4.128
+[2024-12-19 10:57:14,375][07135] Avg episode reward: 4.928, avg true_objective: 4.128
+[2024-12-19 10:57:37,228][07135] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+[2024-12-19 11:03:33,357][07135] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2024-12-19 11:03:33,358][07135] Overriding arg 'num_workers' with value 1 passed from command line
+[2024-12-19 11:03:33,360][07135] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-12-19 11:03:33,361][07135] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-12-19 11:03:33,362][07135] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-12-19 11:03:33,364][07135] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-12-19 11:03:33,365][07135] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2024-12-19 11:03:33,366][07135] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-12-19 11:03:33,367][07135] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2024-12-19 11:03:33,368][07135] Adding new argument 'hf_repository'='ThomasSimonini/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2024-12-19 11:03:33,369][07135] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-12-19 11:03:33,370][07135] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-12-19 11:03:33,372][07135] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-12-19 11:03:33,373][07135] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-12-19 11:03:33,374][07135] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-12-19 11:03:33,404][07135] RunningMeanStd input shape: (3, 72, 128)
+[2024-12-19 11:03:33,406][07135] RunningMeanStd input shape: (1,)
+[2024-12-19 11:03:33,419][07135] ConvEncoder: input_channels=3
+[2024-12-19 11:03:33,458][07135] Conv encoder output size: 512
+[2024-12-19 11:03:33,460][07135] Policy head output size: 512
+[2024-12-19 11:03:33,478][07135] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth...
+[2024-12-19 11:03:33,922][07135] Num frames 100...
+[2024-12-19 11:03:34,048][07135] Num frames 200...
+[2024-12-19 11:03:34,170][07135] Num frames 300...
+[2024-12-19 11:03:34,295][07135] Num frames 400...
+[2024-12-19 11:03:34,447][07135] Avg episode rewards: #0: 6.800, true rewards: #0: 4.800
+[2024-12-19 11:03:34,448][07135] Avg episode reward: 6.800, avg true_objective: 4.800
+[2024-12-19 11:03:34,475][07135] Num frames 500...
+[2024-12-19 11:03:34,604][07135] Num frames 600...
+[2024-12-19 11:03:34,745][07135] Avg episode rewards: #0: 4.815, true rewards: #0: 3.315
+[2024-12-19 11:03:34,746][07135] Avg episode reward: 4.815, avg true_objective: 3.315
+[2024-12-19 11:03:34,794][07135] Num frames 700...
+[2024-12-19 11:03:34,911][07135] Num frames 800...
+[2024-12-19 11:03:35,027][07135] Num frames 900...
+[2024-12-19 11:03:35,143][07135] Num frames 1000...
+[2024-12-19 11:03:35,254][07135] Avg episode rewards: #0: 4.490, true rewards: #0: 3.490
+[2024-12-19 11:03:35,255][07135] Avg episode reward: 4.490, avg true_objective: 3.490
+[2024-12-19 11:03:35,326][07135] Num frames 1100...
+[2024-12-19 11:03:35,443][07135] Num frames 1200...
+[2024-12-19 11:03:35,577][07135] Num frames 1300...
+[2024-12-19 11:03:35,762][07135] Num frames 1400...
+[2024-12-19 11:03:35,875][07135] Avg episode rewards: #0: 4.328, true rewards: #0: 3.577
+[2024-12-19 11:03:35,876][07135] Avg episode reward: 4.328, avg true_objective: 3.577
+[2024-12-19 11:03:35,997][07135] Num frames 1500...
+[2024-12-19 11:03:36,164][07135] Num frames 1600...
+[2024-12-19 11:03:36,328][07135] Num frames 1700...
+[2024-12-19 11:03:36,493][07135] Num frames 1800...
+[2024-12-19 11:03:36,578][07135] Avg episode rewards: #0: 4.230, true rewards: #0: 3.630
+[2024-12-19 11:03:36,580][07135] Avg episode reward: 4.230, avg true_objective: 3.630
+[2024-12-19 11:03:36,728][07135] Num frames 1900...
+[2024-12-19 11:03:36,910][07135] Num frames 2000...
+[2024-12-19 11:03:37,075][07135] Num frames 2100...
+[2024-12-19 11:03:37,290][07135] Avg episode rewards: #0: 4.165, true rewards: #0: 3.665
+[2024-12-19 11:03:37,293][07135] Avg episode reward: 4.165, avg true_objective: 3.665
+[2024-12-19 11:03:37,296][07135] Num frames 2200...
+[2024-12-19 11:03:37,466][07135] Num frames 2300...
+[2024-12-19 11:03:37,659][07135] Num frames 2400...
+[2024-12-19 11:03:37,836][07135] Num frames 2500...
+[2024-12-19 11:03:38,037][07135] Avg episode rewards: #0: 4.119, true rewards: #0: 3.690
+[2024-12-19 11:03:38,039][07135] Avg episode reward: 4.119, avg true_objective: 3.690
+[2024-12-19 11:03:38,062][07135] Num frames 2600...
+[2024-12-19 11:03:38,186][07135] Num frames 2700...
+[2024-12-19 11:03:38,308][07135] Num frames 2800...
+[2024-12-19 11:03:38,433][07135] Num frames 2900...
+[2024-12-19 11:03:38,577][07135] Avg episode rewards: #0: 4.084, true rewards: #0: 3.709
+[2024-12-19 11:03:38,579][07135] Avg episode reward: 4.084, avg true_objective: 3.709
+[2024-12-19 11:03:38,621][07135] Num frames 3000...
+[2024-12-19 11:03:38,750][07135] Num frames 3100...
+[2024-12-19 11:03:38,882][07135] Num frames 3200...
+[2024-12-19 11:03:39,004][07135] Num frames 3300...
+[2024-12-19 11:03:39,120][07135] Avg episode rewards: #0: 4.057, true rewards: #0: 3.723
+[2024-12-19 11:03:39,121][07135] Avg episode reward: 4.057, avg true_objective: 3.723
+[2024-12-19 11:03:39,182][07135] Num frames 3400...
+[2024-12-19 11:03:39,304][07135] Num frames 3500...
+[2024-12-19 11:03:39,427][07135] Num frames 3600...
+[2024-12-19 11:03:39,558][07135] Num frames 3700...
+[2024-12-19 11:03:39,683][07135] Num frames 3800...
+[2024-12-19 11:03:39,809][07135] Num frames 3900...
+[2024-12-19 11:03:39,948][07135] Avg episode rewards: #0: 4.559, true rewards: #0: 3.959
+[2024-12-19 11:03:39,950][07135] Avg episode reward: 4.559, avg true_objective: 3.959
+[2024-12-19 11:03:57,893][07135] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+[2024-12-19 11:05:06,808][07135] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2024-12-19 11:05:06,810][07135] Overriding arg 'num_workers' with value 1 passed from command line
+[2024-12-19 11:05:06,812][07135] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-12-19 11:05:06,814][07135] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-12-19 11:05:06,816][07135] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-12-19 11:05:06,817][07135] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-12-19 11:05:06,819][07135] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2024-12-19 11:05:06,821][07135] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-12-19 11:05:06,822][07135] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2024-12-19 11:05:06,823][07135] Adding new argument 'hf_repository'='Esteabn00007/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2024-12-19 11:05:06,824][07135] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-12-19 11:05:06,824][07135] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-12-19 11:05:06,825][07135] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-12-19 11:05:06,826][07135] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-12-19 11:05:06,827][07135] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-12-19 11:05:06,860][07135] RunningMeanStd input shape: (3, 72, 128)
+[2024-12-19 11:05:06,862][07135] RunningMeanStd input shape: (1,)
+[2024-12-19 11:05:06,875][07135] ConvEncoder: input_channels=3
+[2024-12-19 11:05:06,912][07135] Conv encoder output size: 512
+[2024-12-19 11:05:06,914][07135] Policy head output size: 512
+[2024-12-19 11:05:06,932][07135] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth...
+[2024-12-19 11:05:07,339][07135] Num frames 100...
+[2024-12-19 11:05:07,459][07135] Num frames 200...
+[2024-12-19 11:05:07,602][07135] Num frames 300...
+[2024-12-19 11:05:07,726][07135] Num frames 400...
+[2024-12-19 11:05:07,801][07135] Avg episode rewards: #0: 5.160, true rewards: #0: 4.160
+[2024-12-19 11:05:07,803][07135] Avg episode reward: 5.160, avg true_objective: 4.160
+[2024-12-19 11:05:07,905][07135] Num frames 500...
+[2024-12-19 11:05:08,025][07135] Num frames 600...
+[2024-12-19 11:05:08,156][07135] Num frames 700...
+[2024-12-19 11:05:08,281][07135] Num frames 800...
+[2024-12-19 11:05:08,334][07135] Avg episode rewards: #0: 4.500, true rewards: #0: 4.000
+[2024-12-19 11:05:08,336][07135] Avg episode reward: 4.500, avg true_objective: 4.000
+[2024-12-19 11:05:08,463][07135] Num frames 900...
+[2024-12-19 11:05:08,599][07135] Num frames 1000...
+[2024-12-19 11:05:08,725][07135] Num frames 1100...
+[2024-12-19 11:05:08,856][07135] Num frames 1200...
+[2024-12-19 11:05:08,970][07135] Avg episode rewards: #0: 4.827, true rewards: #0: 4.160
+[2024-12-19 11:05:08,972][07135] Avg episode reward: 4.827, avg true_objective: 4.160
+[2024-12-19 11:05:09,039][07135] Num frames 1300...
+[2024-12-19 11:05:09,161][07135] Num frames 1400...
+[2024-12-19 11:05:09,289][07135] Num frames 1500...
+[2024-12-19 11:05:09,412][07135] Num frames 1600...
+[2024-12-19 11:05:09,507][07135] Avg episode rewards: #0: 4.580, true rewards: #0: 4.080
+[2024-12-19 11:05:09,508][07135] Avg episode reward: 4.580, avg true_objective: 4.080
+[2024-12-19 11:05:09,602][07135] Num frames 1700...
+[2024-12-19 11:05:09,729][07135] Num frames 1800...
+[2024-12-19 11:05:09,858][07135] Num frames 1900...
+[2024-12-19 11:05:09,981][07135] Num frames 2000...
+[2024-12-19 11:05:10,057][07135] Avg episode rewards: #0: 4.432, true rewards: #0: 4.032
+[2024-12-19 11:05:10,058][07135] Avg episode reward: 4.432, avg true_objective: 4.032
+[2024-12-19 11:05:10,209][07135] Num frames 2100...
+[2024-12-19 11:05:10,378][07135] Num frames 2200...
+[2024-12-19 11:05:10,559][07135] Num frames 2300...
+[2024-12-19 11:05:10,732][07135] Num frames 2400...
+[2024-12-19 11:05:10,785][07135] Avg episode rewards: #0: 4.333, true rewards: #0: 4.000
+[2024-12-19 11:05:10,787][07135] Avg episode reward: 4.333, avg true_objective: 4.000
+[2024-12-19 11:05:10,969][07135] Num frames 2500...
+[2024-12-19 11:05:11,135][07135] Num frames 2600...
+[2024-12-19 11:05:11,300][07135] Num frames 2700...
+[2024-12-19 11:05:11,486][07135] Num frames 2800...
+[2024-12-19 11:05:11,686][07135] Avg episode rewards: #0: 4.686, true rewards: #0: 4.114
+[2024-12-19 11:05:11,689][07135] Avg episode reward: 4.686, avg true_objective: 4.114
+[2024-12-19 11:05:11,727][07135] Num frames 2900...
+[2024-12-19 11:05:11,904][07135] Num frames 3000...
+[2024-12-19 11:05:12,083][07135] Num frames 3100...
+[2024-12-19 11:05:12,259][07135] Num frames 3200...
+[2024-12-19 11:05:12,432][07135] Num frames 3300...
+[2024-12-19 11:05:12,538][07135] Avg episode rewards: #0: 4.785, true rewards: #0: 4.160
+[2024-12-19 11:05:12,540][07135] Avg episode reward: 4.785, avg true_objective: 4.160
+[2024-12-19 11:05:12,641][07135] Num frames 3400...
+[2024-12-19 11:05:12,763][07135] Num frames 3500...
+[2024-12-19 11:05:12,888][07135] Num frames 3600...
+[2024-12-19 11:05:13,018][07135] Num frames 3700...
+[2024-12-19 11:05:13,128][07135] Avg episode rewards: #0: 4.716, true rewards: #0: 4.160
+[2024-12-19 11:05:13,130][07135] Avg episode reward: 4.716, avg true_objective: 4.160
+[2024-12-19 11:05:13,198][07135] Num frames 3800...
+[2024-12-19 11:05:13,317][07135] Num frames 3900...
+[2024-12-19 11:05:13,437][07135] Num frames 4000...
+[2024-12-19 11:05:13,574][07135] Num frames 4100...
+[2024-12-19 11:05:13,664][07135] Avg episode rewards: #0: 4.628, true rewards: #0: 4.128
+[2024-12-19 11:05:13,666][07135] Avg episode reward: 4.628, avg true_objective: 4.128
+[2024-12-19 11:05:32,918][07135] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+[2024-12-19 11:05:47,463][07135] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2024-12-19 11:05:47,465][07135] Overriding arg 'num_workers' with value 1 passed from command line
+[2024-12-19 11:05:47,468][07135] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-12-19 11:05:47,470][07135] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-12-19 11:05:47,472][07135] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-12-19 11:05:47,474][07135] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-12-19 11:05:47,476][07135] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2024-12-19 11:05:47,478][07135] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-12-19 11:05:47,479][07135] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2024-12-19 11:05:47,481][07135] Adding new argument 'hf_repository'='Esteban00007/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2024-12-19 11:05:47,482][07135] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-12-19 11:05:47,483][07135] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-12-19 11:05:47,484][07135] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-12-19 11:05:47,485][07135] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-12-19 11:05:47,486][07135] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-12-19 11:05:47,539][07135] RunningMeanStd input shape: (3, 72, 128)
+[2024-12-19 11:05:47,544][07135] RunningMeanStd input shape: (1,)
+[2024-12-19 11:05:47,578][07135] ConvEncoder: input_channels=3
+[2024-12-19 11:05:47,640][07135] Conv encoder output size: 512
+[2024-12-19 11:05:47,642][07135] Policy head output size: 512
+[2024-12-19 11:05:47,672][07135] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth...
+[2024-12-19 11:05:48,290][07135] Num frames 100...
+[2024-12-19 11:05:48,472][07135] Num frames 200...
+[2024-12-19 11:05:48,637][07135] Num frames 300...
+[2024-12-19 11:05:48,832][07135] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
+[2024-12-19 11:05:48,835][07135] Avg episode reward: 3.840, avg true_objective: 3.840
+[2024-12-19 11:05:48,864][07135] Num frames 400...
+[2024-12-19 11:05:49,038][07135] Num frames 500...
+[2024-12-19 11:05:49,210][07135] Num frames 600...
+[2024-12-19 11:05:49,391][07135] Num frames 700...
+[2024-12-19 11:05:49,573][07135] Num frames 800...
+[2024-12-19 11:05:49,626][07135] Avg episode rewards: #0: 4.500, true rewards: #0: 4.000
+[2024-12-19 11:05:49,629][07135] Avg episode reward: 4.500, avg true_objective: 4.000
+[2024-12-19 11:05:49,808][07135] Num frames 900...
+[2024-12-19 11:05:49,951][07135] Num frames 1000...
+[2024-12-19 11:05:50,082][07135] Num frames 1100...
+[2024-12-19 11:05:50,243][07135] Avg episode rewards: #0: 4.280, true rewards: #0: 3.947
+[2024-12-19 11:05:50,245][07135] Avg episode reward: 4.280, avg true_objective: 3.947
+[2024-12-19 11:05:50,269][07135] Num frames 1200...
+[2024-12-19 11:05:50,398][07135] Num frames 1300...
+[2024-12-19 11:05:50,530][07135] Num frames 1400...
+[2024-12-19 11:05:50,660][07135] Num frames 1500...
+[2024-12-19 11:05:50,784][07135] Num frames 1600...
+[2024-12-19 11:05:50,880][07135] Avg episode rewards: #0: 4.830, true rewards: #0: 4.080
+[2024-12-19 11:05:50,882][07135] Avg episode reward: 4.830, avg true_objective: 4.080
+[2024-12-19 11:05:50,968][07135] Num frames 1700...
+[2024-12-19 11:05:51,094][07135] Num frames 1800...
+[2024-12-19 11:05:51,223][07135] Num frames 1900...
+[2024-12-19 11:05:51,346][07135] Num frames 2000...
+[2024-12-19 11:05:51,421][07135] Avg episode rewards: #0: 4.632, true rewards: #0: 4.032
+[2024-12-19 11:05:51,423][07135] Avg episode reward: 4.632, avg true_objective: 4.032
+[2024-12-19 11:05:51,535][07135] Num frames 2100...
+[2024-12-19 11:05:51,657][07135] Num frames 2200...
+[2024-12-19 11:05:51,778][07135] Num frames 2300...
+[2024-12-19 11:05:51,907][07135] Num frames 2400...
+[2024-12-19 11:05:51,961][07135] Avg episode rewards: #0: 4.500, true rewards: #0: 4.000
+[2024-12-19 11:05:51,962][07135] Avg episode reward: 4.500, avg true_objective: 4.000
+[2024-12-19 11:05:52,087][07135] Num frames 2500...
+[2024-12-19 11:05:52,218][07135] Num frames 2600...
+[2024-12-19 11:05:52,340][07135] Num frames 2700...
+[2024-12-19 11:05:52,497][07135] Avg episode rewards: #0: 4.406, true rewards: #0: 3.977
+[2024-12-19 11:05:52,500][07135] Avg episode reward: 4.406, avg true_objective: 3.977
+[2024-12-19 11:05:52,529][07135] Num frames 2800...
+[2024-12-19 11:05:52,655][07135] Num frames 2900...
+[2024-12-19 11:05:52,767][07135] Avg episode rewards: #0: 4.058, true rewards: #0: 3.682
+[2024-12-19 11:05:52,768][07135] Avg episode reward: 4.058, avg true_objective: 3.682
+[2024-12-19 11:05:52,839][07135] Num frames 3000...
+[2024-12-19 11:05:52,972][07135] Num frames 3100...
+[2024-12-19 11:05:53,091][07135] Num frames 3200...
+[2024-12-19 11:05:53,222][07135] Num frames 3300...
+[2024-12-19 11:05:53,313][07135] Avg episode rewards: #0: 4.033, true rewards: #0: 3.700
+[2024-12-19 11:05:53,315][07135] Avg episode reward: 4.033, avg true_objective: 3.700
+[2024-12-19 11:05:53,401][07135] Num frames 3400...
+[2024-12-19 11:05:53,534][07135] Num frames 3500...
+[2024-12-19 11:05:53,657][07135] Num frames 3600...
+[2024-12-19 11:05:53,780][07135] Num frames 3700...
+[2024-12-19 11:05:53,932][07135] Avg episode rewards: #0: 4.178, true rewards: #0: 3.778
+[2024-12-19 11:05:53,933][07135] Avg episode reward: 4.178, avg true_objective: 3.778
+[2024-12-19 11:06:11,341][07135] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
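The section ends with three enjoy/evaluation passes against the final checkpoint_000002443_10006528.pth: one local (push_to_hub=False), one pushed with a mistyped 'Esteabn00007/...' repository id, and a final one pushed to 'Esteban00007/rl_course_vizdoom_health_gathering_supreme'. Uploading the experiment directory (config.json, checkpoint_p0/*.pth, replay.mp4) can also be done directly with huggingface_hub; a hedged sketch (the repo id and paths come from this log, the rest is generic huggingface_hub usage, not the exact command the course notebook runs):

    from huggingface_hub import HfApi

    api = HfApi()
    repo_id = "Esteban00007/rl_course_vizdoom_health_gathering_supreme"
    api.create_repo(repo_id, repo_type="model", exist_ok=True)
    # upload config.json, checkpoint_p0/*.pth and replay.mp4 in one call
    api.upload_folder(
        repo_id=repo_id,
        folder_path="/content/train_dir/default_experiment",
        repo_type="model",
    )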