diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -1329,3 +1329,1319 @@ main_loop: 1158.9681 [2024-09-18 11:43:58,360][00268] Avg episode rewards: #0: 28.408, true rewards: #0: 11.708 [2024-09-18 11:43:58,361][00268] Avg episode reward: 28.408, avg true_objective: 11.708 [2024-09-18 11:45:12,588][00268] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2024-09-18 11:45:17,605][00268] The model has been pushed to https://huggingface.co./mkdem/rl_course_vizdoom_health_gathering_supreme +[2024-09-18 11:46:31,646][00268] Environment doom_basic already registered, overwriting... +[2024-09-18 11:46:31,649][00268] Environment doom_two_colors_easy already registered, overwriting... +[2024-09-18 11:46:31,651][00268] Environment doom_two_colors_hard already registered, overwriting... +[2024-09-18 11:46:31,654][00268] Environment doom_dm already registered, overwriting... +[2024-09-18 11:46:31,655][00268] Environment doom_dwango5 already registered, overwriting... +[2024-09-18 11:46:31,658][00268] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2024-09-18 11:46:31,659][00268] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2024-09-18 11:46:31,661][00268] Environment doom_my_way_home already registered, overwriting... +[2024-09-18 11:46:31,665][00268] Environment doom_deadly_corridor already registered, overwriting... +[2024-09-18 11:46:31,667][00268] Environment doom_defend_the_center already registered, overwriting... +[2024-09-18 11:46:31,669][00268] Environment doom_defend_the_line already registered, overwriting... +[2024-09-18 11:46:31,670][00268] Environment doom_health_gathering already registered, overwriting... +[2024-09-18 11:46:31,671][00268] Environment doom_health_gathering_supreme already registered, overwriting... +[2024-09-18 11:46:31,672][00268] Environment doom_battle already registered, overwriting... +[2024-09-18 11:46:31,673][00268] Environment doom_battle2 already registered, overwriting... +[2024-09-18 11:46:31,674][00268] Environment doom_duel_bots already registered, overwriting... +[2024-09-18 11:46:31,675][00268] Environment doom_deathmatch_bots already registered, overwriting... +[2024-09-18 11:46:31,676][00268] Environment doom_duel already registered, overwriting... +[2024-09-18 11:46:31,677][00268] Environment doom_deathmatch_full already registered, overwriting... +[2024-09-18 11:46:31,678][00268] Environment doom_benchmark already registered, overwriting... +[2024-09-18 11:46:31,679][00268] register_encoder_factory: +[2024-09-18 11:46:31,704][00268] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-09-18 11:46:31,705][00268] Overriding arg 'train_for_env_steps' with value 8000000 passed from command line +[2024-09-18 11:46:31,712][00268] Experiment dir /content/train_dir/default_experiment already exists! +[2024-09-18 11:46:31,713][00268] Resuming existing experiment from /content/train_dir/default_experiment... +[2024-09-18 11:46:31,714][00268] Weights and Biases integration disabled +[2024-09-18 11:46:31,718][00268] Environment var CUDA_VISIBLE_DEVICES is 0 + +[2024-09-18 11:46:33,229][00268] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/content/train_dir +restart_behavior=resume +device=gpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=8000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None +wandb_job_type=SF +wandb_tags=[] +with_pbt=False +pbt_mix_policies_in_one_env=True +pbt_period_env_steps=5000000 +pbt_start_mutation=20000000 +pbt_replace_fraction=0.3 +pbt_mutation_rate=0.15 +pbt_replace_reward_gap=0.1 +pbt_replace_reward_gap_absolute=1e-06 +pbt_optimize_gamma=False +pbt_target_objective=true_objective +pbt_perturb_min=1.1 +pbt_perturb_max=1.5 +num_agents=-1 +num_humans=0 +num_bots=-1 +start_bot_difficulty=None +timelimit=None +res_w=128 +res_h=72 +wide_aspect_ratio=False +eval_env_frameskip=1 +fps=35 +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000 +cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} +git_hash=unknown +git_repo_name=not a git repository +[2024-09-18 11:46:33,231][00268] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-09-18 11:46:33,236][00268] Rollout worker 0 uses device cpu +[2024-09-18 11:46:33,237][00268] Rollout worker 1 uses device cpu +[2024-09-18 11:46:33,239][00268] Rollout worker 2 uses device cpu +[2024-09-18 11:46:33,240][00268] Rollout worker 3 uses device cpu +[2024-09-18 11:46:33,241][00268] Rollout worker 4 uses device cpu +[2024-09-18 11:46:33,242][00268] Rollout worker 5 uses device cpu +[2024-09-18 11:46:33,243][00268] Rollout worker 6 uses device cpu +[2024-09-18 11:46:33,245][00268] Rollout worker 7 uses device cpu +[2024-09-18 11:46:33,353][00268] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-18 11:46:33,354][00268] InferenceWorker_p0-w0: min num requests: 2 +[2024-09-18 11:46:33,389][00268] Starting all processes... +[2024-09-18 11:46:33,391][00268] Starting process learner_proc0 +[2024-09-18 11:46:33,439][00268] Starting all processes... +[2024-09-18 11:46:33,445][00268] Starting process inference_proc0-0 +[2024-09-18 11:46:33,446][00268] Starting process rollout_proc0 +[2024-09-18 11:46:33,452][00268] Starting process rollout_proc1 +[2024-09-18 11:46:33,454][00268] Starting process rollout_proc2 +[2024-09-18 11:46:33,454][00268] Starting process rollout_proc3 +[2024-09-18 11:46:33,454][00268] Starting process rollout_proc4 +[2024-09-18 11:46:33,454][00268] Starting process rollout_proc5 +[2024-09-18 11:46:33,455][00268] Starting process rollout_proc6 +[2024-09-18 11:46:33,455][00268] Starting process rollout_proc7 +[2024-09-18 11:46:44,326][11908] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-18 11:46:44,329][11908] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-09-18 11:46:44,403][11908] Num visible devices: 1 +[2024-09-18 11:46:44,441][11908] Starting seed is not provided +[2024-09-18 11:46:44,442][11908] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-18 11:46:44,443][11908] Initializing actor-critic model on device cuda:0 +[2024-09-18 11:46:44,444][11908] RunningMeanStd input shape: (3, 72, 128) +[2024-09-18 11:46:44,451][11908] RunningMeanStd input shape: (1,) +[2024-09-18 11:46:44,529][11908] ConvEncoder: input_channels=3 +[2024-09-18 11:46:45,289][11922] Worker 0 uses CPU cores [0] +[2024-09-18 11:46:45,419][11925] Worker 3 uses CPU cores [1] +[2024-09-18 11:46:45,473][11921] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-18 11:46:45,479][11921] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-09-18 11:46:45,511][11908] Conv encoder output size: 512 +[2024-09-18 11:46:45,513][11908] Policy head output size: 512 +[2024-09-18 11:46:45,564][11921] Num visible devices: 1 +[2024-09-18 11:46:45,602][11927] Worker 6 uses CPU cores [0] +[2024-09-18 11:46:45,604][11908] Created Actor Critic model with architecture: +[2024-09-18 11:46:45,610][11908] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2024-09-18 11:46:45,682][11928] Worker 5 uses CPU cores [1] +[2024-09-18 11:46:45,712][11924] Worker 2 uses CPU cores [0] +[2024-09-18 11:46:45,724][11929] Worker 7 uses CPU cores [1] +[2024-09-18 11:46:45,730][11926] Worker 4 uses CPU cores [0] +[2024-09-18 11:46:45,744][11923] Worker 1 uses CPU cores [1] +[2024-09-18 11:46:47,399][11908] Using optimizer +[2024-09-18 11:46:47,400][11908] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-09-18 11:46:47,436][11908] Loading model from checkpoint +[2024-09-18 11:46:47,441][11908] Loaded experiment state at self.train_step=978, self.env_steps=4005888 +[2024-09-18 11:46:47,441][11908] Initialized policy 0 weights for model version 978 +[2024-09-18 11:46:47,444][11908] LearnerWorker_p0 finished initialization! +[2024-09-18 11:46:47,445][11908] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-18 11:46:47,637][11921] RunningMeanStd input shape: (3, 72, 128) +[2024-09-18 11:46:47,638][11921] RunningMeanStd input shape: (1,) +[2024-09-18 11:46:47,650][11921] ConvEncoder: input_channels=3 +[2024-09-18 11:46:47,750][11921] Conv encoder output size: 512 +[2024-09-18 11:46:47,750][11921] Policy head output size: 512 +[2024-09-18 11:46:49,350][00268] Inference worker 0-0 is ready! +[2024-09-18 11:46:49,352][00268] All inference workers are ready! Signal rollout workers to start! +[2024-09-18 11:46:49,474][11929] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-18 11:46:49,474][11923] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-18 11:46:49,480][11928] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-18 11:46:49,482][11925] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-18 11:46:49,558][11922] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-18 11:46:49,586][11927] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-18 11:46:49,588][11926] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-18 11:46:49,619][11924] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-18 11:46:50,920][11927] Decorrelating experience for 0 frames... +[2024-09-18 11:46:50,922][11926] Decorrelating experience for 0 frames... +[2024-09-18 11:46:50,924][11922] Decorrelating experience for 0 frames... +[2024-09-18 11:46:50,919][11925] Decorrelating experience for 0 frames... +[2024-09-18 11:46:50,924][11928] Decorrelating experience for 0 frames... +[2024-09-18 11:46:50,928][11929] Decorrelating experience for 0 frames... +[2024-09-18 11:46:51,688][11928] Decorrelating experience for 32 frames... +[2024-09-18 11:46:51,698][11929] Decorrelating experience for 32 frames... +[2024-09-18 11:46:51,719][00268] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 4005888. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-18 11:46:52,174][11922] Decorrelating experience for 32 frames... +[2024-09-18 11:46:52,180][11927] Decorrelating experience for 32 frames... +[2024-09-18 11:46:52,185][11926] Decorrelating experience for 32 frames... +[2024-09-18 11:46:53,088][11923] Decorrelating experience for 0 frames... +[2024-09-18 11:46:53,222][11929] Decorrelating experience for 64 frames... +[2024-09-18 11:46:53,265][11924] Decorrelating experience for 0 frames... +[2024-09-18 11:46:53,344][00268] Heartbeat connected on Batcher_0 +[2024-09-18 11:46:53,348][00268] Heartbeat connected on LearnerWorker_p0 +[2024-09-18 11:46:53,385][00268] Heartbeat connected on InferenceWorker_p0-w0 +[2024-09-18 11:46:53,804][11922] Decorrelating experience for 64 frames... +[2024-09-18 11:46:53,830][11926] Decorrelating experience for 64 frames... +[2024-09-18 11:46:53,853][11928] Decorrelating experience for 64 frames... +[2024-09-18 11:46:53,948][11925] Decorrelating experience for 32 frames... +[2024-09-18 11:46:54,935][11923] Decorrelating experience for 32 frames... +[2024-09-18 11:46:55,510][11924] Decorrelating experience for 32 frames... +[2024-09-18 11:46:55,552][11927] Decorrelating experience for 64 frames... +[2024-09-18 11:46:55,697][11926] Decorrelating experience for 96 frames... +[2024-09-18 11:46:56,057][00268] Heartbeat connected on RolloutWorker_w4 +[2024-09-18 11:46:56,111][11929] Decorrelating experience for 96 frames... +[2024-09-18 11:46:56,709][00268] Heartbeat connected on RolloutWorker_w7 +[2024-09-18 11:46:56,719][00268] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4005888. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-18 11:46:56,884][11925] Decorrelating experience for 64 frames... +[2024-09-18 11:46:57,644][11922] Decorrelating experience for 96 frames... +[2024-09-18 11:46:57,927][11927] Decorrelating experience for 96 frames... +[2024-09-18 11:46:58,075][00268] Heartbeat connected on RolloutWorker_w0 +[2024-09-18 11:46:58,433][00268] Heartbeat connected on RolloutWorker_w6 +[2024-09-18 11:46:58,565][11923] Decorrelating experience for 64 frames... +[2024-09-18 11:47:00,133][11925] Decorrelating experience for 96 frames... +[2024-09-18 11:47:00,378][00268] Heartbeat connected on RolloutWorker_w3 +[2024-09-18 11:47:00,402][11928] Decorrelating experience for 96 frames... +[2024-09-18 11:47:01,119][00268] Heartbeat connected on RolloutWorker_w5 +[2024-09-18 11:47:01,719][00268] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4005888. Throughput: 0: 174.4. Samples: 1744. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-18 11:47:01,722][00268] Avg episode reward: [(0, '3.475')] +[2024-09-18 11:47:02,068][11924] Decorrelating experience for 64 frames... +[2024-09-18 11:47:02,479][11908] Signal inference workers to stop experience collection... +[2024-09-18 11:47:02,499][11921] InferenceWorker_p0-w0: stopping experience collection +[2024-09-18 11:47:02,837][11923] Decorrelating experience for 96 frames... +[2024-09-18 11:47:03,015][11924] Decorrelating experience for 96 frames... +[2024-09-18 11:47:03,044][00268] Heartbeat connected on RolloutWorker_w1 +[2024-09-18 11:47:03,086][00268] Heartbeat connected on RolloutWorker_w2 +[2024-09-18 11:47:04,497][11908] Signal inference workers to resume experience collection... +[2024-09-18 11:47:04,499][11921] InferenceWorker_p0-w0: resuming experience collection +[2024-09-18 11:47:06,719][00268] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 4018176. Throughput: 0: 162.5. Samples: 2438. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-09-18 11:47:06,726][00268] Avg episode reward: [(0, '4.331')] +[2024-09-18 11:47:11,719][00268] Fps is (10 sec: 2867.2, 60 sec: 1433.6, 300 sec: 1433.6). Total num frames: 4034560. Throughput: 0: 384.9. Samples: 7698. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-18 11:47:11,727][00268] Avg episode reward: [(0, '10.969')] +[2024-09-18 11:47:15,667][11921] Updated weights for policy 0, policy_version 988 (0.0379) +[2024-09-18 11:47:16,722][00268] Fps is (10 sec: 2866.2, 60 sec: 1638.2, 300 sec: 1638.2). Total num frames: 4046848. Throughput: 0: 456.0. Samples: 11402. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:47:16,725][00268] Avg episode reward: [(0, '15.098')] +[2024-09-18 11:47:21,719][00268] Fps is (10 sec: 3276.8, 60 sec: 2048.0, 300 sec: 2048.0). Total num frames: 4067328. Throughput: 0: 454.8. Samples: 13644. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:47:21,724][00268] Avg episode reward: [(0, '17.551')] +[2024-09-18 11:47:26,089][11921] Updated weights for policy 0, policy_version 998 (0.0023) +[2024-09-18 11:47:26,719][00268] Fps is (10 sec: 4097.5, 60 sec: 2340.6, 300 sec: 2340.6). Total num frames: 4087808. Throughput: 0: 576.7. Samples: 20184. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:47:26,726][00268] Avg episode reward: [(0, '18.977')] +[2024-09-18 11:47:31,724][00268] Fps is (10 sec: 3684.5, 60 sec: 2457.3, 300 sec: 2457.3). Total num frames: 4104192. Throughput: 0: 642.8. Samples: 25716. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:47:31,727][00268] Avg episode reward: [(0, '19.347')] +[2024-09-18 11:47:36,719][00268] Fps is (10 sec: 3276.8, 60 sec: 2548.6, 300 sec: 2548.6). Total num frames: 4120576. Throughput: 0: 617.2. Samples: 27776. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:47:36,721][00268] Avg episode reward: [(0, '21.375')] +[2024-09-18 11:47:38,323][11921] Updated weights for policy 0, policy_version 1008 (0.0022) +[2024-09-18 11:47:41,719][00268] Fps is (10 sec: 3688.2, 60 sec: 2703.4, 300 sec: 2703.4). Total num frames: 4141056. Throughput: 0: 745.3. Samples: 33540. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:47:41,721][00268] Avg episode reward: [(0, '23.461')] +[2024-09-18 11:47:46,720][00268] Fps is (10 sec: 4095.4, 60 sec: 2829.9, 300 sec: 2829.9). Total num frames: 4161536. Throughput: 0: 848.6. Samples: 39930. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:47:46,723][00268] Avg episode reward: [(0, '23.648')] +[2024-09-18 11:47:48,762][11921] Updated weights for policy 0, policy_version 1018 (0.0025) +[2024-09-18 11:47:51,719][00268] Fps is (10 sec: 3686.4, 60 sec: 2867.2, 300 sec: 2867.2). Total num frames: 4177920. Throughput: 0: 877.6. Samples: 41928. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:47:51,723][00268] Avg episode reward: [(0, '23.841')] +[2024-09-18 11:47:56,719][00268] Fps is (10 sec: 3277.3, 60 sec: 3140.3, 300 sec: 2898.7). Total num frames: 4194304. Throughput: 0: 868.5. Samples: 46782. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:47:56,726][00268] Avg episode reward: [(0, '23.940')] +[2024-09-18 11:47:59,826][11921] Updated weights for policy 0, policy_version 1028 (0.0017) +[2024-09-18 11:48:01,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 2984.2). Total num frames: 4214784. Throughput: 0: 933.2. Samples: 53394. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-18 11:48:01,725][00268] Avg episode reward: [(0, '25.074')] +[2024-09-18 11:48:06,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3003.7). Total num frames: 4231168. Throughput: 0: 948.6. Samples: 56332. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:48:06,724][00268] Avg episode reward: [(0, '25.138')] +[2024-09-18 11:48:11,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3020.8). Total num frames: 4247552. Throughput: 0: 892.9. Samples: 60364. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-18 11:48:11,721][00268] Avg episode reward: [(0, '24.553')] +[2024-09-18 11:48:12,035][11921] Updated weights for policy 0, policy_version 1038 (0.0027) +[2024-09-18 11:48:16,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.9, 300 sec: 3132.2). Total num frames: 4272128. Throughput: 0: 911.2. Samples: 66716. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:48:16,721][00268] Avg episode reward: [(0, '26.331')] +[2024-09-18 11:48:21,647][11921] Updated weights for policy 0, policy_version 1048 (0.0012) +[2024-09-18 11:48:21,720][00268] Fps is (10 sec: 4505.3, 60 sec: 3754.6, 300 sec: 3185.7). Total num frames: 4292608. Throughput: 0: 938.4. Samples: 70004. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:48:21,726][00268] Avg episode reward: [(0, '25.587')] +[2024-09-18 11:48:26,721][00268] Fps is (10 sec: 3276.0, 60 sec: 3618.0, 300 sec: 3147.4). Total num frames: 4304896. Throughput: 0: 913.6. Samples: 74652. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:48:26,727][00268] Avg episode reward: [(0, '24.711')] +[2024-09-18 11:48:31,719][00268] Fps is (10 sec: 3276.9, 60 sec: 3686.7, 300 sec: 3194.9). Total num frames: 4325376. Throughput: 0: 894.0. Samples: 80160. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:48:31,723][00268] Avg episode reward: [(0, '25.028')] +[2024-09-18 11:48:31,734][11908] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001056_4325376.pth... +[2024-09-18 11:48:31,889][11908] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000910_3727360.pth +[2024-09-18 11:48:33,602][11921] Updated weights for policy 0, policy_version 1058 (0.0019) +[2024-09-18 11:48:36,719][00268] Fps is (10 sec: 4096.9, 60 sec: 3754.7, 300 sec: 3237.8). Total num frames: 4345856. Throughput: 0: 918.5. Samples: 83262. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:48:36,721][00268] Avg episode reward: [(0, '25.041')] +[2024-09-18 11:48:41,724][00268] Fps is (10 sec: 3684.7, 60 sec: 3686.1, 300 sec: 3239.4). Total num frames: 4362240. Throughput: 0: 932.8. Samples: 88762. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:48:41,736][00268] Avg episode reward: [(0, '25.959')] +[2024-09-18 11:48:45,767][11921] Updated weights for policy 0, policy_version 1068 (0.0017) +[2024-09-18 11:48:46,719][00268] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3205.6). Total num frames: 4374528. Throughput: 0: 887.6. Samples: 93336. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:48:46,722][00268] Avg episode reward: [(0, '25.444')] +[2024-09-18 11:48:51,719][00268] Fps is (10 sec: 3688.3, 60 sec: 3686.4, 300 sec: 3276.8). Total num frames: 4399104. Throughput: 0: 895.6. Samples: 96632. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-18 11:48:51,726][00268] Avg episode reward: [(0, '25.901')] +[2024-09-18 11:48:55,463][11921] Updated weights for policy 0, policy_version 1078 (0.0018) +[2024-09-18 11:48:56,720][00268] Fps is (10 sec: 4095.4, 60 sec: 3686.3, 300 sec: 3276.8). Total num frames: 4415488. Throughput: 0: 948.5. Samples: 103048. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-18 11:48:56,727][00268] Avg episode reward: [(0, '26.604')] +[2024-09-18 11:49:01,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3276.8). Total num frames: 4431872. Throughput: 0: 898.6. Samples: 107154. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:49:01,724][00268] Avg episode reward: [(0, '28.095')] +[2024-09-18 11:49:01,733][11908] Saving new best policy, reward=28.095! +[2024-09-18 11:49:06,719][00268] Fps is (10 sec: 3687.0, 60 sec: 3686.4, 300 sec: 3307.1). Total num frames: 4452352. Throughput: 0: 886.7. Samples: 109904. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:49:06,722][00268] Avg episode reward: [(0, '28.559')] +[2024-09-18 11:49:06,725][11908] Saving new best policy, reward=28.559! +[2024-09-18 11:49:07,583][11921] Updated weights for policy 0, policy_version 1088 (0.0025) +[2024-09-18 11:49:11,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3335.3). Total num frames: 4472832. Throughput: 0: 926.6. Samples: 116348. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 11:49:11,721][00268] Avg episode reward: [(0, '26.669')] +[2024-09-18 11:49:16,722][00268] Fps is (10 sec: 3685.1, 60 sec: 3617.9, 300 sec: 3333.2). Total num frames: 4489216. Throughput: 0: 913.1. Samples: 121250. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:49:16,725][00268] Avg episode reward: [(0, '25.952')] +[2024-09-18 11:49:19,341][11921] Updated weights for policy 0, policy_version 1098 (0.0032) +[2024-09-18 11:49:21,720][00268] Fps is (10 sec: 3276.3, 60 sec: 3549.8, 300 sec: 3331.4). Total num frames: 4505600. Throughput: 0: 888.6. Samples: 123250. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:49:21,723][00268] Avg episode reward: [(0, '24.703')] +[2024-09-18 11:49:26,719][00268] Fps is (10 sec: 3687.7, 60 sec: 3686.5, 300 sec: 3356.1). Total num frames: 4526080. Throughput: 0: 910.1. Samples: 129712. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-18 11:49:26,721][00268] Avg episode reward: [(0, '23.417')] +[2024-09-18 11:49:28,880][11921] Updated weights for policy 0, policy_version 1108 (0.0012) +[2024-09-18 11:49:31,719][00268] Fps is (10 sec: 4096.6, 60 sec: 3686.4, 300 sec: 3379.2). Total num frames: 4546560. Throughput: 0: 940.9. Samples: 135676. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:49:31,723][00268] Avg episode reward: [(0, '20.791')] +[2024-09-18 11:49:36,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3351.3). Total num frames: 4558848. Throughput: 0: 912.2. Samples: 137682. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-18 11:49:36,722][00268] Avg episode reward: [(0, '19.184')] +[2024-09-18 11:49:40,969][11921] Updated weights for policy 0, policy_version 1118 (0.0012) +[2024-09-18 11:49:41,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.4, 300 sec: 3373.2). Total num frames: 4579328. Throughput: 0: 893.0. Samples: 143230. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:49:41,728][00268] Avg episode reward: [(0, '19.430')] +[2024-09-18 11:49:46,719][00268] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3417.2). Total num frames: 4603904. Throughput: 0: 949.7. Samples: 149890. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:49:46,725][00268] Avg episode reward: [(0, '20.006')] +[2024-09-18 11:49:51,725][00268] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3390.6). Total num frames: 4616192. Throughput: 0: 936.0. Samples: 152022. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:49:51,728][00268] Avg episode reward: [(0, '21.042')] +[2024-09-18 11:49:52,336][11921] Updated weights for policy 0, policy_version 1128 (0.0045) +[2024-09-18 11:49:56,719][00268] Fps is (10 sec: 2867.2, 60 sec: 3618.2, 300 sec: 3387.5). Total num frames: 4632576. Throughput: 0: 894.8. Samples: 156612. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:49:56,724][00268] Avg episode reward: [(0, '22.710')] +[2024-09-18 11:50:01,719][00268] Fps is (10 sec: 4095.9, 60 sec: 3754.6, 300 sec: 3427.7). Total num frames: 4657152. Throughput: 0: 933.9. Samples: 163274. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 11:50:01,726][00268] Avg episode reward: [(0, '25.060')] +[2024-09-18 11:50:02,465][11921] Updated weights for policy 0, policy_version 1138 (0.0013) +[2024-09-18 11:50:06,719][00268] Fps is (10 sec: 4095.9, 60 sec: 3686.4, 300 sec: 3423.8). Total num frames: 4673536. Throughput: 0: 958.7. Samples: 166392. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:50:06,725][00268] Avg episode reward: [(0, '26.301')] +[2024-09-18 11:50:11,719][00268] Fps is (10 sec: 2867.3, 60 sec: 3549.9, 300 sec: 3399.7). Total num frames: 4685824. Throughput: 0: 905.6. Samples: 170462. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:50:11,721][00268] Avg episode reward: [(0, '27.010')] +[2024-09-18 11:50:14,728][11921] Updated weights for policy 0, policy_version 1148 (0.0027) +[2024-09-18 11:50:16,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.6, 300 sec: 3436.6). Total num frames: 4710400. Throughput: 0: 907.2. Samples: 176498. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 11:50:16,723][00268] Avg episode reward: [(0, '28.325')] +[2024-09-18 11:50:21,719][00268] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3452.3). Total num frames: 4730880. Throughput: 0: 931.6. Samples: 179602. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:50:21,723][00268] Avg episode reward: [(0, '28.817')] +[2024-09-18 11:50:21,736][11908] Saving new best policy, reward=28.817! +[2024-09-18 11:50:26,219][11921] Updated weights for policy 0, policy_version 1158 (0.0019) +[2024-09-18 11:50:26,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3429.2). Total num frames: 4743168. Throughput: 0: 913.9. Samples: 184356. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:50:26,722][00268] Avg episode reward: [(0, '29.033')] +[2024-09-18 11:50:26,727][11908] Saving new best policy, reward=29.033! +[2024-09-18 11:50:31,719][00268] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3425.7). Total num frames: 4759552. Throughput: 0: 877.0. Samples: 189354. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:50:31,722][00268] Avg episode reward: [(0, '30.088')] +[2024-09-18 11:50:31,730][11908] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001162_4759552.pth... +[2024-09-18 11:50:31,901][11908] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth +[2024-09-18 11:50:31,919][11908] Saving new best policy, reward=30.088! +[2024-09-18 11:50:36,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3440.6). Total num frames: 4780032. Throughput: 0: 896.5. Samples: 192364. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:50:36,722][00268] Avg episode reward: [(0, '29.569')] +[2024-09-18 11:50:37,058][11921] Updated weights for policy 0, policy_version 1168 (0.0030) +[2024-09-18 11:50:41,725][00268] Fps is (10 sec: 3684.1, 60 sec: 3617.7, 300 sec: 3437.0). Total num frames: 4796416. Throughput: 0: 922.0. Samples: 198108. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 11:50:41,732][00268] Avg episode reward: [(0, '29.314')] +[2024-09-18 11:50:46,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3433.7). Total num frames: 4812800. Throughput: 0: 865.7. Samples: 202230. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:50:46,722][00268] Avg episode reward: [(0, '27.605')] +[2024-09-18 11:50:49,498][11921] Updated weights for policy 0, policy_version 1178 (0.0014) +[2024-09-18 11:50:51,719][00268] Fps is (10 sec: 3688.8, 60 sec: 3618.1, 300 sec: 3447.5). Total num frames: 4833280. Throughput: 0: 863.9. Samples: 205266. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:50:51,724][00268] Avg episode reward: [(0, '27.196')] +[2024-09-18 11:50:56,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3460.7). Total num frames: 4853760. Throughput: 0: 920.2. Samples: 211870. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-18 11:50:56,725][00268] Avg episode reward: [(0, '24.836')] +[2024-09-18 11:51:00,346][11921] Updated weights for policy 0, policy_version 1188 (0.0015) +[2024-09-18 11:51:01,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3440.6). Total num frames: 4866048. Throughput: 0: 882.3. Samples: 216200. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:51:01,721][00268] Avg episode reward: [(0, '23.741')] +[2024-09-18 11:51:06,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3453.5). Total num frames: 4886528. Throughput: 0: 865.7. Samples: 218560. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:51:06,725][00268] Avg episode reward: [(0, '22.872')] +[2024-09-18 11:51:11,084][11921] Updated weights for policy 0, policy_version 1198 (0.0021) +[2024-09-18 11:51:11,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3465.8). Total num frames: 4907008. Throughput: 0: 906.5. Samples: 225150. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:51:11,725][00268] Avg episode reward: [(0, '24.063')] +[2024-09-18 11:51:16,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3462.3). Total num frames: 4923392. Throughput: 0: 913.6. Samples: 230464. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:51:16,723][00268] Avg episode reward: [(0, '22.917')] +[2024-09-18 11:51:21,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3458.8). Total num frames: 4939776. Throughput: 0: 890.2. Samples: 232424. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:51:21,723][00268] Avg episode reward: [(0, '23.143')] +[2024-09-18 11:51:23,297][11921] Updated weights for policy 0, policy_version 1208 (0.0027) +[2024-09-18 11:51:26,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3470.4). Total num frames: 4960256. Throughput: 0: 895.2. Samples: 238386. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:51:26,726][00268] Avg episode reward: [(0, '24.554')] +[2024-09-18 11:51:31,720][00268] Fps is (10 sec: 4095.4, 60 sec: 3686.3, 300 sec: 3481.6). Total num frames: 4980736. Throughput: 0: 946.1. Samples: 244806. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:51:31,725][00268] Avg episode reward: [(0, '27.471')] +[2024-09-18 11:51:33,757][11921] Updated weights for policy 0, policy_version 1218 (0.0015) +[2024-09-18 11:51:36,726][00268] Fps is (10 sec: 3683.7, 60 sec: 3617.7, 300 sec: 3477.9). Total num frames: 4997120. Throughput: 0: 920.9. Samples: 246714. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:51:36,729][00268] Avg episode reward: [(0, '28.113')] +[2024-09-18 11:51:41,719][00268] Fps is (10 sec: 3277.2, 60 sec: 3618.5, 300 sec: 3474.5). Total num frames: 5013504. Throughput: 0: 888.7. Samples: 251860. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:51:41,730][00268] Avg episode reward: [(0, '26.407')] +[2024-09-18 11:51:44,646][11921] Updated weights for policy 0, policy_version 1228 (0.0017) +[2024-09-18 11:51:46,719][00268] Fps is (10 sec: 4099.0, 60 sec: 3754.7, 300 sec: 3499.0). Total num frames: 5038080. Throughput: 0: 939.8. Samples: 258490. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:51:46,721][00268] Avg episode reward: [(0, '28.514')] +[2024-09-18 11:51:51,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 5050368. Throughput: 0: 944.6. Samples: 261068. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:51:51,722][00268] Avg episode reward: [(0, '29.487')] +[2024-09-18 11:51:56,719][00268] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3596.1). Total num frames: 5066752. Throughput: 0: 888.5. Samples: 265134. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 11:51:56,723][00268] Avg episode reward: [(0, '30.170')] +[2024-09-18 11:51:56,727][11908] Saving new best policy, reward=30.170! +[2024-09-18 11:51:57,077][11921] Updated weights for policy 0, policy_version 1238 (0.0026) +[2024-09-18 11:52:01,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3623.9). Total num frames: 5087232. Throughput: 0: 914.9. Samples: 271634. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:52:01,721][00268] Avg episode reward: [(0, '28.542')] +[2024-09-18 11:52:06,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3637.8). Total num frames: 5107712. Throughput: 0: 944.9. Samples: 274946. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:52:06,728][00268] Avg episode reward: [(0, '28.131')] +[2024-09-18 11:52:06,902][11921] Updated weights for policy 0, policy_version 1248 (0.0015) +[2024-09-18 11:52:11,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 5124096. Throughput: 0: 911.0. Samples: 279380. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:52:11,722][00268] Avg episode reward: [(0, '28.898')] +[2024-09-18 11:52:16,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 5144576. Throughput: 0: 894.2. Samples: 285044. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:52:16,722][00268] Avg episode reward: [(0, '29.064')] +[2024-09-18 11:52:18,514][11921] Updated weights for policy 0, policy_version 1258 (0.0019) +[2024-09-18 11:52:21,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 5165056. Throughput: 0: 920.4. Samples: 288126. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:52:21,723][00268] Avg episode reward: [(0, '27.645')] +[2024-09-18 11:52:26,719][00268] Fps is (10 sec: 3686.3, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 5181440. Throughput: 0: 927.0. Samples: 293576. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-18 11:52:26,728][00268] Avg episode reward: [(0, '26.540')] +[2024-09-18 11:52:30,732][11921] Updated weights for policy 0, policy_version 1268 (0.0016) +[2024-09-18 11:52:31,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.2, 300 sec: 3651.7). Total num frames: 5197824. Throughput: 0: 886.1. Samples: 298366. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:52:31,725][00268] Avg episode reward: [(0, '28.213')] +[2024-09-18 11:52:31,734][11908] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001269_5197824.pth... +[2024-09-18 11:52:31,936][11908] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001056_4325376.pth +[2024-09-18 11:52:36,719][00268] Fps is (10 sec: 3686.5, 60 sec: 3686.9, 300 sec: 3651.7). Total num frames: 5218304. Throughput: 0: 899.4. Samples: 301542. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:52:36,721][00268] Avg episode reward: [(0, '27.068')] +[2024-09-18 11:52:39,984][11921] Updated weights for policy 0, policy_version 1278 (0.0013) +[2024-09-18 11:52:41,720][00268] Fps is (10 sec: 4095.5, 60 sec: 3754.6, 300 sec: 3651.7). Total num frames: 5238784. Throughput: 0: 951.7. Samples: 307960. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:52:41,727][00268] Avg episode reward: [(0, '25.255')] +[2024-09-18 11:52:46,719][00268] Fps is (10 sec: 3276.7, 60 sec: 3549.9, 300 sec: 3637.8). Total num frames: 5251072. Throughput: 0: 899.1. Samples: 312092. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:52:46,722][00268] Avg episode reward: [(0, '25.490')] +[2024-09-18 11:52:51,719][00268] Fps is (10 sec: 3277.2, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 5271552. Throughput: 0: 892.8. Samples: 315120. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:52:51,722][00268] Avg episode reward: [(0, '26.781')] +[2024-09-18 11:52:51,930][11921] Updated weights for policy 0, policy_version 1288 (0.0012) +[2024-09-18 11:52:56,719][00268] Fps is (10 sec: 4505.7, 60 sec: 3822.9, 300 sec: 3665.6). Total num frames: 5296128. Throughput: 0: 946.5. Samples: 321974. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:52:56,722][00268] Avg episode reward: [(0, '26.026')] +[2024-09-18 11:53:01,720][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 5308416. Throughput: 0: 923.8. Samples: 326616. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:53:01,724][00268] Avg episode reward: [(0, '26.079')] +[2024-09-18 11:53:03,766][11921] Updated weights for policy 0, policy_version 1298 (0.0017) +[2024-09-18 11:53:06,719][00268] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 5328896. Throughput: 0: 902.8. Samples: 328752. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:53:06,722][00268] Avg episode reward: [(0, '26.119')] +[2024-09-18 11:53:11,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 5349376. Throughput: 0: 928.9. Samples: 335374. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:53:11,728][00268] Avg episode reward: [(0, '28.086')] +[2024-09-18 11:53:13,350][11921] Updated weights for policy 0, policy_version 1308 (0.0022) +[2024-09-18 11:53:16,719][00268] Fps is (10 sec: 3686.5, 60 sec: 3686.4, 300 sec: 3637.8). Total num frames: 5365760. Throughput: 0: 945.7. Samples: 340924. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:53:16,721][00268] Avg episode reward: [(0, '28.089')] +[2024-09-18 11:53:21,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 5382144. Throughput: 0: 918.9. Samples: 342894. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:53:21,721][00268] Avg episode reward: [(0, '26.776')] +[2024-09-18 11:53:25,449][11921] Updated weights for policy 0, policy_version 1318 (0.0013) +[2024-09-18 11:53:26,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 5402624. Throughput: 0: 905.4. Samples: 348704. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:53:26,727][00268] Avg episode reward: [(0, '27.871')] +[2024-09-18 11:53:31,719][00268] Fps is (10 sec: 4095.9, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 5423104. Throughput: 0: 960.0. Samples: 355292. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:53:31,724][00268] Avg episode reward: [(0, '28.868')] +[2024-09-18 11:53:36,644][11921] Updated weights for policy 0, policy_version 1328 (0.0041) +[2024-09-18 11:53:36,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3651.8). Total num frames: 5439488. Throughput: 0: 937.7. Samples: 357318. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:53:36,720][00268] Avg episode reward: [(0, '28.193')] +[2024-09-18 11:53:41,719][00268] Fps is (10 sec: 3276.7, 60 sec: 3618.2, 300 sec: 3665.6). Total num frames: 5455872. Throughput: 0: 892.0. Samples: 362116. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:53:41,722][00268] Avg episode reward: [(0, '29.277')] +[2024-09-18 11:53:46,586][11921] Updated weights for policy 0, policy_version 1338 (0.0013) +[2024-09-18 11:53:46,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3665.6). Total num frames: 5480448. Throughput: 0: 938.9. Samples: 368866. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:53:46,723][00268] Avg episode reward: [(0, '29.732')] +[2024-09-18 11:53:51,719][00268] Fps is (10 sec: 4096.1, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 5496832. Throughput: 0: 956.8. Samples: 371808. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:53:51,721][00268] Avg episode reward: [(0, '29.132')] +[2024-09-18 11:53:56,719][00268] Fps is (10 sec: 2867.1, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 5509120. Throughput: 0: 900.1. Samples: 375880. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:53:56,721][00268] Avg episode reward: [(0, '29.381')] +[2024-09-18 11:53:58,845][11921] Updated weights for policy 0, policy_version 1348 (0.0026) +[2024-09-18 11:54:01,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 5533696. Throughput: 0: 916.1. Samples: 382150. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:54:01,723][00268] Avg episode reward: [(0, '28.556')] +[2024-09-18 11:54:06,719][00268] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 5554176. Throughput: 0: 947.1. Samples: 385514. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:54:06,726][00268] Avg episode reward: [(0, '27.738')] +[2024-09-18 11:54:09,442][11921] Updated weights for policy 0, policy_version 1358 (0.0016) +[2024-09-18 11:54:11,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 5566464. Throughput: 0: 926.4. Samples: 390394. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:54:11,729][00268] Avg episode reward: [(0, '27.041')] +[2024-09-18 11:54:16,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 5586944. Throughput: 0: 897.7. Samples: 395690. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:54:16,723][00268] Avg episode reward: [(0, '25.506')] +[2024-09-18 11:54:20,289][11921] Updated weights for policy 0, policy_version 1368 (0.0022) +[2024-09-18 11:54:21,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 5607424. Throughput: 0: 924.0. Samples: 398898. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-18 11:54:21,721][00268] Avg episode reward: [(0, '27.194')] +[2024-09-18 11:54:26,721][00268] Fps is (10 sec: 3685.6, 60 sec: 3686.3, 300 sec: 3651.7). Total num frames: 5623808. Throughput: 0: 942.5. Samples: 404532. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:54:26,728][00268] Avg episode reward: [(0, '26.821')] +[2024-09-18 11:54:31,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 5640192. Throughput: 0: 889.3. Samples: 408884. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-18 11:54:31,723][00268] Avg episode reward: [(0, '26.285')] +[2024-09-18 11:54:31,732][11908] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001377_5640192.pth... +[2024-09-18 11:54:31,927][11908] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001162_4759552.pth +[2024-09-18 11:54:32,600][11921] Updated weights for policy 0, policy_version 1378 (0.0014) +[2024-09-18 11:54:36,719][00268] Fps is (10 sec: 3687.2, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 5660672. Throughput: 0: 894.4. Samples: 412058. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:54:36,724][00268] Avg episode reward: [(0, '27.335')] +[2024-09-18 11:54:41,719][00268] Fps is (10 sec: 4095.8, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 5681152. Throughput: 0: 950.2. Samples: 418638. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:54:41,722][00268] Avg episode reward: [(0, '27.669')] +[2024-09-18 11:54:42,427][11921] Updated weights for policy 0, policy_version 1388 (0.0013) +[2024-09-18 11:54:46,724][00268] Fps is (10 sec: 3275.0, 60 sec: 3549.5, 300 sec: 3651.6). Total num frames: 5693440. Throughput: 0: 905.1. Samples: 422884. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-18 11:54:46,727][00268] Avg episode reward: [(0, '27.314')] +[2024-09-18 11:54:51,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 5713920. Throughput: 0: 887.5. Samples: 425454. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:54:51,723][00268] Avg episode reward: [(0, '28.240')] +[2024-09-18 11:54:54,215][11921] Updated weights for policy 0, policy_version 1398 (0.0021) +[2024-09-18 11:54:56,719][00268] Fps is (10 sec: 4098.2, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 5734400. Throughput: 0: 922.3. Samples: 431896. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:54:56,721][00268] Avg episode reward: [(0, '28.285')] +[2024-09-18 11:55:01,719][00268] Fps is (10 sec: 3686.6, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 5750784. Throughput: 0: 918.4. Samples: 437020. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:55:01,724][00268] Avg episode reward: [(0, '29.069')] +[2024-09-18 11:55:06,379][11921] Updated weights for policy 0, policy_version 1408 (0.0020) +[2024-09-18 11:55:06,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3665.6). Total num frames: 5767168. Throughput: 0: 893.3. Samples: 439096. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:55:06,721][00268] Avg episode reward: [(0, '28.113')] +[2024-09-18 11:55:11,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 5787648. Throughput: 0: 903.2. Samples: 445172. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 11:55:11,722][00268] Avg episode reward: [(0, '27.883')] +[2024-09-18 11:55:15,716][11921] Updated weights for policy 0, policy_version 1418 (0.0023) +[2024-09-18 11:55:16,719][00268] Fps is (10 sec: 4096.1, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 5808128. Throughput: 0: 946.0. Samples: 451454. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:55:16,721][00268] Avg episode reward: [(0, '28.492')] +[2024-09-18 11:55:21,721][00268] Fps is (10 sec: 3276.0, 60 sec: 3549.7, 300 sec: 3651.7). Total num frames: 5820416. Throughput: 0: 918.5. Samples: 453392. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:55:21,726][00268] Avg episode reward: [(0, '28.751')] +[2024-09-18 11:55:26,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.3, 300 sec: 3665.6). Total num frames: 5840896. Throughput: 0: 882.8. Samples: 458364. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:55:26,726][00268] Avg episode reward: [(0, '27.881')] +[2024-09-18 11:55:28,081][11921] Updated weights for policy 0, policy_version 1428 (0.0019) +[2024-09-18 11:55:31,719][00268] Fps is (10 sec: 4097.0, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 5861376. Throughput: 0: 935.0. Samples: 464952. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:55:31,721][00268] Avg episode reward: [(0, '26.046')] +[2024-09-18 11:55:36,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3665.7). Total num frames: 5877760. Throughput: 0: 935.1. Samples: 467534. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:55:36,730][00268] Avg episode reward: [(0, '26.107')] +[2024-09-18 11:55:40,230][11921] Updated weights for policy 0, policy_version 1438 (0.0029) +[2024-09-18 11:55:41,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3665.6). Total num frames: 5894144. Throughput: 0: 885.5. Samples: 471744. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:55:41,721][00268] Avg episode reward: [(0, '25.899')] +[2024-09-18 11:55:46,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3755.0, 300 sec: 3679.5). Total num frames: 5918720. Throughput: 0: 920.4. Samples: 478436. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:55:46,722][00268] Avg episode reward: [(0, '24.725')] +[2024-09-18 11:55:49,442][11921] Updated weights for policy 0, policy_version 1448 (0.0020) +[2024-09-18 11:55:51,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 5935104. Throughput: 0: 948.1. Samples: 481760. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:55:51,721][00268] Avg episode reward: [(0, '24.213')] +[2024-09-18 11:55:56,719][00268] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3665.6). Total num frames: 5947392. Throughput: 0: 906.2. Samples: 485950. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:55:56,721][00268] Avg episode reward: [(0, '24.600')] +[2024-09-18 11:56:01,540][11921] Updated weights for policy 0, policy_version 1458 (0.0013) +[2024-09-18 11:56:01,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 5971968. Throughput: 0: 897.2. Samples: 491830. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-18 11:56:01,725][00268] Avg episode reward: [(0, '25.053')] +[2024-09-18 11:56:06,719][00268] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 5992448. Throughput: 0: 925.3. Samples: 495030. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:56:06,721][00268] Avg episode reward: [(0, '26.158')] +[2024-09-18 11:56:11,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 6008832. Throughput: 0: 932.2. Samples: 500312. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:56:11,722][00268] Avg episode reward: [(0, '25.424')] +[2024-09-18 11:56:13,036][11921] Updated weights for policy 0, policy_version 1468 (0.0018) +[2024-09-18 11:56:16,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 6025216. Throughput: 0: 894.4. Samples: 505198. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:56:16,725][00268] Avg episode reward: [(0, '25.783')] +[2024-09-18 11:56:21,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3754.8, 300 sec: 3679.5). Total num frames: 6045696. Throughput: 0: 909.6. Samples: 508468. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:56:21,722][00268] Avg episode reward: [(0, '25.508')] +[2024-09-18 11:56:23,290][11921] Updated weights for policy 0, policy_version 1478 (0.0012) +[2024-09-18 11:56:26,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 6062080. Throughput: 0: 952.0. Samples: 514586. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:56:26,722][00268] Avg episode reward: [(0, '25.471')] +[2024-09-18 11:56:31,723][00268] Fps is (10 sec: 3275.4, 60 sec: 3617.9, 300 sec: 3665.6). Total num frames: 6078464. Throughput: 0: 893.6. Samples: 518654. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:56:31,726][00268] Avg episode reward: [(0, '25.458')] +[2024-09-18 11:56:31,738][11908] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001484_6078464.pth... +[2024-09-18 11:56:31,944][11908] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001269_5197824.pth +[2024-09-18 11:56:35,487][11921] Updated weights for policy 0, policy_version 1488 (0.0025) +[2024-09-18 11:56:36,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 6098944. Throughput: 0: 886.8. Samples: 521664. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:56:36,726][00268] Avg episode reward: [(0, '25.154')] +[2024-09-18 11:56:41,719][00268] Fps is (10 sec: 4097.8, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 6119424. Throughput: 0: 940.1. Samples: 528254. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:56:41,727][00268] Avg episode reward: [(0, '25.652')] +[2024-09-18 11:56:46,432][11921] Updated weights for policy 0, policy_version 1498 (0.0013) +[2024-09-18 11:56:46,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 6135808. Throughput: 0: 914.3. Samples: 532974. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:56:46,725][00268] Avg episode reward: [(0, '25.958')] +[2024-09-18 11:56:51,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 6152192. Throughput: 0: 892.0. Samples: 535170. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:56:51,721][00268] Avg episode reward: [(0, '25.891')] +[2024-09-18 11:56:56,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 6172672. Throughput: 0: 919.7. Samples: 541700. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:56:56,721][00268] Avg episode reward: [(0, '25.102')] +[2024-09-18 11:56:56,814][11921] Updated weights for policy 0, policy_version 1508 (0.0013) +[2024-09-18 11:57:01,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 6193152. Throughput: 0: 934.8. Samples: 547264. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:57:01,726][00268] Avg episode reward: [(0, '24.813')] +[2024-09-18 11:57:06,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3665.6). Total num frames: 6205440. Throughput: 0: 907.3. Samples: 549298. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:57:06,723][00268] Avg episode reward: [(0, '24.918')] +[2024-09-18 11:57:08,828][11921] Updated weights for policy 0, policy_version 1518 (0.0014) +[2024-09-18 11:57:11,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 6230016. Throughput: 0: 903.0. Samples: 555220. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:57:11,723][00268] Avg episode reward: [(0, '25.126')] +[2024-09-18 11:57:16,719][00268] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 6250496. Throughput: 0: 957.8. Samples: 561750. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:57:16,724][00268] Avg episode reward: [(0, '25.219')] +[2024-09-18 11:57:19,548][11921] Updated weights for policy 0, policy_version 1528 (0.0018) +[2024-09-18 11:57:21,719][00268] Fps is (10 sec: 3276.6, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 6262784. Throughput: 0: 936.1. Samples: 563790. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 11:57:21,722][00268] Avg episode reward: [(0, '26.143')] +[2024-09-18 11:57:26,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 6283264. Throughput: 0: 896.6. Samples: 568602. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:57:26,721][00268] Avg episode reward: [(0, '25.709')] +[2024-09-18 11:57:30,341][11921] Updated weights for policy 0, policy_version 1538 (0.0018) +[2024-09-18 11:57:31,719][00268] Fps is (10 sec: 4096.2, 60 sec: 3754.9, 300 sec: 3679.5). Total num frames: 6303744. Throughput: 0: 939.8. Samples: 575264. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:57:31,724][00268] Avg episode reward: [(0, '27.262')] +[2024-09-18 11:57:36,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 6320128. Throughput: 0: 955.5. Samples: 578168. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 11:57:36,721][00268] Avg episode reward: [(0, '27.389')] +[2024-09-18 11:57:41,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 6336512. Throughput: 0: 900.4. Samples: 582218. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:57:41,726][00268] Avg episode reward: [(0, '29.126')] +[2024-09-18 11:57:42,303][11921] Updated weights for policy 0, policy_version 1548 (0.0015) +[2024-09-18 11:57:46,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 6356992. Throughput: 0: 922.9. Samples: 588794. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:57:46,721][00268] Avg episode reward: [(0, '28.139')] +[2024-09-18 11:57:51,722][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 6377472. Throughput: 0: 949.3. Samples: 592016. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-18 11:57:51,725][00268] Avg episode reward: [(0, '27.480')] +[2024-09-18 11:57:52,203][11921] Updated weights for policy 0, policy_version 1558 (0.0017) +[2024-09-18 11:57:56,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 6393856. Throughput: 0: 918.1. Samples: 596536. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:57:56,723][00268] Avg episode reward: [(0, '27.492')] +[2024-09-18 11:58:01,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 6410240. Throughput: 0: 895.1. Samples: 602028. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:58:01,722][00268] Avg episode reward: [(0, '28.131')] +[2024-09-18 11:58:03,804][11921] Updated weights for policy 0, policy_version 1568 (0.0017) +[2024-09-18 11:58:06,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3679.5). Total num frames: 6434816. Throughput: 0: 923.4. Samples: 605344. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 11:58:06,725][00268] Avg episode reward: [(0, '27.295')] +[2024-09-18 11:58:11,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 6451200. Throughput: 0: 941.2. Samples: 610956. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:58:11,726][00268] Avg episode reward: [(0, '26.648')] +[2024-09-18 11:58:15,727][11921] Updated weights for policy 0, policy_version 1578 (0.0027) +[2024-09-18 11:58:16,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 6467584. Throughput: 0: 897.3. Samples: 615644. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:58:16,726][00268] Avg episode reward: [(0, '27.698')] +[2024-09-18 11:58:21,719][00268] Fps is (10 sec: 3686.3, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 6488064. Throughput: 0: 905.5. Samples: 618916. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:58:21,721][00268] Avg episode reward: [(0, '27.417')] +[2024-09-18 11:58:25,116][11921] Updated weights for policy 0, policy_version 1588 (0.0015) +[2024-09-18 11:58:26,720][00268] Fps is (10 sec: 4095.4, 60 sec: 3754.6, 300 sec: 3679.4). Total num frames: 6508544. Throughput: 0: 959.3. Samples: 625386. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:58:26,726][00268] Avg episode reward: [(0, '27.626')] +[2024-09-18 11:58:31,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 6520832. Throughput: 0: 903.8. Samples: 629466. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:58:31,723][00268] Avg episode reward: [(0, '26.817')] +[2024-09-18 11:58:31,742][11908] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001592_6520832.pth... +[2024-09-18 11:58:32,021][11908] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001377_5640192.pth +[2024-09-18 11:58:36,719][00268] Fps is (10 sec: 3277.3, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 6541312. Throughput: 0: 894.2. Samples: 632254. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:58:36,726][00268] Avg episode reward: [(0, '26.392')] +[2024-09-18 11:58:37,298][11921] Updated weights for policy 0, policy_version 1598 (0.0017) +[2024-09-18 11:58:41,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 6561792. Throughput: 0: 940.7. Samples: 638868. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:58:41,721][00268] Avg episode reward: [(0, '26.572')] +[2024-09-18 11:58:46,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 6578176. Throughput: 0: 927.4. Samples: 643762. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:58:46,724][00268] Avg episode reward: [(0, '25.712')] +[2024-09-18 11:58:49,050][11921] Updated weights for policy 0, policy_version 1608 (0.0043) +[2024-09-18 11:58:51,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 6594560. Throughput: 0: 900.4. Samples: 645864. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:58:51,723][00268] Avg episode reward: [(0, '25.829')] +[2024-09-18 11:58:56,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 6615040. Throughput: 0: 913.0. Samples: 652040. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:58:56,721][00268] Avg episode reward: [(0, '26.750')] +[2024-09-18 11:58:58,901][11921] Updated weights for policy 0, policy_version 1618 (0.0013) +[2024-09-18 11:59:01,720][00268] Fps is (10 sec: 4095.4, 60 sec: 3754.6, 300 sec: 3665.6). Total num frames: 6635520. Throughput: 0: 939.3. Samples: 657912. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:59:01,723][00268] Avg episode reward: [(0, '26.893')] +[2024-09-18 11:59:06,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3665.6). Total num frames: 6647808. Throughput: 0: 912.0. Samples: 659958. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:59:06,721][00268] Avg episode reward: [(0, '26.881')] +[2024-09-18 11:59:11,097][11921] Updated weights for policy 0, policy_version 1628 (0.0042) +[2024-09-18 11:59:11,719][00268] Fps is (10 sec: 3277.2, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 6668288. Throughput: 0: 888.7. Samples: 665378. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:59:11,721][00268] Avg episode reward: [(0, '26.499')] +[2024-09-18 11:59:16,719][00268] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 6692864. Throughput: 0: 945.5. Samples: 672012. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:59:16,724][00268] Avg episode reward: [(0, '26.382')] +[2024-09-18 11:59:21,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 6705152. Throughput: 0: 934.9. Samples: 674324. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:59:21,726][00268] Avg episode reward: [(0, '26.072')] +[2024-09-18 11:59:22,342][11921] Updated weights for policy 0, policy_version 1638 (0.0036) +[2024-09-18 11:59:26,719][00268] Fps is (10 sec: 2867.2, 60 sec: 3550.0, 300 sec: 3665.6). Total num frames: 6721536. Throughput: 0: 885.5. Samples: 678716. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:59:26,722][00268] Avg episode reward: [(0, '24.927')] +[2024-09-18 11:59:31,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 6746112. Throughput: 0: 923.4. Samples: 685316. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:59:31,721][00268] Avg episode reward: [(0, '25.593')] +[2024-09-18 11:59:32,637][11921] Updated weights for policy 0, policy_version 1648 (0.0024) +[2024-09-18 11:59:36,721][00268] Fps is (10 sec: 4095.1, 60 sec: 3686.3, 300 sec: 3665.6). Total num frames: 6762496. Throughput: 0: 949.6. Samples: 688596. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:59:36,725][00268] Avg episode reward: [(0, '25.252')] +[2024-09-18 11:59:41,719][00268] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3665.6). Total num frames: 6774784. Throughput: 0: 904.6. Samples: 692746. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:59:41,722][00268] Avg episode reward: [(0, '23.549')] +[2024-09-18 11:59:44,670][11921] Updated weights for policy 0, policy_version 1658 (0.0018) +[2024-09-18 11:59:46,719][00268] Fps is (10 sec: 3687.2, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 6799360. Throughput: 0: 908.4. Samples: 698788. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 11:59:46,725][00268] Avg episode reward: [(0, '23.246')] +[2024-09-18 11:59:51,719][00268] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 6819840. Throughput: 0: 936.0. Samples: 702076. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 11:59:51,725][00268] Avg episode reward: [(0, '23.312')] +[2024-09-18 11:59:55,189][11921] Updated weights for policy 0, policy_version 1668 (0.0016) +[2024-09-18 11:59:56,722][00268] Fps is (10 sec: 3685.1, 60 sec: 3686.2, 300 sec: 3679.4). Total num frames: 6836224. Throughput: 0: 929.8. Samples: 707222. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 11:59:56,725][00268] Avg episode reward: [(0, '23.432')] +[2024-09-18 12:00:01,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.2, 300 sec: 3679.5). Total num frames: 6852608. Throughput: 0: 893.9. Samples: 712238. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:00:01,725][00268] Avg episode reward: [(0, '24.060')] +[2024-09-18 12:00:06,063][11921] Updated weights for policy 0, policy_version 1678 (0.0020) +[2024-09-18 12:00:06,719][00268] Fps is (10 sec: 3687.7, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 6873088. Throughput: 0: 914.8. Samples: 715490. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:00:06,721][00268] Avg episode reward: [(0, '25.224')] +[2024-09-18 12:00:11,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 6893568. Throughput: 0: 954.0. Samples: 721648. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:00:11,725][00268] Avg episode reward: [(0, '26.514')] +[2024-09-18 12:00:16,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3679.5). Total num frames: 6905856. Throughput: 0: 900.5. Samples: 725840. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:00:16,722][00268] Avg episode reward: [(0, '26.208')] +[2024-09-18 12:00:17,979][11921] Updated weights for policy 0, policy_version 1688 (0.0038) +[2024-09-18 12:00:21,719][00268] Fps is (10 sec: 3686.3, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 6930432. Throughput: 0: 902.7. Samples: 729214. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:00:21,726][00268] Avg episode reward: [(0, '26.966')] +[2024-09-18 12:00:26,719][00268] Fps is (10 sec: 4505.4, 60 sec: 3822.9, 300 sec: 3693.3). Total num frames: 6950912. Throughput: 0: 953.2. Samples: 735642. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:00:26,725][00268] Avg episode reward: [(0, '27.033')] +[2024-09-18 12:00:28,095][11921] Updated weights for policy 0, policy_version 1698 (0.0017) +[2024-09-18 12:00:31,723][00268] Fps is (10 sec: 3275.4, 60 sec: 3617.9, 300 sec: 3679.4). Total num frames: 6963200. Throughput: 0: 918.2. Samples: 740110. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:00:31,726][00268] Avg episode reward: [(0, '27.927')] +[2024-09-18 12:00:31,741][11908] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001700_6963200.pth... +[2024-09-18 12:00:32,020][11908] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001484_6078464.pth +[2024-09-18 12:00:36,719][00268] Fps is (10 sec: 2867.4, 60 sec: 3618.3, 300 sec: 3679.5). Total num frames: 6979584. Throughput: 0: 895.0. Samples: 742350. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 12:00:36,721][00268] Avg episode reward: [(0, '27.250')] +[2024-09-18 12:00:39,691][11921] Updated weights for policy 0, policy_version 1708 (0.0017) +[2024-09-18 12:00:41,719][00268] Fps is (10 sec: 4097.8, 60 sec: 3822.9, 300 sec: 3679.5). Total num frames: 7004160. Throughput: 0: 928.5. Samples: 749000. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:00:41,725][00268] Avg episode reward: [(0, '28.422')] +[2024-09-18 12:00:46,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 7020544. Throughput: 0: 937.1. Samples: 754408. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:00:46,728][00268] Avg episode reward: [(0, '29.734')] +[2024-09-18 12:00:51,452][11921] Updated weights for policy 0, policy_version 1718 (0.0019) +[2024-09-18 12:00:51,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 7036928. Throughput: 0: 911.4. Samples: 756504. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:00:51,723][00268] Avg episode reward: [(0, '29.012')] +[2024-09-18 12:00:56,719][00268] Fps is (10 sec: 3686.3, 60 sec: 3686.6, 300 sec: 3679.5). Total num frames: 7057408. Throughput: 0: 906.3. Samples: 762432. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:00:56,722][00268] Avg episode reward: [(0, '29.362')] +[2024-09-18 12:01:00,979][11921] Updated weights for policy 0, policy_version 1728 (0.0014) +[2024-09-18 12:01:01,723][00268] Fps is (10 sec: 4094.2, 60 sec: 3754.4, 300 sec: 3679.4). Total num frames: 7077888. Throughput: 0: 954.3. Samples: 768788. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-18 12:01:01,736][00268] Avg episode reward: [(0, '27.385')] +[2024-09-18 12:01:06,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 7090176. Throughput: 0: 923.9. Samples: 770788. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:01:06,721][00268] Avg episode reward: [(0, '27.252')] +[2024-09-18 12:01:11,719][00268] Fps is (10 sec: 3278.1, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 7110656. Throughput: 0: 893.1. Samples: 775832. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:01:11,726][00268] Avg episode reward: [(0, '26.320')] +[2024-09-18 12:01:13,033][11921] Updated weights for policy 0, policy_version 1738 (0.0022) +[2024-09-18 12:01:16,719][00268] Fps is (10 sec: 4095.9, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7131136. Throughput: 0: 943.1. Samples: 782546. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:01:16,726][00268] Avg episode reward: [(0, '25.834')] +[2024-09-18 12:01:21,721][00268] Fps is (10 sec: 3685.8, 60 sec: 3618.0, 300 sec: 3679.4). Total num frames: 7147520. Throughput: 0: 955.6. Samples: 785354. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-18 12:01:21,725][00268] Avg episode reward: [(0, '25.833')] +[2024-09-18 12:01:24,770][11921] Updated weights for policy 0, policy_version 1748 (0.0041) +[2024-09-18 12:01:26,719][00268] Fps is (10 sec: 3276.9, 60 sec: 3549.9, 300 sec: 3679.5). Total num frames: 7163904. Throughput: 0: 899.3. Samples: 789470. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:01:26,724][00268] Avg episode reward: [(0, '27.265')] +[2024-09-18 12:01:31,719][00268] Fps is (10 sec: 4096.8, 60 sec: 3755.0, 300 sec: 3693.3). Total num frames: 7188480. Throughput: 0: 923.4. Samples: 795962. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:01:31,722][00268] Avg episode reward: [(0, '26.010')] +[2024-09-18 12:01:34,509][11921] Updated weights for policy 0, policy_version 1758 (0.0015) +[2024-09-18 12:01:36,719][00268] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3693.3). Total num frames: 7208960. Throughput: 0: 949.2. Samples: 799216. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:01:36,721][00268] Avg episode reward: [(0, '27.834')] +[2024-09-18 12:01:41,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 7221248. Throughput: 0: 920.9. Samples: 803872. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:01:41,725][00268] Avg episode reward: [(0, '28.086')] +[2024-09-18 12:01:46,488][11921] Updated weights for policy 0, policy_version 1768 (0.0017) +[2024-09-18 12:01:46,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3693.3). Total num frames: 7241728. Throughput: 0: 902.3. Samples: 809388. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:01:46,721][00268] Avg episode reward: [(0, '28.723')] +[2024-09-18 12:01:51,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 7262208. Throughput: 0: 930.0. Samples: 812638. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:01:51,721][00268] Avg episode reward: [(0, '27.993')] +[2024-09-18 12:01:56,719][00268] Fps is (10 sec: 3686.2, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 7278592. Throughput: 0: 939.9. Samples: 818126. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:01:56,724][00268] Avg episode reward: [(0, '27.581')] +[2024-09-18 12:01:57,608][11921] Updated weights for policy 0, policy_version 1778 (0.0019) +[2024-09-18 12:02:01,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.4, 300 sec: 3693.3). Total num frames: 7294976. Throughput: 0: 891.9. Samples: 822682. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-18 12:02:01,721][00268] Avg episode reward: [(0, '28.306')] +[2024-09-18 12:02:06,719][00268] Fps is (10 sec: 3686.6, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7315456. Throughput: 0: 902.2. Samples: 825952. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:02:06,725][00268] Avg episode reward: [(0, '28.468')] +[2024-09-18 12:02:07,951][11921] Updated weights for policy 0, policy_version 1788 (0.0020) +[2024-09-18 12:02:11,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7335936. Throughput: 0: 957.7. Samples: 832568. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:02:11,726][00268] Avg episode reward: [(0, '28.431')] +[2024-09-18 12:02:16,720][00268] Fps is (10 sec: 3276.4, 60 sec: 3618.1, 300 sec: 3679.4). Total num frames: 7348224. Throughput: 0: 902.8. Samples: 836590. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 12:02:16,728][00268] Avg episode reward: [(0, '27.077')] +[2024-09-18 12:02:20,131][11921] Updated weights for policy 0, policy_version 1798 (0.0027) +[2024-09-18 12:02:21,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3686.5, 300 sec: 3679.5). Total num frames: 7368704. Throughput: 0: 892.6. Samples: 839384. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-18 12:02:21,721][00268] Avg episode reward: [(0, '27.229')] +[2024-09-18 12:02:26,719][00268] Fps is (10 sec: 4096.5, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7389184. Throughput: 0: 935.7. Samples: 845978. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:02:26,722][00268] Avg episode reward: [(0, '27.974')] +[2024-09-18 12:02:30,900][11921] Updated weights for policy 0, policy_version 1808 (0.0013) +[2024-09-18 12:02:31,719][00268] Fps is (10 sec: 3686.2, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 7405568. Throughput: 0: 920.6. Samples: 850816. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:02:31,725][00268] Avg episode reward: [(0, '27.048')] +[2024-09-18 12:02:31,739][11908] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001808_7405568.pth... +[2024-09-18 12:02:31,971][11908] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001592_6520832.pth +[2024-09-18 12:02:36,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3679.5). Total num frames: 7421952. Throughput: 0: 891.7. Samples: 852766. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:02:36,721][00268] Avg episode reward: [(0, '26.089')] +[2024-09-18 12:02:41,719][00268] Fps is (10 sec: 3686.6, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 7442432. Throughput: 0: 911.3. Samples: 859134. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:02:41,726][00268] Avg episode reward: [(0, '25.247')] +[2024-09-18 12:02:41,947][11921] Updated weights for policy 0, policy_version 1818 (0.0013) +[2024-09-18 12:02:46,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 7462912. Throughput: 0: 941.4. Samples: 865046. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-18 12:02:46,729][00268] Avg episode reward: [(0, '27.200')] +[2024-09-18 12:02:51,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3665.6). Total num frames: 7475200. Throughput: 0: 914.2. Samples: 867090. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 12:02:51,721][00268] Avg episode reward: [(0, '27.240')] +[2024-09-18 12:02:53,848][11921] Updated weights for policy 0, policy_version 1828 (0.0018) +[2024-09-18 12:02:56,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.2, 300 sec: 3679.5). Total num frames: 7495680. Throughput: 0: 888.2. Samples: 872536. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-18 12:02:56,723][00268] Avg episode reward: [(0, '27.196')] +[2024-09-18 12:03:01,719][00268] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7520256. Throughput: 0: 944.0. Samples: 879070. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 12:03:01,723][00268] Avg episode reward: [(0, '27.551')] +[2024-09-18 12:03:04,409][11921] Updated weights for policy 0, policy_version 1838 (0.0033) +[2024-09-18 12:03:06,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 7532544. Throughput: 0: 932.0. Samples: 881324. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-18 12:03:06,726][00268] Avg episode reward: [(0, '28.536')] +[2024-09-18 12:03:11,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 7553024. Throughput: 0: 890.2. Samples: 886038. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 12:03:11,726][00268] Avg episode reward: [(0, '28.465')] +[2024-09-18 12:03:15,285][11921] Updated weights for policy 0, policy_version 1848 (0.0015) +[2024-09-18 12:03:16,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7573504. Throughput: 0: 933.5. Samples: 892824. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-18 12:03:16,728][00268] Avg episode reward: [(0, '28.093')] +[2024-09-18 12:03:21,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7593984. Throughput: 0: 965.3. Samples: 896206. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:03:21,721][00268] Avg episode reward: [(0, '28.711')] +[2024-09-18 12:03:26,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 7606272. Throughput: 0: 912.7. Samples: 900206. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 12:03:26,721][00268] Avg episode reward: [(0, '26.209')] +[2024-09-18 12:03:27,488][11921] Updated weights for policy 0, policy_version 1858 (0.0020) +[2024-09-18 12:03:31,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 7626752. Throughput: 0: 910.4. Samples: 906014. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-18 12:03:31,722][00268] Avg episode reward: [(0, '27.397')] +[2024-09-18 12:03:36,722][00268] Fps is (10 sec: 4094.7, 60 sec: 3754.5, 300 sec: 3679.4). Total num frames: 7647232. Throughput: 0: 936.9. Samples: 909254. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-18 12:03:36,724][00268] Avg episode reward: [(0, '26.876')] +[2024-09-18 12:03:36,769][11921] Updated weights for policy 0, policy_version 1868 (0.0024) +[2024-09-18 12:03:41,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 7663616. Throughput: 0: 928.6. Samples: 914324. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 12:03:41,725][00268] Avg episode reward: [(0, '26.087')] +[2024-09-18 12:03:46,720][00268] Fps is (10 sec: 3277.4, 60 sec: 3618.0, 300 sec: 3679.4). Total num frames: 7680000. Throughput: 0: 896.7. Samples: 919422. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 12:03:46,726][00268] Avg episode reward: [(0, '25.837')] +[2024-09-18 12:03:48,805][11921] Updated weights for policy 0, policy_version 1878 (0.0026) +[2024-09-18 12:03:51,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3693.3). Total num frames: 7704576. Throughput: 0: 918.9. Samples: 922676. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:03:51,726][00268] Avg episode reward: [(0, '25.995')] +[2024-09-18 12:03:56,720][00268] Fps is (10 sec: 4096.0, 60 sec: 3754.6, 300 sec: 3679.5). Total num frames: 7720960. Throughput: 0: 948.3. Samples: 928714. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:03:56,723][00268] Avg episode reward: [(0, '27.092')] +[2024-09-18 12:04:00,954][11921] Updated weights for policy 0, policy_version 1888 (0.0019) +[2024-09-18 12:04:01,719][00268] Fps is (10 sec: 2867.1, 60 sec: 3549.8, 300 sec: 3679.5). Total num frames: 7733248. Throughput: 0: 889.5. Samples: 932852. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:04:01,726][00268] Avg episode reward: [(0, '26.959')] +[2024-09-18 12:04:06,719][00268] Fps is (10 sec: 3686.9, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 7757824. Throughput: 0: 883.6. Samples: 935968. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:04:06,726][00268] Avg episode reward: [(0, '26.889')] +[2024-09-18 12:04:10,485][11921] Updated weights for policy 0, policy_version 1898 (0.0014) +[2024-09-18 12:04:11,719][00268] Fps is (10 sec: 4505.8, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7778304. Throughput: 0: 943.0. Samples: 942642. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:04:11,727][00268] Avg episode reward: [(0, '27.312')] +[2024-09-18 12:04:16,721][00268] Fps is (10 sec: 3276.1, 60 sec: 3618.0, 300 sec: 3679.4). Total num frames: 7790592. Throughput: 0: 916.5. Samples: 947258. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:04:16,733][00268] Avg episode reward: [(0, '28.035')] +[2024-09-18 12:04:21,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 7811072. Throughput: 0: 895.4. Samples: 949544. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:04:21,723][00268] Avg episode reward: [(0, '28.479')] +[2024-09-18 12:04:22,364][11921] Updated weights for policy 0, policy_version 1908 (0.0018) +[2024-09-18 12:04:26,719][00268] Fps is (10 sec: 4096.9, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 7831552. Throughput: 0: 931.9. Samples: 956258. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-18 12:04:26,723][00268] Avg episode reward: [(0, '29.121')] +[2024-09-18 12:04:31,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 7847936. Throughput: 0: 938.9. Samples: 961672. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:04:31,727][00268] Avg episode reward: [(0, '29.297')] +[2024-09-18 12:04:31,742][11908] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001916_7847936.pth... +[2024-09-18 12:04:32,004][11908] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001700_6963200.pth +[2024-09-18 12:04:33,759][11921] Updated weights for policy 0, policy_version 1918 (0.0018) +[2024-09-18 12:04:36,719][00268] Fps is (10 sec: 3276.8, 60 sec: 3618.3, 300 sec: 3693.3). Total num frames: 7864320. Throughput: 0: 908.5. Samples: 963560. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 12:04:36,721][00268] Avg episode reward: [(0, '29.971')] +[2024-09-18 12:04:41,719][00268] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 7884800. Throughput: 0: 908.7. Samples: 969604. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:04:41,726][00268] Avg episode reward: [(0, '30.055')] +[2024-09-18 12:04:43,851][11921] Updated weights for policy 0, policy_version 1928 (0.0015) +[2024-09-18 12:04:46,721][00268] Fps is (10 sec: 4095.2, 60 sec: 3754.6, 300 sec: 3679.4). Total num frames: 7905280. Throughput: 0: 960.5. Samples: 976074. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-18 12:04:46,728][00268] Avg episode reward: [(0, '29.777')] +[2024-09-18 12:04:51,719][00268] Fps is (10 sec: 3686.5, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 7921664. Throughput: 0: 935.2. Samples: 978054. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:04:51,721][00268] Avg episode reward: [(0, '29.816')] +[2024-09-18 12:04:55,772][11921] Updated weights for policy 0, policy_version 1938 (0.0014) +[2024-09-18 12:04:56,720][00268] Fps is (10 sec: 3276.9, 60 sec: 3618.1, 300 sec: 3679.4). Total num frames: 7938048. Throughput: 0: 899.3. Samples: 983110. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-18 12:04:56,723][00268] Avg episode reward: [(0, '30.122')] +[2024-09-18 12:05:01,719][00268] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3693.3). Total num frames: 7962624. Throughput: 0: 939.9. Samples: 989552. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 12:05:01,725][00268] Avg episode reward: [(0, '30.431')] +[2024-09-18 12:05:01,734][11908] Saving new best policy, reward=30.431! +[2024-09-18 12:05:06,535][11921] Updated weights for policy 0, policy_version 1948 (0.0039) +[2024-09-18 12:05:06,720][00268] Fps is (10 sec: 4096.2, 60 sec: 3686.3, 300 sec: 3679.4). Total num frames: 7979008. Throughput: 0: 950.5. Samples: 992316. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-18 12:05:06,725][00268] Avg episode reward: [(0, '29.583')] +[2024-09-18 12:05:11,719][00268] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3679.5). Total num frames: 7991296. Throughput: 0: 893.5. Samples: 996464. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-18 12:05:11,726][00268] Avg episode reward: [(0, '29.750')] +[2024-09-18 12:05:14,587][11908] Stopping Batcher_0... +[2024-09-18 12:05:14,588][11908] Loop batcher_evt_loop terminating... +[2024-09-18 12:05:14,589][00268] Component Batcher_0 stopped! +[2024-09-18 12:05:14,599][11908] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth... +[2024-09-18 12:05:14,642][11921] Weights refcount: 2 0 +[2024-09-18 12:05:14,648][00268] Component InferenceWorker_p0-w0 stopped! +[2024-09-18 12:05:14,656][11921] Stopping InferenceWorker_p0-w0... +[2024-09-18 12:05:14,658][11921] Loop inference_proc0-0_evt_loop terminating... +[2024-09-18 12:05:14,775][11908] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001808_7405568.pth +[2024-09-18 12:05:14,805][11908] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth... +[2024-09-18 12:05:15,011][11908] Stopping LearnerWorker_p0... +[2024-09-18 12:05:15,015][11908] Loop learner_proc0_evt_loop terminating... +[2024-09-18 12:05:15,012][00268] Component LearnerWorker_p0 stopped! +[2024-09-18 12:05:15,040][00268] Component RolloutWorker_w2 stopped! +[2024-09-18 12:05:15,047][00268] Component RolloutWorker_w4 stopped! +[2024-09-18 12:05:15,046][11924] Stopping RolloutWorker_w2... +[2024-09-18 12:05:15,052][11926] Stopping RolloutWorker_w4... +[2024-09-18 12:05:15,056][00268] Component RolloutWorker_w0 stopped! +[2024-09-18 12:05:15,053][11924] Loop rollout_proc2_evt_loop terminating... +[2024-09-18 12:05:15,054][11926] Loop rollout_proc4_evt_loop terminating... +[2024-09-18 12:05:15,061][11922] Stopping RolloutWorker_w0... +[2024-09-18 12:05:15,067][00268] Component RolloutWorker_w6 stopped! +[2024-09-18 12:05:15,066][11922] Loop rollout_proc0_evt_loop terminating... +[2024-09-18 12:05:15,072][11927] Stopping RolloutWorker_w6... +[2024-09-18 12:05:15,074][11927] Loop rollout_proc6_evt_loop terminating... +[2024-09-18 12:05:15,230][00268] Component RolloutWorker_w7 stopped! +[2024-09-18 12:05:15,233][11929] Stopping RolloutWorker_w7... +[2024-09-18 12:05:15,236][11929] Loop rollout_proc7_evt_loop terminating... +[2024-09-18 12:05:15,267][00268] Component RolloutWorker_w5 stopped! +[2024-09-18 12:05:15,270][11928] Stopping RolloutWorker_w5... +[2024-09-18 12:05:15,276][11928] Loop rollout_proc5_evt_loop terminating... +[2024-09-18 12:05:15,312][00268] Component RolloutWorker_w3 stopped! +[2024-09-18 12:05:15,314][11925] Stopping RolloutWorker_w3... +[2024-09-18 12:05:15,314][11925] Loop rollout_proc3_evt_loop terminating... +[2024-09-18 12:05:15,381][00268] Component RolloutWorker_w1 stopped! +[2024-09-18 12:05:15,387][00268] Waiting for process learner_proc0 to stop... +[2024-09-18 12:05:15,391][11923] Stopping RolloutWorker_w1... +[2024-09-18 12:05:15,393][11923] Loop rollout_proc1_evt_loop terminating... +[2024-09-18 12:05:17,441][00268] Waiting for process inference_proc0-0 to join... +[2024-09-18 12:05:17,522][00268] Waiting for process rollout_proc0 to join... +[2024-09-18 12:05:17,948][00268] Waiting for process rollout_proc1 to join... +[2024-09-18 12:05:17,970][00268] Waiting for process rollout_proc2 to join... +[2024-09-18 12:05:17,971][00268] Waiting for process rollout_proc3 to join... +[2024-09-18 12:05:17,976][00268] Waiting for process rollout_proc4 to join... +[2024-09-18 12:05:17,979][00268] Waiting for process rollout_proc5 to join... +[2024-09-18 12:05:17,980][00268] Waiting for process rollout_proc6 to join... +[2024-09-18 12:05:17,984][00268] Waiting for process rollout_proc7 to join... +[2024-09-18 12:05:17,987][00268] Batcher 0 profile tree view: +batching: 26.7487, releasing_batches: 0.0239 +[2024-09-18 12:05:17,991][00268] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 474.2526 +update_model: 7.8235 + weight_update: 0.0023 +one_step: 0.0022 + handle_policy_step: 577.7240 + deserialize: 15.3204, stack: 3.0071, obs_to_device_normalize: 117.7529, forward: 291.3099, send_messages: 29.0593 + prepare_outputs: 91.9556 + to_cpu: 58.5476 +[2024-09-18 12:05:17,993][00268] Learner 0 profile tree view: +misc: 0.0055, prepare_batch: 16.1687 +train: 79.2732 + epoch_init: 0.0102, minibatch_init: 0.0137, losses_postprocess: 0.5746, kl_divergence: 0.5765, after_optimizer: 3.2603 + calculate_losses: 24.3960 + losses_init: 0.0093, forward_head: 1.8492, bptt_initial: 15.5723, tail: 1.1127, advantages_returns: 0.2897, losses: 2.6898 + bptt: 2.4617 + bptt_forward_core: 2.3822 + update: 49.7214 + clip: 1.4882 +[2024-09-18 12:05:17,994][00268] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.3711, enqueue_policy_requests: 119.1639, env_step: 857.3822, overhead: 14.7818, complete_rollouts: 7.3833 +save_policy_outputs: 26.2504 + split_output_tensors: 8.8148 +[2024-09-18 12:05:17,995][00268] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.3241, enqueue_policy_requests: 117.0709, env_step: 859.9379, overhead: 15.5190, complete_rollouts: 7.2719 +save_policy_outputs: 26.8021 + split_output_tensors: 8.9067 +[2024-09-18 12:05:17,996][00268] Loop Runner_EvtLoop terminating... +[2024-09-18 12:05:17,998][00268] Runner profile tree view: +main_loop: 1124.6084 +[2024-09-18 12:05:17,999][00268] Collected {0: 8007680}, FPS: 3558.4 +[2024-09-18 12:05:18,033][00268] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-09-18 12:05:18,034][00268] Overriding arg 'num_workers' with value 1 passed from command line +[2024-09-18 12:05:18,036][00268] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-09-18 12:05:18,038][00268] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-09-18 12:05:18,040][00268] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-09-18 12:05:18,041][00268] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-09-18 12:05:18,043][00268] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-09-18 12:05:18,044][00268] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-09-18 12:05:18,045][00268] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-09-18 12:05:18,046][00268] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-09-18 12:05:18,047][00268] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-09-18 12:05:18,048][00268] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-09-18 12:05:18,049][00268] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-09-18 12:05:18,051][00268] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-09-18 12:05:18,052][00268] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-09-18 12:05:18,061][00268] RunningMeanStd input shape: (3, 72, 128) +[2024-09-18 12:05:18,069][00268] RunningMeanStd input shape: (1,) +[2024-09-18 12:05:18,082][00268] ConvEncoder: input_channels=3 +[2024-09-18 12:05:18,143][00268] Conv encoder output size: 512 +[2024-09-18 12:05:18,145][00268] Policy head output size: 512 +[2024-09-18 12:05:18,171][00268] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth... +[2024-09-18 12:05:18,658][00268] Num frames 100... +[2024-09-18 12:05:18,781][00268] Num frames 200... +[2024-09-18 12:05:18,897][00268] Num frames 300... +[2024-09-18 12:05:19,051][00268] Num frames 400... +[2024-09-18 12:05:19,174][00268] Num frames 500... +[2024-09-18 12:05:19,297][00268] Num frames 600... +[2024-09-18 12:05:19,418][00268] Num frames 700... +[2024-09-18 12:05:19,535][00268] Num frames 800... +[2024-09-18 12:05:19,651][00268] Num frames 900... +[2024-09-18 12:05:19,775][00268] Num frames 1000... +[2024-09-18 12:05:19,895][00268] Num frames 1100... +[2024-09-18 12:05:19,980][00268] Avg episode rewards: #0: 27.200, true rewards: #0: 11.200 +[2024-09-18 12:05:19,982][00268] Avg episode reward: 27.200, avg true_objective: 11.200 +[2024-09-18 12:05:20,083][00268] Num frames 1200... +[2024-09-18 12:05:20,198][00268] Num frames 1300... +[2024-09-18 12:05:20,314][00268] Num frames 1400... +[2024-09-18 12:05:20,433][00268] Num frames 1500... +[2024-09-18 12:05:20,569][00268] Num frames 1600... +[2024-09-18 12:05:20,736][00268] Num frames 1700... +[2024-09-18 12:05:20,904][00268] Num frames 1800... +[2024-09-18 12:05:21,083][00268] Num frames 1900... +[2024-09-18 12:05:21,240][00268] Num frames 2000... +[2024-09-18 12:05:21,408][00268] Num frames 2100... +[2024-09-18 12:05:21,567][00268] Num frames 2200... +[2024-09-18 12:05:21,727][00268] Num frames 2300... +[2024-09-18 12:05:21,895][00268] Num frames 2400... +[2024-09-18 12:05:22,078][00268] Num frames 2500... +[2024-09-18 12:05:22,241][00268] Num frames 2600... +[2024-09-18 12:05:22,375][00268] Avg episode rewards: #0: 35.225, true rewards: #0: 13.225 +[2024-09-18 12:05:22,378][00268] Avg episode reward: 35.225, avg true_objective: 13.225 +[2024-09-18 12:05:22,475][00268] Num frames 2700... +[2024-09-18 12:05:22,640][00268] Num frames 2800... +[2024-09-18 12:05:22,810][00268] Num frames 2900... +[2024-09-18 12:05:22,949][00268] Num frames 3000... +[2024-09-18 12:05:23,065][00268] Num frames 3100... +[2024-09-18 12:05:23,190][00268] Num frames 3200... +[2024-09-18 12:05:23,309][00268] Num frames 3300... +[2024-09-18 12:05:23,427][00268] Num frames 3400... +[2024-09-18 12:05:23,546][00268] Num frames 3500... +[2024-09-18 12:05:23,670][00268] Num frames 3600... +[2024-09-18 12:05:23,790][00268] Num frames 3700... +[2024-09-18 12:05:23,912][00268] Num frames 3800... +[2024-09-18 12:05:24,043][00268] Num frames 3900... +[2024-09-18 12:05:24,184][00268] Num frames 4000... +[2024-09-18 12:05:24,309][00268] Num frames 4100... +[2024-09-18 12:05:24,429][00268] Num frames 4200... +[2024-09-18 12:05:24,549][00268] Num frames 4300... +[2024-09-18 12:05:24,669][00268] Num frames 4400... +[2024-09-18 12:05:24,789][00268] Num frames 4500... +[2024-09-18 12:05:24,912][00268] Num frames 4600... +[2024-09-18 12:05:25,008][00268] Avg episode rewards: #0: 39.423, true rewards: #0: 15.423 +[2024-09-18 12:05:25,009][00268] Avg episode reward: 39.423, avg true_objective: 15.423 +[2024-09-18 12:05:25,095][00268] Num frames 4700... +[2024-09-18 12:05:25,219][00268] Num frames 4800... +[2024-09-18 12:05:25,331][00268] Num frames 4900... +[2024-09-18 12:05:25,450][00268] Num frames 5000... +[2024-09-18 12:05:25,568][00268] Num frames 5100... +[2024-09-18 12:05:25,684][00268] Num frames 5200... +[2024-09-18 12:05:25,808][00268] Num frames 5300... +[2024-09-18 12:05:25,933][00268] Num frames 5400... +[2024-09-18 12:05:26,058][00268] Num frames 5500... +[2024-09-18 12:05:26,181][00268] Num frames 5600... +[2024-09-18 12:05:26,308][00268] Num frames 5700... +[2024-09-18 12:05:26,428][00268] Num frames 5800... +[2024-09-18 12:05:26,546][00268] Num frames 5900... +[2024-09-18 12:05:26,665][00268] Num frames 6000... +[2024-09-18 12:05:26,813][00268] Avg episode rewards: #0: 39.445, true rewards: #0: 15.195 +[2024-09-18 12:05:26,816][00268] Avg episode reward: 39.445, avg true_objective: 15.195 +[2024-09-18 12:05:26,845][00268] Num frames 6100... +[2024-09-18 12:05:26,970][00268] Num frames 6200... +[2024-09-18 12:05:27,088][00268] Num frames 6300... +[2024-09-18 12:05:27,206][00268] Num frames 6400... +[2024-09-18 12:05:27,334][00268] Num frames 6500... +[2024-09-18 12:05:27,450][00268] Num frames 6600... +[2024-09-18 12:05:27,637][00268] Avg episode rewards: #0: 34.192, true rewards: #0: 13.392 +[2024-09-18 12:05:27,639][00268] Avg episode reward: 34.192, avg true_objective: 13.392 +[2024-09-18 12:05:27,649][00268] Num frames 6700... +[2024-09-18 12:05:27,765][00268] Num frames 6800... +[2024-09-18 12:05:27,881][00268] Num frames 6900... +[2024-09-18 12:05:28,010][00268] Num frames 7000... +[2024-09-18 12:05:28,135][00268] Num frames 7100... +[2024-09-18 12:05:28,273][00268] Num frames 7200... +[2024-09-18 12:05:28,400][00268] Num frames 7300... +[2024-09-18 12:05:28,519][00268] Num frames 7400... +[2024-09-18 12:05:28,642][00268] Num frames 7500... +[2024-09-18 12:05:28,761][00268] Num frames 7600... +[2024-09-18 12:05:28,881][00268] Num frames 7700... +[2024-09-18 12:05:29,011][00268] Num frames 7800... +[2024-09-18 12:05:29,139][00268] Num frames 7900... +[2024-09-18 12:05:29,260][00268] Num frames 8000... +[2024-09-18 12:05:29,383][00268] Num frames 8100... +[2024-09-18 12:05:29,507][00268] Num frames 8200... +[2024-09-18 12:05:29,559][00268] Avg episode rewards: #0: 34.500, true rewards: #0: 13.667 +[2024-09-18 12:05:29,561][00268] Avg episode reward: 34.500, avg true_objective: 13.667 +[2024-09-18 12:05:29,675][00268] Num frames 8300... +[2024-09-18 12:05:29,788][00268] Num frames 8400... +[2024-09-18 12:05:29,909][00268] Num frames 8500... +[2024-09-18 12:05:30,032][00268] Num frames 8600... +[2024-09-18 12:05:30,158][00268] Num frames 8700... +[2024-09-18 12:05:30,275][00268] Num frames 8800... +[2024-09-18 12:05:30,402][00268] Num frames 8900... +[2024-09-18 12:05:30,520][00268] Num frames 9000... +[2024-09-18 12:05:30,639][00268] Num frames 9100... +[2024-09-18 12:05:30,755][00268] Num frames 9200... +[2024-09-18 12:05:30,881][00268] Num frames 9300... +[2024-09-18 12:05:31,008][00268] Num frames 9400... +[2024-09-18 12:05:31,133][00268] Num frames 9500... +[2024-09-18 12:05:31,252][00268] Num frames 9600... +[2024-09-18 12:05:31,378][00268] Num frames 9700... +[2024-09-18 12:05:31,495][00268] Num frames 9800... +[2024-09-18 12:05:31,616][00268] Num frames 9900... +[2024-09-18 12:05:31,738][00268] Num frames 10000... +[2024-09-18 12:05:31,863][00268] Num frames 10100... +[2024-09-18 12:05:31,988][00268] Num frames 10200... +[2024-09-18 12:05:32,117][00268] Num frames 10300... +[2024-09-18 12:05:32,170][00268] Avg episode rewards: #0: 38.000, true rewards: #0: 14.714 +[2024-09-18 12:05:32,172][00268] Avg episode reward: 38.000, avg true_objective: 14.714 +[2024-09-18 12:05:32,287][00268] Num frames 10400... +[2024-09-18 12:05:32,411][00268] Num frames 10500... +[2024-09-18 12:05:32,525][00268] Num frames 10600... +[2024-09-18 12:05:32,640][00268] Num frames 10700... +[2024-09-18 12:05:32,757][00268] Num frames 10800... +[2024-09-18 12:05:32,887][00268] Num frames 10900... +[2024-09-18 12:05:33,055][00268] Num frames 11000... +[2024-09-18 12:05:33,223][00268] Num frames 11100... +[2024-09-18 12:05:33,389][00268] Num frames 11200... +[2024-09-18 12:05:33,555][00268] Num frames 11300... +[2024-09-18 12:05:33,713][00268] Num frames 11400... +[2024-09-18 12:05:33,805][00268] Avg episode rewards: #0: 36.400, true rewards: #0: 14.275 +[2024-09-18 12:05:33,807][00268] Avg episode reward: 36.400, avg true_objective: 14.275 +[2024-09-18 12:05:33,948][00268] Num frames 11500... +[2024-09-18 12:05:34,113][00268] Num frames 11600... +[2024-09-18 12:05:34,287][00268] Num frames 11700... +[2024-09-18 12:05:34,471][00268] Num frames 11800... +[2024-09-18 12:05:34,638][00268] Num frames 11900... +[2024-09-18 12:05:34,809][00268] Num frames 12000... +[2024-09-18 12:05:34,995][00268] Num frames 12100... +[2024-09-18 12:05:35,185][00268] Num frames 12200... +[2024-09-18 12:05:35,344][00268] Num frames 12300... +[2024-09-18 12:05:35,473][00268] Num frames 12400... +[2024-09-18 12:05:35,603][00268] Num frames 12500... +[2024-09-18 12:05:35,723][00268] Num frames 12600... +[2024-09-18 12:05:35,848][00268] Num frames 12700... +[2024-09-18 12:05:35,974][00268] Num frames 12800... +[2024-09-18 12:05:36,098][00268] Num frames 12900... +[2024-09-18 12:05:36,220][00268] Num frames 13000... +[2024-09-18 12:05:36,344][00268] Num frames 13100... +[2024-09-18 12:05:36,468][00268] Num frames 13200... +[2024-09-18 12:05:36,599][00268] Num frames 13300... +[2024-09-18 12:05:36,722][00268] Num frames 13400... +[2024-09-18 12:05:36,843][00268] Num frames 13500... +[2024-09-18 12:05:36,924][00268] Avg episode rewards: #0: 39.133, true rewards: #0: 15.022 +[2024-09-18 12:05:36,925][00268] Avg episode reward: 39.133, avg true_objective: 15.022 +[2024-09-18 12:05:37,025][00268] Num frames 13600... +[2024-09-18 12:05:37,144][00268] Num frames 13700... +[2024-09-18 12:05:37,268][00268] Num frames 13800... +[2024-09-18 12:05:37,391][00268] Num frames 13900... +[2024-09-18 12:05:37,515][00268] Num frames 14000... +[2024-09-18 12:05:37,652][00268] Num frames 14100... +[2024-09-18 12:05:37,797][00268] Num frames 14200... +[2024-09-18 12:05:38,025][00268] Num frames 14300... +[2024-09-18 12:05:38,255][00268] Num frames 14400... +[2024-09-18 12:05:38,462][00268] Num frames 14500... +[2024-09-18 12:05:38,569][00268] Avg episode rewards: #0: 37.544, true rewards: #0: 14.544 +[2024-09-18 12:05:38,571][00268] Avg episode reward: 37.544, avg true_objective: 14.544 +[2024-09-18 12:07:11,264][00268] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2024-09-18 12:07:11,919][00268] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-09-18 12:07:11,922][00268] Overriding arg 'num_workers' with value 1 passed from command line +[2024-09-18 12:07:11,924][00268] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-09-18 12:07:11,926][00268] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-09-18 12:07:11,927][00268] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-09-18 12:07:11,929][00268] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-09-18 12:07:11,931][00268] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2024-09-18 12:07:11,932][00268] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-09-18 12:07:11,933][00268] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2024-09-18 12:07:11,934][00268] Adding new argument 'hf_repository'='mkdem/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2024-09-18 12:07:11,935][00268] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-09-18 12:07:11,936][00268] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-09-18 12:07:11,937][00268] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-09-18 12:07:11,938][00268] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-09-18 12:07:11,939][00268] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-09-18 12:07:11,951][00268] RunningMeanStd input shape: (3, 72, 128) +[2024-09-18 12:07:11,955][00268] RunningMeanStd input shape: (1,) +[2024-09-18 12:07:11,982][00268] ConvEncoder: input_channels=3 +[2024-09-18 12:07:12,041][00268] Conv encoder output size: 512 +[2024-09-18 12:07:12,043][00268] Policy head output size: 512 +[2024-09-18 12:07:12,069][00268] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth... +[2024-09-18 12:07:12,848][00268] Num frames 100... +[2024-09-18 12:07:13,007][00268] Num frames 200... +[2024-09-18 12:07:13,157][00268] Num frames 300... +[2024-09-18 12:07:13,318][00268] Num frames 400... +[2024-09-18 12:07:13,469][00268] Num frames 500... +[2024-09-18 12:07:13,617][00268] Num frames 600... +[2024-09-18 12:07:13,769][00268] Num frames 700... +[2024-09-18 12:07:13,919][00268] Num frames 800... +[2024-09-18 12:07:14,075][00268] Num frames 900... +[2024-09-18 12:07:14,230][00268] Num frames 1000... +[2024-09-18 12:07:14,400][00268] Num frames 1100... +[2024-09-18 12:07:14,555][00268] Num frames 1200... +[2024-09-18 12:07:14,706][00268] Num frames 1300... +[2024-09-18 12:07:14,859][00268] Num frames 1400... +[2024-09-18 12:07:15,016][00268] Num frames 1500... +[2024-09-18 12:07:15,171][00268] Num frames 1600... +[2024-09-18 12:07:15,327][00268] Num frames 1700... +[2024-09-18 12:07:15,494][00268] Num frames 1800... +[2024-09-18 12:07:15,671][00268] Num frames 1900... +[2024-09-18 12:07:15,860][00268] Avg episode rewards: #0: 48.839, true rewards: #0: 19.840 +[2024-09-18 12:07:15,862][00268] Avg episode reward: 48.839, avg true_objective: 19.840 +[2024-09-18 12:07:15,891][00268] Num frames 2000... +[2024-09-18 12:07:16,051][00268] Num frames 2100... +[2024-09-18 12:07:16,221][00268] Num frames 2200... +[2024-09-18 12:07:16,393][00268] Num frames 2300... +[2024-09-18 12:07:16,547][00268] Num frames 2400... +[2024-09-18 12:07:16,722][00268] Num frames 2500... +[2024-09-18 12:07:16,925][00268] Num frames 2600... +[2024-09-18 12:07:17,116][00268] Num frames 2700... +[2024-09-18 12:07:17,314][00268] Num frames 2800... +[2024-09-18 12:07:17,516][00268] Num frames 2900... +[2024-09-18 12:07:17,705][00268] Num frames 3000... +[2024-09-18 12:07:17,938][00268] Num frames 3100... +[2024-09-18 12:07:18,122][00268] Num frames 3200... +[2024-09-18 12:07:18,332][00268] Num frames 3300... +[2024-09-18 12:07:18,519][00268] Num frames 3400... +[2024-09-18 12:07:18,687][00268] Num frames 3500... +[2024-09-18 12:07:18,860][00268] Num frames 3600... +[2024-09-18 12:07:19,056][00268] Num frames 3700... +[2024-09-18 12:07:19,299][00268] Num frames 3800... +[2024-09-18 12:07:19,527][00268] Num frames 3900... +[2024-09-18 12:07:19,758][00268] Num frames 4000... +[2024-09-18 12:07:20,020][00268] Avg episode rewards: #0: 54.919, true rewards: #0: 20.420 +[2024-09-18 12:07:20,023][00268] Avg episode reward: 54.919, avg true_objective: 20.420 +[2024-09-18 12:07:20,062][00268] Num frames 4100... +[2024-09-18 12:07:20,266][00268] Num frames 4200... +[2024-09-18 12:07:20,499][00268] Num frames 4300... +[2024-09-18 12:07:20,719][00268] Num frames 4400... +[2024-09-18 12:07:20,919][00268] Num frames 4500... +[2024-09-18 12:07:21,132][00268] Num frames 4600... +[2024-09-18 12:07:21,346][00268] Num frames 4700... +[2024-09-18 12:07:21,559][00268] Num frames 4800... +[2024-09-18 12:07:21,750][00268] Num frames 4900... +[2024-09-18 12:07:21,913][00268] Num frames 5000... +[2024-09-18 12:07:22,071][00268] Num frames 5100... +[2024-09-18 12:07:22,236][00268] Num frames 5200... +[2024-09-18 12:07:22,398][00268] Num frames 5300... +[2024-09-18 12:07:22,556][00268] Num frames 5400... +[2024-09-18 12:07:22,720][00268] Num frames 5500... +[2024-09-18 12:07:22,890][00268] Num frames 5600... +[2024-09-18 12:07:23,038][00268] Num frames 5700... +[2024-09-18 12:07:23,162][00268] Num frames 5800... +[2024-09-18 12:07:23,282][00268] Num frames 5900... +[2024-09-18 12:07:23,401][00268] Num frames 6000... +[2024-09-18 12:07:23,521][00268] Num frames 6100... +[2024-09-18 12:07:23,690][00268] Avg episode rewards: #0: 55.279, true rewards: #0: 20.613 +[2024-09-18 12:07:23,692][00268] Avg episode reward: 55.279, avg true_objective: 20.613 +[2024-09-18 12:07:23,715][00268] Num frames 6200... +[2024-09-18 12:07:23,834][00268] Num frames 6300... +[2024-09-18 12:07:23,960][00268] Num frames 6400... +[2024-09-18 12:07:24,079][00268] Num frames 6500... +[2024-09-18 12:07:24,198][00268] Num frames 6600... +[2024-09-18 12:07:24,321][00268] Num frames 6700... +[2024-09-18 12:07:24,447][00268] Avg episode rewards: #0: 43.649, true rewards: #0: 16.900 +[2024-09-18 12:07:24,448][00268] Avg episode reward: 43.649, avg true_objective: 16.900 +[2024-09-18 12:07:24,499][00268] Num frames 6800... +[2024-09-18 12:07:24,625][00268] Num frames 6900... +[2024-09-18 12:07:24,743][00268] Num frames 7000... +[2024-09-18 12:07:24,863][00268] Num frames 7100... +[2024-09-18 12:07:24,986][00268] Num frames 7200... +[2024-09-18 12:07:25,106][00268] Num frames 7300... +[2024-09-18 12:07:25,230][00268] Num frames 7400... +[2024-09-18 12:07:25,284][00268] Avg episode rewards: #0: 36.999, true rewards: #0: 14.800 +[2024-09-18 12:07:25,286][00268] Avg episode reward: 36.999, avg true_objective: 14.800 +[2024-09-18 12:07:25,404][00268] Num frames 7500... +[2024-09-18 12:07:25,523][00268] Num frames 7600... +[2024-09-18 12:07:25,650][00268] Num frames 7700... +[2024-09-18 12:07:25,765][00268] Num frames 7800... +[2024-09-18 12:07:25,883][00268] Num frames 7900... +[2024-09-18 12:07:26,002][00268] Num frames 8000... +[2024-09-18 12:07:26,127][00268] Num frames 8100... +[2024-09-18 12:07:26,244][00268] Num frames 8200... +[2024-09-18 12:07:26,361][00268] Num frames 8300... +[2024-09-18 12:07:26,489][00268] Num frames 8400... +[2024-09-18 12:07:26,624][00268] Num frames 8500... +[2024-09-18 12:07:26,748][00268] Num frames 8600... +[2024-09-18 12:07:26,874][00268] Num frames 8700... +[2024-09-18 12:07:26,995][00268] Num frames 8800... +[2024-09-18 12:07:27,120][00268] Num frames 8900... +[2024-09-18 12:07:27,243][00268] Num frames 9000... +[2024-09-18 12:07:27,368][00268] Num frames 9100... +[2024-09-18 12:07:27,489][00268] Num frames 9200... +[2024-09-18 12:07:27,626][00268] Num frames 9300... +[2024-09-18 12:07:27,756][00268] Num frames 9400... +[2024-09-18 12:07:27,886][00268] Num frames 9500... +[2024-09-18 12:07:27,940][00268] Avg episode rewards: #0: 40.499, true rewards: #0: 15.833 +[2024-09-18 12:07:27,942][00268] Avg episode reward: 40.499, avg true_objective: 15.833 +[2024-09-18 12:07:28,065][00268] Num frames 9600... +[2024-09-18 12:07:28,208][00268] Num frames 9700... +[2024-09-18 12:07:28,344][00268] Num frames 9800... +[2024-09-18 12:07:28,500][00268] Avg episode rewards: #0: 35.691, true rewards: #0: 14.120 +[2024-09-18 12:07:28,502][00268] Avg episode reward: 35.691, avg true_objective: 14.120 +[2024-09-18 12:07:28,524][00268] Num frames 9900... +[2024-09-18 12:07:28,649][00268] Num frames 10000... +[2024-09-18 12:07:28,768][00268] Num frames 10100... +[2024-09-18 12:07:28,882][00268] Num frames 10200... +[2024-09-18 12:07:28,998][00268] Num frames 10300... +[2024-09-18 12:07:29,122][00268] Num frames 10400... +[2024-09-18 12:07:29,243][00268] Num frames 10500... +[2024-09-18 12:07:29,360][00268] Num frames 10600... +[2024-09-18 12:07:29,477][00268] Num frames 10700... +[2024-09-18 12:07:29,602][00268] Num frames 10800... +[2024-09-18 12:07:29,724][00268] Num frames 10900... +[2024-09-18 12:07:29,845][00268] Num frames 11000... +[2024-09-18 12:07:29,962][00268] Num frames 11100... +[2024-09-18 12:07:30,082][00268] Num frames 11200... +[2024-09-18 12:07:30,209][00268] Num frames 11300... +[2024-09-18 12:07:30,329][00268] Num frames 11400... +[2024-09-18 12:07:30,450][00268] Num frames 11500... +[2024-09-18 12:07:30,573][00268] Num frames 11600... +[2024-09-18 12:07:30,705][00268] Num frames 11700... +[2024-09-18 12:07:30,878][00268] Avg episode rewards: #0: 37.748, true rewards: #0: 14.749 +[2024-09-18 12:07:30,881][00268] Avg episode reward: 37.748, avg true_objective: 14.749 +[2024-09-18 12:07:30,885][00268] Num frames 11800... +[2024-09-18 12:07:31,002][00268] Num frames 11900... +[2024-09-18 12:07:31,122][00268] Num frames 12000... +[2024-09-18 12:07:31,249][00268] Num frames 12100... +[2024-09-18 12:07:31,367][00268] Num frames 12200... +[2024-09-18 12:07:31,484][00268] Num frames 12300... +[2024-09-18 12:07:31,627][00268] Avg episode rewards: #0: 34.972, true rewards: #0: 13.750 +[2024-09-18 12:07:31,629][00268] Avg episode reward: 34.972, avg true_objective: 13.750 +[2024-09-18 12:07:31,668][00268] Num frames 12400... +[2024-09-18 12:07:31,837][00268] Num frames 12500... +[2024-09-18 12:07:32,009][00268] Num frames 12600... +[2024-09-18 12:07:32,177][00268] Num frames 12700... +[2024-09-18 12:07:32,399][00268] Avg episode rewards: #0: 32.098, true rewards: #0: 12.799 +[2024-09-18 12:07:32,401][00268] Avg episode reward: 32.098, avg true_objective: 12.799 +[2024-09-18 12:07:32,408][00268] Num frames 12800... +[2024-09-18 12:08:53,783][00268] Replay video saved to /content/train_dir/default_experiment/replay.mp4!