{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.852378010749817, "min": 1.8350627422332764, "max": 1.8768107891082764, "count": 6 }, "SoccerTwos.Policy.Entropy.sum": { "value": 34913.62109375, "min": 5941.634765625, "max": 40539.11328125, "count": 6 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 88.03636363636363, "min": 29.666666666666668, "max": 88.03636363636363, "count": 6 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19368.0, "min": 712.0, "max": 20120.0, "count": 6 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1192.4054277979922, "min": 1190.8040054383152, "max": 1201.2929666713765, "count": 6 }, "SoccerTwos.Self-play.ELO.sum": { "value": 131164.59705777914, "min": 14400.0, "max": 189804.28873407748, "count": 6 }, "SoccerTwos.Step.mean": { "value": 10049991.0, "min": 9999982.0, "max": 10049991.0, "count": 6 }, "SoccerTwos.Step.sum": { "value": 10049991.0, "min": 9999982.0, "max": 10049991.0, "count": 6 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.004964224062860012, "min": -0.09364105761051178, "max": 0.009783200919628143, "count": 6 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 0.551028847694397, "min": -2.561784267425537, "max": 1.2131168842315674, "count": 6 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.004720787517726421, "min": -0.08726067841053009, "max": 0.010965084657073021, "count": 6 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 0.5240074396133423, "min": -2.6891181468963623, "max": 1.3596705198287964, "count": 6 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 6 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 6 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.07192071923264512, "min": -0.6030400037765503, "max": 0.04130569221527596, "count": 6 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -7.983199834823608, "min": -42.29800021648407, "max": 5.080600142478943, "count": 6 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.07192071923264512, "min": -0.6030400037765503, "max": 0.04130569221527596, "count": 6 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -7.983199834823608, "min": -42.29800021648407, "max": 5.080600142478943, "count": 6 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 6 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 6 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.018015451532725515, "min": 0.018015451532725515, "max": 0.019700842688325792, "count": 2 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.018015451532725515, "min": 0.018015451532725515, "max": 0.019700842688325792, "count": 2 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.07724483733375867, "min": 0.07724483733375867, "max": 0.09004426474372546, "count": 2 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.07724483733375867, "min": 0.07724483733375867, "max": 0.09004426474372546, "count": 2 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.07705125908056895, "min": 0.07705125908056895, "max": 0.09023706714312235, "count": 2 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.07705125908056895, "min": 0.07705125908056895, "max": 0.09023706714312235, "count": 2 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 5.875427021492605e-07, "min": 5.875427021492605e-07, "max": 2.026965382089537e-06, "count": 2 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 5.875427021492605e-07, "min": 5.875427021492605e-07, "max": 2.026965382089537e-06, "count": 2 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.10008392039800995, "min": 0.10008392039800995, "max": 0.10028955223880595, "count": 2 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.10008392039800995, "min": 0.10008392039800995, "max": 0.10028955223880595, "count": 2 }, "SoccerTwos.Policy.Beta.mean": { "value": 1.4187627860696568e-05, "min": 1.4187627860696568e-05, "max": 2.444865671641781e-05, "count": 2 }, "SoccerTwos.Policy.Beta.sum": { "value": 1.4187627860696568e-05, "min": 1.4187627860696568e-05, "max": 2.444865671641781e-05, "count": 2 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1700259473", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\Jonat\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics --resume", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.1.1+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1700259659" }, "total": 185.82584489999863, "count": 1, "self": 0.7753106999880401, "children": { "run_training.setup": { "total": 0.09379280000575818, "count": 1, "self": 0.09379280000575818 }, "TrainerController.start_learning": { "total": 184.95674140000483, "count": 1, "self": 0.08903199959604535, "children": { "TrainerController._reset_env": { "total": 5.969565200008219, "count": 2, "self": 5.969565200008219 }, "TrainerController.advance": { "total": 178.67253850038105, "count": 3443, "self": 0.07832270073413383, "children": { "env_step": { "total": 64.80500039977778, "count": 3443, "self": 49.44157259988424, "children": { "SubprocessEnvManager._take_step": { "total": 15.305126899955212, "count": 3443, "self": 0.39619699952891096, "children": { "TorchPolicy.evaluate": { "total": 14.908929900426301, "count": 6352, "self": 14.908929900426301 } } }, "workers": { "total": 0.058300899938330986, "count": 3443, "self": 0.0, "children": { "worker_root": { "total": 179.18925629938894, "count": 3443, "is_parallel": true, "self": 139.73535029958293, "children": { "steps_from_proto": { "total": 0.004917600002954714, "count": 4, "is_parallel": true, "self": 0.0007504000095650554, "children": { "_process_rank_one_or_two_observation": { "total": 0.004167199993389659, "count": 16, "is_parallel": true, "self": 0.004167199993389659 } } }, "UnityEnvironment.step": { "total": 39.448988399803056, "count": 3443, "is_parallel": true, "self": 1.9205469998996705, "children": { "UnityEnvironment._generate_step_input": { "total": 1.6575815995165613, "count": 3443, "is_parallel": true, "self": 1.6575815995165613 }, "communicator.exchange": { "total": 29.456260600389214, "count": 3443, "is_parallel": true, "self": 29.456260600389214 }, "steps_from_proto": { "total": 6.41459919999761, "count": 6886, "is_parallel": true, "self": 1.2085421001102077, "children": { "_process_rank_one_or_two_observation": { "total": 5.206057099887403, "count": 27544, "is_parallel": true, "self": 5.206057099887403 } } } } } } } } } } }, "trainer_advance": { "total": 113.78921539986914, "count": 3443, "self": 0.7887009992264211, "children": { "process_trajectory": { "total": 20.89631270064274, "count": 3443, "self": 20.71150490063883, "children": { "RLTrainer._checkpoint": { "total": 0.1848078000039095, "count": 1, "self": 0.1848078000039095 } } }, "_update_policy": { "total": 92.10420169999998, "count": 2, "self": 7.926415099966107, "children": { "TorchPOCAOptimizer.update": { "total": 84.17778660003387, "count": 60, "self": 84.17778660003387 } } } } } } }, "trainer_threads": { "total": 1.00000761449337e-06, "count": 1, "self": 1.00000761449337e-06 }, "TrainerController._save_models": { "total": 0.2256047000118997, "count": 1, "self": 0.05978990001312923, "children": { "RLTrainer._checkpoint": { "total": 0.16581479999877047, "count": 1, "self": 0.16581479999877047 } } } } } } }