Pyramids RND PPO

34f69ac almost 2 years ago

18.7 kB

	{
	"name": "root",
	"gauges": {
	"Pyramids.Policy.Entropy.mean": {
	"value": 0.5453948974609375,
	"min": 0.5453948974609375,
	"max": 1.4915077686309814,
	"count": 33
	},
	"Pyramids.Policy.Entropy.sum": {
	"value": 16292.0361328125,
	"min": 16292.0361328125,
	"max": 45246.37890625,
	"count": 33
	},
	"Pyramids.Step.mean": {
	"value": 989948.0,
	"min": 29952.0,
	"max": 989948.0,
	"count": 33
	},
	"Pyramids.Step.sum": {
	"value": 989948.0,
	"min": 29952.0,
	"max": 989948.0,
	"count": 33
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
	"value": 0.38538113236427307,
	"min": -0.0955909714102745,
	"max": 0.41770029067993164,
	"count": 33
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
	"value": 102.89675903320312,
	"min": -22.94183349609375,
	"max": 113.19677734375,
	"count": 33
	},
	"Pyramids.Policy.RndValueEstimate.mean": {
	"value": 16.360816955566406,
	"min": -0.1930875927209854,
	"max": 16.360816955566406,
	"count": 33
	},
	"Pyramids.Policy.RndValueEstimate.sum": {
	"value": 4368.337890625,
	"min": -50.78203582763672,
	"max": 4368.337890625,
	"count": 33
	},
	"Pyramids.Losses.PolicyLoss.mean": {
	"value": 0.06801365375992775,
	"min": 0.06521726133429355,
	"max": 0.07370985161850217,
	"count": 33
	},
	"Pyramids.Losses.PolicyLoss.sum": {
	"value": 0.9521911526389885,
	"min": 0.4906440676131752,
	"max": 1.060501630349523,
	"count": 33
	},
	"Pyramids.Losses.ValueLoss.mean": {
	"value": 29.912240036896296,
	"min": 0.00019611255523600504,
	"max": 29.912240036896296,
	"count": 33
	},
	"Pyramids.Losses.ValueLoss.sum": {
	"value": 418.77136051654816,
	"min": 0.0027455757733040706,
	"max": 418.77136051654816,
	"count": 33
	},
	"Pyramids.Policy.LearningRate.mean": {
	"value": 7.7169974277e-06,
	"min": 7.7169974277e-06,
	"max": 0.00029515063018788575,
	"count": 33
	},
	"Pyramids.Policy.LearningRate.sum": {
	"value": 0.00010803796398780002,
	"min": 0.00010803796398780002,
	"max": 0.0035084345305218994,
	"count": 33
	},
	"Pyramids.Policy.Epsilon.mean": {
	"value": 0.10257229999999999,
	"min": 0.10257229999999999,
	"max": 0.19838354285714285,
	"count": 33
	},
	"Pyramids.Policy.Epsilon.sum": {
	"value": 1.4360122,
	"min": 1.3886848,
	"max": 2.5694781,
	"count": 33
	},
	"Pyramids.Policy.Beta.mean": {
	"value": 0.00026697277000000004,
	"min": 0.00026697277000000004,
	"max": 0.00983851593142857,
	"count": 33
	},
	"Pyramids.Policy.Beta.sum": {
	"value": 0.0037376187800000006,
	"min": 0.0037376187800000006,
	"max": 0.11697086218999998,
	"count": 33
	},
	"Pyramids.Losses.RNDLoss.mean": {
	"value": 0.008513377048075199,
	"min": 0.007972446270287037,
	"max": 0.3100510239601135,
	"count": 33
	},
	"Pyramids.Losses.RNDLoss.sum": {
	"value": 0.11918728053569794,
	"min": 0.11161424964666367,
	"max": 2.1703572273254395,
	"count": 33
	},
	"Pyramids.Environment.EpisodeLength.mean": {
	"value": 424.6901408450704,
	"min": 422.54794520547944,
	"max": 999.0,
	"count": 33
	},
	"Pyramids.Environment.EpisodeLength.sum": {
	"value": 30153.0,
	"min": 15984.0,
	"max": 33152.0,
	"count": 33
	},
	"Pyramids.Environment.CumulativeReward.mean": {
	"value": 1.4406085542270115,
	"min": -1.0000000521540642,
	"max": 1.4488225605699323,
	"count": 33
	},
	"Pyramids.Environment.CumulativeReward.sum": {
	"value": 100.84259879589081,
	"min": -30.9916016086936,
	"max": 103.15039819478989,
	"count": 33
	},
	"Pyramids.Policy.ExtrinsicReward.mean": {
	"value": 1.4406085542270115,
	"min": -1.0000000521540642,
	"max": 1.4488225605699323,
	"count": 33
	},
	"Pyramids.Policy.ExtrinsicReward.sum": {
	"value": 100.84259879589081,
	"min": -30.9916016086936,
	"max": 103.15039819478989,
	"count": 33
	},
	"Pyramids.Policy.RndReward.mean": {
	"value": 0.03696704734528404,
	"min": 0.03696704734528404,
	"max": 6.418480900116265,
	"count": 33
	},
	"Pyramids.Policy.RndReward.sum": {
	"value": 2.587693314169883,
	"min": 2.5355656908650417,
	"max": 102.69569440186024,
	"count": 33
	},
	"Pyramids.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 33
	},
	"Pyramids.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 33
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1682526146",
	"python_version": "3.9.16 (main, Dec 7 2022, 01:11:51) \n[GCC 9.4.0]",
	"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
	"mlagents_version": "0.31.0.dev0",
	"mlagents_envs_version": "0.31.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "1.11.0+cu102",
	"numpy_version": "1.21.2",
	"end_time_seconds": "1682528392"
	},
	"total": 2246.308129565,
	"count": 1,
	"self": 0.7797014669995406,
	"children": {
	"run_training.setup": {
	"total": 0.11689240200007589,
	"count": 1,
	"self": 0.11689240200007589
	},
	"TrainerController.start_learning": {
	"total": 2245.411535696,
	"count": 1,
	"self": 1.5090777700143008,
	"children": {
	"TrainerController._reset_env": {
	"total": 3.909860901000002,
	"count": 1,
	"self": 3.909860901000002
	},
	"TrainerController.advance": {
	"total": 2239.8390529809853,
	"count": 63559,
	"self": 1.5223795160577538,
	"children": {
	"env_step": {
	"total": 1617.4875878039404,
	"count": 63559,
	"self": 1506.3141157789323,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 110.26800501199705,
	"count": 63559,
	"self": 4.80453054196596,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 105.46347447003109,
	"count": 62557,
	"self": 105.46347447003109
	}
	}
	},
	"workers": {
	"total": 0.905467013011048,
	"count": 63559,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 2239.815159996994,
	"count": 63559,
	"is_parallel": true,
	"self": 851.1071773159645,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.001978406000034738,
	"count": 1,
	"is_parallel": true,
	"self": 0.0006393470000602974,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0013390589999744407,
	"count": 8,
	"is_parallel": true,
	"self": 0.0013390589999744407
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.05351651900014076,
	"count": 1,
	"is_parallel": true,
	"self": 0.0005735750003168505,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.0009403719998317683,
	"count": 1,
	"is_parallel": true,
	"self": 0.0009403719998317683
	},
	"communicator.exchange": {
	"total": 0.05040523100001337,
	"count": 1,
	"is_parallel": true,
	"self": 0.05040523100001337
	},
	"steps_from_proto": {
	"total": 0.001597340999978769,
	"count": 1,
	"is_parallel": true,
	"self": 0.00036294000028647133,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0012344009996922978,
	"count": 8,
	"is_parallel": true,
	"self": 0.0012344009996922978
	}
	}
	}
	}
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 1388.7079826810298,
	"count": 63558,
	"is_parallel": true,
	"self": 32.64124984892874,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 23.010396003001915,
	"count": 63558,
	"is_parallel": true,
	"self": 23.010396003001915
	},
	"communicator.exchange": {
	"total": 1239.0051135500414,
	"count": 63558,
	"is_parallel": true,
	"self": 1239.0051135500414
	},
	"steps_from_proto": {
	"total": 94.0512232790577,
	"count": 63558,
	"is_parallel": true,
	"self": 20.470986927843796,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 73.5802363512139,
	"count": 508464,
	"is_parallel": true,
	"self": 73.5802363512139
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 620.8290856609874,
	"count": 63559,
	"self": 2.864323043996592,
	"children": {
	"process_trajectory": {
	"total": 107.19060220599113,
	"count": 63559,
	"self": 106.93437550399085,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.2562267020002764,
	"count": 2,
	"self": 0.2562267020002764
	}
	}
	},
	"_update_policy": {
	"total": 510.7741604109997,
	"count": 454,
	"self": 327.1899827700081,
	"children": {
	"TorchPPOOptimizer.update": {
	"total": 183.58417764099158,
	"count": 22791,
	"self": 183.58417764099158
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 1.3150001905160025e-06,
	"count": 1,
	"self": 1.3150001905160025e-06
	},
	"TrainerController._save_models": {
	"total": 0.15354272900003707,
	"count": 1,
	"self": 0.0020205760001772433,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.15152215299985983,
	"count": 1,
	"self": 0.15152215299985983
	}
	}
	}
	}
	}
	}
	}