vwxyzjn committed
Commit b174205
1 Parent(s): ff0b88f

pushing model

README.md CHANGED
@@ -16,7 +16,7 @@ model-index:
       type: Pong-v5
     metrics:
     - type: mean_reward
-      value: -20.40 +/- 0.66
+      value: 17.90 +/- 1.97
       name: mean_reward
       verified: false
 ---
@@ -46,7 +46,7 @@ curl -OL https://huggingface.co/cleanrl/Pong-v5-sebulba_ppo_envpool-seed1/raw/ma
 curl -OL https://huggingface.co/cleanrl/Pong-v5-sebulba_ppo_envpool-seed1/raw/main/pyproject.toml
 curl -OL https://huggingface.co/cleanrl/Pong-v5-sebulba_ppo_envpool-seed1/raw/main/poetry.lock
 poetry install --all-extras
-python sebulba_ppo_envpool.py --actor-device-ids 0 --learner-device-ids 1 2 3 4 --params-queue-timeout 0.02 --track --save-model --upload-model --hf-entity cleanrl --total-timesteps 200000 --env-id Pong-v5 --seed 1
+python sebulba_ppo_envpool.py --actor-device-ids 0 --learner-device-ids 1 2 3 4 --params-queue-timeout 0.02 --track --save-model --upload-model --hf-entity cleanrl --env-id Pong-v5 --seed 1
 ```
 
 # Hyperparameters
@@ -74,7 +74,7 @@ python sebulba_ppo_envpool.py --actor-device-ids 0 --learner-device-ids 1 2 3 4
  'num_envs': 64,
  'num_minibatches': 4,
  'num_steps': 128,
- 'num_updates': 24,
+ 'num_updates': 6103,
  'params_queue_timeout': 0.02,
  'profile': False,
  'save_model': True,
@@ -82,7 +82,7 @@ python sebulba_ppo_envpool.py --actor-device-ids 0 --learner-device-ids 1 2 3 4
  'target_kl': None,
  'test_actor_learner_throughput': False,
  'torch_deterministic': True,
- 'total_timesteps': 200000,
+ 'total_timesteps': 50000000,
  'track': True,
  'update_epochs': 4,
  'upload_model': True,
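The hyperparameter changes above are consistent with each other: assuming CleanRL's usual convention that `num_updates = total_timesteps // (num_envs * num_steps)`, both the old and the new `num_updates` follow directly from the change in `total_timesteps`:

```python
# Sanity check, assuming num_updates = total_timesteps // (num_envs * num_steps).
num_envs, num_steps = 64, 128
batch_size = num_envs * num_steps        # 8192 environment steps per update

print(200_000 // batch_size)             # 24    -> old 'num_updates'
print(50_000_000 // batch_size)          # 6103  -> new 'num_updates'
```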
events.out.tfevents.1675614244.ip-26-0-128-121.2245265.0 → events.out.tfevents.1675616236.ip-26-0-142-109.1351408.0 RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4b502eb043bb4939c51d419cc3560d0e5c794acf9fb5f5ffc8361cfc73eac83
-size 39173
+oid sha256:0ccfb749271751f36fb56879919b9ff0978aab3d403b28b67b8c575302c29f4a
+size 9358611
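The renamed run log is an ordinary TensorBoard event file tracked with Git LFS, so the diff above only touches its pointer. To inspect the logged metrics locally, something like the sketch below works; the scalar tag name is an assumption based on CleanRL's usual logging convention:

```python
# Minimal sketch: read scalars from the downloaded event file.
# Requires the `tensorboard` package; the tag name is assumed.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("events.out.tfevents.1675616236.ip-26-0-142-109.1351408.0")
acc.Reload()
print(acc.Tags()["scalars"])                          # list available scalar tags
for event in acc.Scalars("charts/episodic_return"):   # assumed CleanRL tag name
    print(event.step, event.value)
```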
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
sebulba_ppo_envpool.cleanrl_model CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a38ade401b6608fba3b56897a8ba8ef47abe60c033f22c57ac5593eed995d76
-size 4378336
+oid sha256:ce9a36ebe3d00a58f91122dc6e4cafc62de05a884ad7a03513d93cb9b6c9846b
+size 4378338
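Like the event file, `replay.mp4` and `sebulba_ppo_envpool.cleanrl_model` are stored via Git LFS, so their diffs only show a new `oid` and `size` in the pointer file rather than the binary contents. A minimal sketch of reading such a pointer (the helper name is hypothetical, not part of this repo):

```python
# Minimal sketch: parse a Git LFS pointer file (version / oid / size fields).
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

pointer = parse_lfs_pointer("sebulba_ppo_envpool.cleanrl_model")
print(pointer["oid"])    # e.g. sha256:ce9a36eb...
print(pointer["size"])   # e.g. 4378338 (bytes)
```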
sebulba_ppo_envpool.py CHANGED
@@ -749,7 +749,6 @@ if __name__ == "__main__":
             ),
         ),
     )
-    print(devices)
     learner_devices = [devices[d_id] for d_id in args.learner_device_ids]
     actor_devices = [devices[d_id] for d_id in args.actor_device_ids]
     agent_state = flax.jax_utils.replicate(agent_state, devices=learner_devices)
@@ -876,7 +875,15 @@ if __name__ == "__main__":
 
     repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
    repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
-    push_to_hub(args, episodic_returns, repo_id, "PPO", f"runs/{run_name}", f"videos/{run_name}-eval", extra_dependencies=["jax", "envpool", "atari"])
+    push_to_hub(
+        args,
+        episodic_returns,
+        repo_id,
+        "PPO",
+        f"runs/{run_name}",
+        f"videos/{run_name}-eval",
+        extra_dependencies=["jax", "envpool", "atari"],
+    )
 
     envs.close()
     writer.close()
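The lines around the removed `print(devices)` split the available JAX devices into actor and learner devices and replicate the agent parameters across the learner devices. Below is a standalone sketch of that pattern, with illustrative device ids mirroring the `--actor-device-ids 0 --learner-device-ids 1 2 3 4` flags (it assumes at least five local devices):

```python
# Minimal sketch of the actor/learner device split and parameter replication.
import jax
import jax.numpy as jnp
from flax import jax_utils

devices = jax.devices()             # all local devices
actor_device_ids = [0]              # illustrative, mirrors --actor-device-ids
learner_device_ids = [1, 2, 3, 4]   # illustrative, mirrors --learner-device-ids

actor_devices = [devices[d] for d in actor_device_ids]
learner_devices = [devices[d] for d in learner_device_ids]

# Replicate a parameter pytree onto every learner device; each leaf gains a
# leading axis of length len(learner_devices), as done for agent_state above.
params = {"w": jnp.ones((3, 3)), "b": jnp.zeros((3,))}
replicated = jax_utils.replicate(params, devices=learner_devices)
print(jax.tree_util.tree_map(lambda x: x.shape, replicated))
```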
videos/Pong-v5__sebulba_ppo_envpool__1__074d5429-3b5b-459e-9b88-8db711fce3f2-eval/0.mp4 ADDED
Binary file (161 kB)
 
videos/Pong-v5__sebulba_ppo_envpool__1__859fb3f0-0595-46e5-afd6-dda7445846f6-eval/0.mp4 DELETED
Binary file (42.2 kB)