pushing model
Browse files
- README.md +4 -4
- events.out.tfevents.1675614244.ip-26-0-128-121.2245265.0 → events.out.tfevents.1675616236.ip-26-0-142-109.1351408.0 +2 -2
- replay.mp4 +0 -0
- sebulba_ppo_envpool.cleanrl_model +2 -2
- sebulba_ppo_envpool.py +9 -2
- videos/Pong-v5__sebulba_ppo_envpool__1__074d5429-3b5b-459e-9b88-8db711fce3f2-eval/0.mp4 +0 -0
- videos/Pong-v5__sebulba_ppo_envpool__1__859fb3f0-0595-46e5-afd6-dda7445846f6-eval/0.mp4 +0 -0
README.md
CHANGED
@@ -16,7 +16,7 @@ model-index:
|
|
16 |
type: Pong-v5
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
-
value:
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
@@ -46,7 +46,7 @@ curl -OL https://huggingface.co/cleanrl/Pong-v5-sebulba_ppo_envpool-seed1/raw/ma
|
|
46 |
curl -OL https://huggingface.co/cleanrl/Pong-v5-sebulba_ppo_envpool-seed1/raw/main/pyproject.toml
|
47 |
curl -OL https://huggingface.co/cleanrl/Pong-v5-sebulba_ppo_envpool-seed1/raw/main/poetry.lock
|
48 |
poetry install --all-extras
|
49 |
-
python sebulba_ppo_envpool.py --actor-device-ids 0 --learner-device-ids 1 2 3 4 --params-queue-timeout 0.02 --track --save-model --upload-model --hf-entity cleanrl --
|
50 |
```
|
51 |
|
52 |
# Hyperparameters
|
@@ -74,7 +74,7 @@ python sebulba_ppo_envpool.py --actor-device-ids 0 --learner-device-ids 1 2 3 4
|
|
74 |
'num_envs': 64,
|
75 |
'num_minibatches': 4,
|
76 |
'num_steps': 128,
|
77 |
-
'num_updates':
|
78 |
'params_queue_timeout': 0.02,
|
79 |
'profile': False,
|
80 |
'save_model': True,
|
@@ -82,7 +82,7 @@ python sebulba_ppo_envpool.py --actor-device-ids 0 --learner-device-ids 1 2 3 4
|
|
82 |
'target_kl': None,
|
83 |
'test_actor_learner_throughput': False,
|
84 |
'torch_deterministic': True,
|
85 |
-
'total_timesteps':
|
86 |
'track': True,
|
87 |
'update_epochs': 4,
|
88 |
'upload_model': True,
|
|
|
16 |
type: Pong-v5
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
+
value: 17.90 +/- 1.97
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
|
|
46 |
curl -OL https://huggingface.co/cleanrl/Pong-v5-sebulba_ppo_envpool-seed1/raw/main/pyproject.toml
|
47 |
curl -OL https://huggingface.co/cleanrl/Pong-v5-sebulba_ppo_envpool-seed1/raw/main/poetry.lock
|
48 |
poetry install --all-extras
|
49 |
+
python sebulba_ppo_envpool.py --actor-device-ids 0 --learner-device-ids 1 2 3 4 --params-queue-timeout 0.02 --track --save-model --upload-model --hf-entity cleanrl --env-id Pong-v5 --seed 1
|
50 |
```
|
51 |
|
52 |
# Hyperparameters
|
|
|
74 |
'num_envs': 64,
|
75 |
'num_minibatches': 4,
|
76 |
'num_steps': 128,
|
77 |
+
'num_updates': 6103,
|
78 |
'params_queue_timeout': 0.02,
|
79 |
'profile': False,
|
80 |
'save_model': True,
|
|
|
82 |
'target_kl': None,
|
83 |
'test_actor_learner_throughput': False,
|
84 |
'torch_deterministic': True,
|
85 |
+
'total_timesteps': 50000000,
|
86 |
'track': True,
|
87 |
'update_epochs': 4,
|
88 |
'upload_model': True,
|
events.out.tfevents.1675614244.ip-26-0-128-121.2245265.0 → events.out.tfevents.1675616236.ip-26-0-142-109.1351408.0
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ccfb749271751f36fb56879919b9ff0978aab3d403b28b67b8c575302c29f4a
|
3 |
+
size 9358611
|
replay.mp4
CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
|
|
sebulba_ppo_envpool.cleanrl_model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce9a36ebe3d00a58f91122dc6e4cafc62de05a884ad7a03513d93cb9b6c9846b
|
3 |
+
size 4378338
|
sebulba_ppo_envpool.py
CHANGED
@@ -749,7 +749,6 @@ if __name__ == "__main__":
|
|
749 |
),
|
750 |
),
|
751 |
)
|
752 |
-
print(devices)
|
753 |
learner_devices = [devices[d_id] for d_id in args.learner_device_ids]
|
754 |
actor_devices = [devices[d_id] for d_id in args.actor_device_ids]
|
755 |
agent_state = flax.jax_utils.replicate(agent_state, devices=learner_devices)
|
@@ -876,7 +875,15 @@ if __name__ == "__main__":
|
|
876 |
|
877 |
repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
|
878 |
repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
|
879 |
-
push_to_hub(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
880 |
|
881 |
envs.close()
|
882 |
writer.close()
|
|
|
749 |
),
|
750 |
),
|
751 |
)
|
|
|
752 |
learner_devices = [devices[d_id] for d_id in args.learner_device_ids]
|
753 |
actor_devices = [devices[d_id] for d_id in args.actor_device_ids]
|
754 |
agent_state = flax.jax_utils.replicate(agent_state, devices=learner_devices)
|
|
|
875 |
|
876 |
repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
|
877 |
repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
|
878 |
+
push_to_hub(
|
879 |
+
args,
|
880 |
+
episodic_returns,
|
881 |
+
repo_id,
|
882 |
+
"PPO",
|
883 |
+
f"runs/{run_name}",
|
884 |
+
f"videos/{run_name}-eval",
|
885 |
+
extra_dependencies=["jax", "envpool", "atari"],
|
886 |
+
)
|
887 |
|
888 |
envs.close()
|
889 |
writer.close()
|
videos/Pong-v5__sebulba_ppo_envpool__1__074d5429-3b5b-459e-9b88-8db711fce3f2-eval/0.mp4
ADDED
Binary file (161 kB). View file
|
|
videos/Pong-v5__sebulba_ppo_envpool__1__859fb3f0-0595-46e5-afd6-dda7445846f6-eval/0.mp4
DELETED
Binary file (42.2 kB)
|
|