nithiroj committed on
Commit
fb38bdf
1 Parent(s): adacbc6

Push Reinforce agent to the Hub

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  tags:
3
- - LunarLander-v2
4
  - ppo
5
  - deep-reinforcement-learning
6
  - reinforcement-learning
@@ -11,19 +11,19 @@ model-index:
11
  results:
12
  - metrics:
13
  - type: mean_reward
14
- value: -112.60 +/- 107.42
15
  name: mean_reward
16
  task:
17
  type: reinforcement-learning
18
  name: reinforcement-learning
19
  dataset:
20
- name: LunarLander-v2
21
- type: LunarLander-v2
22
  ---
23
 
24
- # PPO Agent Playing LunarLander-v2
25
 
26
- This is a trained model of a PPO agent playing LunarLander-v2.
27
  To learn to code your own PPO agent and train it, check out Unit 8 of the Deep Reinforcement Learning Class: https://github.com/huggingface/deep-rl-class/tree/main/unit8
28
 
29
  # Hyperparameters
@@ -36,7 +36,7 @@ model-index:
36
  'wandb_project_name': 'cleanRL'
37
  'wandb_entity': None
38
  'capture_video': False
39
- 'env_id': 'LunarLander-v2'
40
  'total_timesteps': 50000
41
  'learning_rate': 0.00025
42
  'num_envs': 4
 
1
  ---
2
  tags:
3
+ - CartPole-v1
4
  - ppo
5
  - deep-reinforcement-learning
6
  - reinforcement-learning
 
11
  results:
12
  - metrics:
13
  - type: mean_reward
14
+ value: 166.00 +/- 55.22
15
  name: mean_reward
16
  task:
17
  type: reinforcement-learning
18
  name: reinforcement-learning
19
  dataset:
20
+ name: CartPole-v1
21
+ type: CartPole-v1
22
  ---
23
 
24
+ # PPO Agent Playing CartPole-v1
25
 
26
+ This is a trained model of a PPO agent playing CartPole-v1.
27
  To learn to code your own PPO agent and train it, check out Unit 8 of the Deep Reinforcement Learning Class: https://github.com/huggingface/deep-rl-class/tree/main/unit8
28
 
29
  # Hyperparameters
 
36
  'wandb_project_name': 'cleanRL'
37
  'wandb_entity': None
38
  'capture_video': False
39
+ 'env_id': 'CartPole-v1'
40
  'total_timesteps': 50000
41
  'learning_rate': 0.00025
42
  'num_envs': 4
logs/events.out.tfevents.1662590309.nt-pc.11485.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:327003697481a51f2d0682c8b8d4ceebac7f2a6551b2c4e8cf737f81255011a9
3
+ size 116432
model.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cda0e6fa965668f39fdf95e4f2deb1cafd6cf6c90bf65a20693583a68d3d98e7
3
- size 42689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:615063e1c901e83f6483e95e25739786a9c37a58a7e03e55a1c56f95f7e1493a
3
+ size 40129
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
results.json CHANGED
@@ -1 +1 @@
1
- {"env_id": "LunarLander-v2", "mean_reward": -112.6023172789165, "std_reward": 107.4201506784575, "n_evaluation_episodes": 10, "eval_datetime": "2022-09-08T05:25:35.416844"}
 
1
+ {"env_id": "CartPole-v1", "mean_reward": 166.0, "std_reward": 55.22499434133063, "n_evaluation_episodes": 10, "eval_datetime": "2022-09-08T05:38:57.571085"}