File size: 2,612 Bytes
666cfea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# CartPole-v1 settings. The whole mapping is anchored as `cartpole-defaults`
# so that other entries (CartPole-v0 below) can merge it in via `<<`.
CartPole-v1: &cartpole-defaults
  # Explicit `!!float` tag: YAML 1.1 loaders such as PyYAML would otherwise
  # read the dot-less `5e5` as a string rather than a number.
  n_timesteps: !!float 5e5
  env_hyperparams:
    n_envs: 8

# CartPole-v0 defines no values of its own: it merges in every key of the
# `cartpole-defaults` anchor declared on CartPole-v1.
CartPole-v0:
  <<: *cartpole-defaults

# MountainCar-v0: n_timesteps of 1e6, with env_hyperparams setting
# n_envs to 16 and normalize to true.
MountainCar-v0:
  n_timesteps: !!float 1e6
  env_hyperparams: {n_envs: 16, normalize: true}

# MountainCarContinuous-v0 settings. Only env_hyperparams and algo_hyperparams
# are active; the policy_hyperparams section is commented out.
MountainCarContinuous-v0:
  n_timesteps: !!float 1e5
  env_hyperparams:
    n_envs: 4
    normalize: true
  # Disabled policy overrides, kept for reference:
  # policy_hyperparams:
  #   use_sde: true
  #   log_std_init: 0.0
  #   init_layers_orthogonal: false
  algo_hyperparams:
    n_steps: 100
    # NOTE(review): `use_sde` is commented out above, so this SDE sampling
    # setting may have no effect — confirm whether it should stay or be
    # disabled together with the policy overrides.
    sde_sample_freq: 16

# Acrobot-v1: n_timesteps of 5e5; env_hyperparams listed in the same
# n_envs-then-normalize order used by the neighbouring entries
# (mapping key order is not significant in YAML).
Acrobot-v1:
  n_timesteps: !!float 5e5
  env_hyperparams:
    n_envs: 16
    normalize: true

# LunarLander-v2: n_timesteps of 1e6 over 8 normalized environments, with
# explicit algorithm overrides. Learning-rate keys are listed first; mapping
# key order is not significant in YAML.
LunarLander-v2:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 8
    normalize: true
  algo_hyperparams:
    learning_rate: !!float 8.3e-4
    learning_rate_decay: linear
    n_steps: 5
    gamma: 0.995
    ent_coef: !!float 1e-5

# BipedalWalker-v3: n_timesteps of 5e6 over 16 normalized environments, with
# policy and algorithm overrides. The algo_hyperparams values are the same set
# used by the `pybullet-algo-defaults` anchor further down this file.
# Learning-rate keys are listed first; mapping key order is not significant.
BipedalWalker-v3:
  n_timesteps: !!float 5e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
  algo_hyperparams:
    learning_rate: !!float 9.6e-4
    learning_rate_decay: linear
    n_steps: 8
    gamma: 0.99
    gae_lambda: 0.9
    ent_coef: 0
    vf_coef: 0.4
    max_grad_norm: 0.5

# HalfCheetahBulletEnv-v0 settings, anchored as `pybullet-defaults`. The other
# PyBullet entries below (Ant, Walker2D, Hopper) merge this mapping in
# unchanged, so every value here applies to them as well.
HalfCheetahBulletEnv-v0: &pybullet-defaults
  n_timesteps: !!float 2e6
  env_hyperparams:
    n_envs: 4
    normalize: true
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
  # Spelled `algo_hyperparams` to match every other entry in this file; any
  # other spelling is a different key that a consumer looking up
  # `algo_hyperparams` would not find. The nested mapping keeps its own anchor
  # (`pybullet-algo-defaults`) so it can be reused independently.
  algo_hyperparams: &pybullet-algo-defaults
    n_steps: 8
    ent_coef: 0
    max_grad_norm: 0.5
    gae_lambda: 0.9
    gamma: 0.99
    vf_coef: 0.4
    learning_rate: !!float 9.6e-4
    learning_rate_decay: linear

# AntBulletEnv-v0 has no overrides: it merges in the `pybullet-defaults`
# mapping anchored on HalfCheetahBulletEnv-v0.
AntBulletEnv-v0:
  <<: *pybullet-defaults

# Walker2DBulletEnv-v0 has no overrides: it merges in the `pybullet-defaults`
# mapping anchored on HalfCheetahBulletEnv-v0.
Walker2DBulletEnv-v0:
  <<: *pybullet-defaults

# HopperBulletEnv-v0 has no overrides: it merges in the `pybullet-defaults`
# mapping anchored on HalfCheetahBulletEnv-v0.
HopperBulletEnv-v0:
  <<: *pybullet-defaults

# CarRacing-v0: n_timesteps of 4e6 over 8 environments with a frame stack of 4.
# This entry does not set `normalize`. Mapping key order is not significant in
# YAML; network-shape keys are listed before the SDE/initialisation keys, and
# learning-rate keys before the remaining algorithm keys.
CarRacing-v0:
  n_timesteps: !!float 4e6
  env_hyperparams:
    frame_stack: 4
    n_envs: 8
  policy_hyperparams:
    activation_fn: relu
    cnn_feature_dim: 256
    # Single-element list, written as a block sequence.
    hidden_sizes:
      - 256
    share_features_extractor: false
    init_layers_orthogonal: false
    use_sde: true
    log_std_init: -2
  algo_hyperparams:
    learning_rate: !!float 5e-5
    learning_rate_decay: linear
    n_steps: 8
    gamma: 0.99
    gae_lambda: 0.95
    ent_coef: 0
    sde_sample_freq: 4

# Shared Atari template. Declares three anchors for reuse via aliases:
#   atari-defaults        - the whole entry
#   atari-env-defaults    - the env_hyperparams mapping
#   atari-policy-defaults - the policy_hyperparams mapping
# No alias to these anchors appears in this part of the file.
# NOTE(review): the leading underscore suggests this key is a template rather
# than an environment id — confirm the loader skips `_`-prefixed keys.
_atari: &atari-defaults
  n_timesteps: !!float 1e7
  env_hyperparams: &atari-env-defaults
    n_envs: 16
    frame_stack: 4
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
    vec_env_class: async
  policy_hyperparams: &atari-policy-defaults
    activation_fn: relu
  algo_hyperparams:
    ent_coef: 0.01
    vf_coef: 0.25