igorcheb committed on
Commit
38a4bc3
1 Parent(s): 71e1160

Update Agent_class.py

Browse files
Files changed (1) hide show
  1. Agent_class.py +4 -4
Agent_class.py CHANGED
@@ -2,7 +2,7 @@ import torch
2
 
3
  class ParameterisedPolicy(torch.nn.Module):
4
  """
5
- REINFORCE RL agent class. Returns action when the ParameterisedPolicy.act(observation) is used.
6
  observation is a gym state vector.
7
  obs_len - length of the state vector
8
  act_space_len - length of the action vector
@@ -10,8 +10,6 @@ class ParameterisedPolicy(torch.nn.Module):
10
  """
11
  def __init__(self, obs_len=8, act_space_len=2):
12
  super().__init__()
13
- self.deterministic = False
14
- self.continuous = True
15
  self.obs_len = obs_len
16
  self.act_space_len = act_space_len
17
  self.lin_1 = torch.nn.Linear(self.obs_len, 256)
@@ -41,7 +39,9 @@ class ParameterisedPolicy(torch.nn.Module):
41
  return mu, sigma
42
 
43
  def act(self, observation):
44
-
 
 
45
  (mus, sigmas) = self.forward(torch.tensor(observation, dtype=torch.float32))
46
  m = torch.distributions.normal.Normal(mus, sigmas)
47
  action = m.sample().detach().numpy()
 
2
 
3
  class ParameterisedPolicy(torch.nn.Module):
4
  """
5
+ REINFORCE RL agent class. Returns action when the ParameterisedPolicy.act(observation) method is used.
6
  observation is a gym state vector.
7
  obs_len - length of the state vector
8
  act_space_len - length of the action vector
 
10
  """
11
  def __init__(self, obs_len=8, act_space_len=2):
12
  super().__init__()
 
 
13
  self.obs_len = obs_len
14
  self.act_space_len = act_space_len
15
  self.lin_1 = torch.nn.Linear(self.obs_len, 256)
 
39
  return mu, sigma
40
 
41
  def act(self, observation):
42
+ """
43
+ Method returns action when gym state vector is passed.
44
+ """
45
  (mus, sigmas) = self.forward(torch.tensor(observation, dtype=torch.float32))
46
  m = torch.distributions.normal.Normal(mus, sigmas)
47
  action = m.sample().detach().numpy()