mlpack  3.4.2
categorical_dqn.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_METHODS_RL_CATEGORICAL_DQN_HPP
13 #define MLPACK_METHODS_RL_CATEGORICAL_DQN_HPP
14 
15 #include <mlpack/prereqs.hpp>
21 #include "../training_config.hpp"
22 
23 namespace mlpack {
24 namespace rl {
25 
26 using namespace mlpack::ann;
27 
45 template<
46  typename OutputLayerType = EmptyLoss<>,
47  typename InitType = GaussianInitialization,
48  typename NetworkType = FFN<OutputLayerType, InitType>
49 >
51 {
52  public:
56  CategoricalDQN() : network(), isNoisy(false)
57  { /* Nothing to do here. */ }
58 
71  CategoricalDQN(const int inputDim,
72  const int h1,
73  const int h2,
74  const int outputDim,
75  TrainingConfig config,
76  const bool isNoisy = false,
77  InitType init = InitType(),
78  OutputLayerType outputLayer = OutputLayerType()):
79  network(outputLayer, init),
80  atomSize(config.AtomSize()),
81  vMin(config.VMin()),
82  vMax(config.VMax()),
83  isNoisy(isNoisy)
84  {
85  network.Add(new Linear<>(inputDim, h1));
86  network.Add(new ReLULayer<>());
87  if (isNoisy)
88  {
89  noisyLayerIndex.push_back(network.Model().size());
90  network.Add(new NoisyLinear<>(h1, h2));
91  network.Add(new ReLULayer<>());
92  noisyLayerIndex.push_back(network.Model().size());
93  network.Add(new NoisyLinear<>(h2, outputDim * atomSize));
94  }
95  else
96  {
97  network.Add(new Linear<>(h1, h2));
98  network.Add(new ReLULayer<>());
99  network.Add(new Linear<>(h2, outputDim * atomSize));
100  }
101  }
102 
110  CategoricalDQN(NetworkType& network,
111  TrainingConfig config,
112  const bool isNoisy = false):
113  network(std::move(network)),
114  atomSize(config.AtomSize()),
115  vMin(config.VMin()),
116  vMax(config.VMax()),
117  isNoisy(isNoisy)
118  { /* Nothing to do here. */ }
119 
131  void Predict(const arma::mat state, arma::mat& actionValue)
132  {
133  arma::mat q_atoms;
134  network.Predict(state, q_atoms);
135  activations.copy_size(q_atoms);
136  actionValue.set_size(q_atoms.n_rows / atomSize, q_atoms.n_cols);
137  arma::rowvec support = arma::linspace<arma::rowvec>(vMin, vMax, atomSize);
138  for (size_t i = 0; i < q_atoms.n_rows; i += atomSize)
139  {
140  arma::mat activation = activations.rows(i, i + atomSize - 1);
141  arma::mat input = q_atoms.rows(i, i + atomSize - 1);
142  softMax.Forward(input, activation);
143  activations.rows(i, i + atomSize - 1) = activation;
144  actionValue.row(i/atomSize) = support * activation;
145  }
146  }
147 
154  void Forward(const arma::mat state, arma::mat& dist)
155  {
156  arma::mat q_atoms;
157  network.Forward(state, q_atoms);
158  activations.copy_size(q_atoms);
159  for (size_t i = 0; i < q_atoms.n_rows; i += atomSize)
160  {
161  arma::mat activation = activations.rows(i, i + atomSize - 1);
162  arma::mat input = q_atoms.rows(i, i + atomSize - 1);
163  softMax.Forward(input, activation);
164  activations.rows(i, i + atomSize - 1) = activation;
165  }
166  dist = activations;
167  }
168 
173  {
174  network.ResetParameters();
175  }
176 
180  void ResetNoise()
181  {
182  for (size_t i = 0; i < noisyLayerIndex.size(); i++)
183  {
184  boost::get<NoisyLinear<>*>
185  (network.Model()[noisyLayerIndex[i]])->ResetNoise();
186  }
187  }
188 
190  const arma::mat& Parameters() const { return network.Parameters(); }
192  arma::mat& Parameters() { return network.Parameters(); }
193 
201  void Backward(const arma::mat state,
202  arma::mat& lossGradients,
203  arma::mat& gradient)
204  {
205  arma::mat activationGradients(arma::size(activations));
206  for (size_t i = 0; i < activations.n_rows; i += atomSize)
207  {
208  arma::mat activationGrad;
209  arma::mat lossGrad = lossGradients.rows(i, i + atomSize - 1);
210  arma::mat activation = activations.rows(i, i + atomSize - 1);
211  softMax.Backward(activation, lossGrad, activationGrad);
212  activationGradients.rows(i, i + atomSize - 1) = activationGrad;
213  }
214  network.Backward(state, activationGradients, gradient);
215  }
216 
217  private:
219  NetworkType network;
220 
222  size_t atomSize;
223 
225  double vMin;
226 
228  double vMax;
229 
231  bool isNoisy;
232 
234  std::vector<size_t> noisyLayerIndex;
235 
237  Softmax<> softMax;
238 
240  arma::mat activations;
241 };
242 
243 } // namespace rl
244 } // namespace mlpack
245 
246 #endif
ffn.hpp
prereqs.hpp
The core includes that mlpack expects; standard C++ includes and Armadillo.
mlpack::rl::CategoricalDQN::CategoricalDQN
CategoricalDQN()
Default constructor.
Definition: categorical_dqn.hpp:56
mlpack::ann::BaseLayer
Implementation of the base layer.
Definition: base_layer.hpp:66
empty_loss.hpp
mlpack::ann
Artificial Neural Network.
Definition: elish_function.hpp:32
mlpack::rl::CategoricalDQN::Parameters
arma::mat & Parameters()
Modify the Parameters.
Definition: categorical_dqn.hpp:192
mlpack::rl::CategoricalDQN::ResetNoise
void ResetNoise()
Resets noise of the network, if the network is of type noisy.
Definition: categorical_dqn.hpp:180
layer.hpp
mean_squared_error.hpp
mlpack::ann::Softmax
Implementation of the Softmax layer.
Definition: softmax.hpp:39
mlpack::rl::CategoricalDQN::Parameters
const arma::mat & Parameters() const
Return the Parameters.
Definition: categorical_dqn.hpp:190
mlpack
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: add_to_cli11.hpp:21
mlpack::ann::NoisyLinear
Implementation of the NoisyLinear layer class.
Definition: noisylinear.hpp:34
mlpack::ann::EmptyLoss<>
mlpack::rl::CategoricalDQN::ResetParameters
void ResetParameters()
Resets the parameters of the network.
Definition: categorical_dqn.hpp:172
mlpack::rl::CategoricalDQN
Implementation of the Categorical Deep Q-Learning network.
Definition: categorical_dqn.hpp:51
mlpack::rl::CategoricalDQN::Backward
void Backward(const arma::mat state, arma::mat &lossGradients, arma::mat &gradient)
Perform the backward pass of the state in real batch mode.
Definition: categorical_dqn.hpp:201
mlpack::rl::CategoricalDQN::Predict
void Predict(const arma::mat state, arma::mat &actionValue)
Predict the responses to a given set of predictors.
Definition: categorical_dqn.hpp:131
mlpack::rl::TrainingConfig
Definition: training_config.hpp:20
mlpack::rl::CategoricalDQN::CategoricalDQN
CategoricalDQN(NetworkType &network, TrainingConfig config, const bool isNoisy=false)
Construct an instance of CategoricalDQN class from a pre-constructed network.
Definition: categorical_dqn.hpp:110
std
Definition: prereqs.hpp:67
mlpack::rl::CategoricalDQN::Forward
void Forward(const arma::mat state, arma::mat &dist)
Perform the forward pass of the states in real batch mode.
Definition: categorical_dqn.hpp:154
gaussian_init.hpp
mlpack::ann::GaussianInitialization
This class is used to initialize the weight matrix with a Gaussian distribution.
Definition: gaussian_init.hpp:29
mlpack::ann::FFN
Implementation of a standard feed forward network.
Definition: ffn.hpp:53
mlpack::rl::CategoricalDQN::CategoricalDQN
CategoricalDQN(const int inputDim, const int h1, const int h2, const int outputDim, TrainingConfig config, const bool isNoisy=false, InitType init=InitType(), OutputLayerType outputLayer=OutputLayerType())
Construct an instance of CategoricalDQN class.
Definition: categorical_dqn.hpp:71
mlpack::ann::Linear
Implementation of the Linear layer class.
Definition: linear.hpp:39