Stan  1.0
probability, sampling & optimization
adaptive_cdhmc.hpp
Go to the documentation of this file.
1 #ifndef __STAN__MCMC__ADAPTIVE_CDHMC_H__
2 #define __STAN__MCMC__ADAPTIVE_CDHMC_H__
3 
4 #include <ctime>
5 #include <cstddef>
6 #include <vector>
7 #include <boost/random/normal_distribution.hpp>
8 #include <boost/random/mersenne_twister.hpp>
9 #include <boost/random/variate_generator.hpp>
10 #include <boost/random/uniform_01.hpp>
11 
12 #include <stan/math/util.hpp>
15 #include <stan/model/prob_grad.hpp>
16 #include <stan/mcmc/util.hpp>
17 
18 namespace stan {
19 
20  namespace mcmc {
21 
40  private:
41  // Provides the target distribution we're trying to sample from
42  stan::model::prob_grad& _model;
43 
44  // The most recent setting of the real-valued parameters
45  std::vector<double> _x;
46  // The most recent setting of the discrete parameters
47  std::vector<int> _z;
48  // The most recent gradient with respect to the real parameters
49  std::vector<double> _g;
50  // The most recent log probability (as returned by grad_log_prob)
51  double _logp;
52 
53  // The step size used in the Hamiltonian simulation
54  double _epsilon;
55  // The number of steps used in the Hamiltonian simulation
56  unsigned int _L;
57  // The desired value of epsilon*L (the total trajectory length)
58  double _epsilonL;
59  // The desired value of E[acceptance probability]
60  double _delta;
61 
// Base pseudo-random engine; shared (by reference) by both generators below.
62  boost::mt19937 _rand_int;
// Standard-normal draws, used to sample the leapfrog momenta.
63  boost::variate_generator<boost::mt19937&, boost::normal_distribution<> > _rand_unit_norm;
// Uniform(0,1) draws, used for Metropolis accept/reject and discrete Gibbs sampling.
64  boost::uniform_01<boost::mt19937&> _rand_uniform_01;
65 
66  // Class implementing Nesterov's primal-dual averaging
67  DualAverage _da;
68  // Gamma parameter (shrinkage weight) for dual averaging.
69  inline static double da_gamma() { return 0.05; }
71  public:
72 
80  void set_epsilon(double epsilon) {
81  _epsilon = epsilon;
82  _L = lround(_epsilonL / epsilon);
83  }
84 
// NOTE(review): the constructor's opening lines (through orig. line 107,
// including the `adaptive_cdhmc(stan::model::prob_grad& model, ...`
// signature shown in the documentation index) are not visible in this
// rendering; review against the repository source.
108  double epsilonL,
109  double delta = 0.651,
110  double epsilon = -1,
111  unsigned int random_seed = static_cast<unsigned int>(std::time(0)))
// Adaptation is enabled exactly when no valid epsilon was supplied.
112  : adaptive_sampler(epsilon < 0.0),
113  _model(model),
114  _x(model.num_params_r()),
115  _z(model.num_params_i()),
116  _g(model.num_params_r()),
// NOTE(review): _epsilon does not appear in this initializer list as
// rendered; presumably orig. line 117 held `_epsilon(epsilon),` -- confirm,
// since set_epsilon(_epsilon) below reads it.
117 
118  _epsilonL(epsilonL),
119  _delta(delta),
120 
121  _rand_int(random_seed),
122  _rand_unit_norm(_rand_int,
123  boost::normal_distribution<>()),
124  _rand_uniform_01(_rand_int),
125 
// Dual averaging starts from x0 = 0 (i.e. log-epsilon = 0).
126  _da(da_gamma(), std::vector<double>(1, 0)) {
// Derive the step count _L from the initial step size and _epsilonL.
127  set_epsilon(_epsilon);
// Initialize parameters from the model, then cache logp and its gradient.
128  model.init(_x,_z);
129  _logp = model.grad_log_prob(_x,_z,_g);
130  if (_epsilon <= 0)
// NOTE(review): orig. line 131 is missing from this rendering (likely a
// call such as find_reasonable_parameters()); as shown, the next statement
// would take log() of a non-positive _epsilon -- confirm.
132  _da.setx0(std::vector<double>(1, log(_epsilon)));
133  }
134 
// Destructor. Nothing to release explicitly: all members clean
// themselves up, and _model is a non-owning reference.
138  virtual ~adaptive_cdhmc() {
139  }
140 
151  virtual void set_params(const std::vector<double>& x,
152  const std::vector<int>& z) {
153  if (x.size() != _x.size())
154  throw std::invalid_argument("adaptive_cdhmc::set_params: double params must match in size");
155  if (z.size() != _z.size())
156  throw std::invalid_argument("adaptive_cdhmc::set_params: int params must match in size");
157  _x = x;
158  _z = z;
159  _logp = _model.grad_log_prob(_x,_z,_g);
160  }
161 
173  void set_params_r(const std::vector<double>& x) {
174  if (x.size() != _model.num_params_r())
175  throw std::invalid_argument("x.size() must match the number of parameters of the model.");
176  _x = x;
177  _logp = _model.grad_log_prob(_x,_z,_g);
178  }
179 
191  void set_params_i(const std::vector<int>& z) {
192  if (z.size() != _model.num_params_i())
193  throw std::invalid_argument("z.size() must match the number of parameters of the model.");
194  _z = z;
195  _logp = _model.grad_log_prob(_x,_z,_g);
196  }
197 
// Searches for a roughly reasonable (within a factor of 2) step size:
// starting from epsilon = 1, repeatedly doubles or halves epsilon until
// one leapfrog step changes the log density across the log(0.5)
// threshold, then locks the result in via set_epsilon().
202  virtual void find_reasonable_parameters() {
203  _epsilon = 1;
// Work on copies so the sampler's current state (_x, _g) is untouched.
204  std::vector<double> x = _x;
205  std::vector<double> m(_model.num_params_r());
206  for (size_t i = 0; i < m.size(); ++i)
207  m[i] = _rand_unit_norm();
208  std::vector<double> g = _g;
209  double lastlogp = _logp;
210  double logp = leapfrog(_model, _z, x, m, g, _epsilon);
// NOTE(review): H compares the potential (log-density) terms only; the
// momentum's kinetic-energy change is not included as rendered here --
// confirm this matches the intended acceptance-probability heuristic.
211  double H = logp - lastlogp;
// direction = +1: step looks too small (double it); -1: too big (halve it).
212  int direction = H > log(0.5) ? 1 : -1;
213  // fprintf(stderr, "epsilon = %f. initial logp = %f, lf logp = %f\n",
214  // _epsilon, lastlogp, logp);
215  while (1) {
// Reset position/gradient and resample momentum for each trial step.
216  x = _x;
217  g = _g;
218  for (size_t i = 0; i < m.size(); ++i)
219  m[i] = _rand_unit_norm();
220  logp = leapfrog(_model, _z, x, m, g, _epsilon);
// lastlogp is deliberately not updated: every trial is compared against
// the same baseline log probability of the current state.
221  H = logp - lastlogp;
222 // fprintf(stderr, "epsilon = %f. initial logp = %f, lf logp = %f\n",
223 // _epsilon, lastlogp, logp);
// Stop as soon as H crosses log(0.5) from the chosen direction.
224  if ((direction == 1) && (H < log(0.5)))
225  break;
226  else if ((direction == -1) && (H > log(0.5)))
227  break;
228  else
229  _epsilon = direction == 1 ? 2 * _epsilon : 0.5 * _epsilon;
230  }
// Recompute _L so that epsilon * L stays near the target _epsilonL.
231  set_epsilon(_epsilon);
232  }
233 
// Produces the next MCMC sample: first a Gibbs sweep over the discrete
// parameters, then one HMC (leapfrog) trajectory with a Metropolis
// accept/reject for the continuous parameters, and finally a dual-
// averaging update of the step size while adaptation is on.
239  virtual sample next_impl() {
240  // Gibbs for discrete
241  std::vector<double> probs;
242  for (size_t m = 0; m < _model.num_params_i(); ++m) {
243  probs.resize(0);
// Enumerate the conditional (unnormalized, log-scale) probability of
// every admissible value of parameter m, then sample from it.
// NOTE(review): the loop bound is `k < upper`, i.e. the upper bound is
// exclusive as rendered -- confirm against param_range_i_upper's contract.
244  for (int k = _model.param_range_i_lower(m);
245  k < _model.param_range_i_upper(m);
246  ++k)
247  probs.push_back(_model.log_prob_star(m,k,_x,_z));
248  _z[m] = sample_unnorm_log(probs,_rand_uniform_01);
249  }
250 
251  // HMC for continuous
// Sample a fresh standard-normal momentum.
252  std::vector<double> m(_model.num_params_r());
253  for (size_t i = 0; i < m.size(); ++i)
254  m[i] = _rand_unit_norm();
// H is the negated Hamiltonian: logp minus kinetic energy, so the
// acceptance probability below is exp(H_new - H).
255  double H = -(stan::math::dot_self(m) / 2.0) + _logp;
256 
// Simulate _L leapfrog steps on copies of the state; leapfrog updates
// x_new, m and g_new in place and returns the log probability.
// (If _L were 0, logp_new would keep the -1e100 sentinel and the
// proposal would effectively always be rejected.)
257  std::vector<double> g_new(_g);
258  std::vector<double> x_new(_x);
259  double logp_new = -1e100;
260  for (unsigned int l = 0; l < _L; ++l)
261  logp_new = leapfrog(_model, _z, x_new, m, g_new, _epsilon);
262  nfevals_plus_eq(_L);
263 
// Metropolis accept/reject on the change in (negated) Hamiltonian.
264  double H_new = -(stan::math::dot_self(m) / 2.0) + logp_new;
265  double dH = H_new - H;
266  if (_rand_uniform_01() < exp(dH)) {
267  _x = x_new;
268  _g = g_new;
269  _logp = logp_new;
270  }
271 
272  // Now we just have to update epsilon, if adaptation is on.
// adapt_stat is the capped acceptance probability min(1, exp(dH)).
273  double adapt_stat = stan::math::min(1, exp(dH));
// Self-inequality is the NaN test: treat a NaN acceptance stat as 0.
274  if (adapt_stat != adapt_stat)
275  adapt_stat = 0;
276  if (adapting()) {
// Dual averaging follows the negated gradient of (adapt_stat - _delta),
// steering the average acceptance probability toward _delta.
277  double adapt_g = adapt_stat - _delta;
278  std::vector<double> gvec(1, -adapt_g);
279  std::vector<double> result;
280  _da.update(gvec, result);
281  set_epsilon(exp(result[0]));
282  }
// The xbar query below only feeds the commented-out debug print; it is
// effectively dead code as rendered.
283  std::vector<double> result;
284  _da.xbar(result);
285 // fprintf(stderr, "xbar = %f\n", exp(result[0]));
// Fold adapt_stat into the running mean statistic with weight 1/n_steps.
286  double avg_eta = 1.0 / n_steps();
287  update_mean_stat(avg_eta,adapt_stat);
288 
289  mcmc::sample s(_x, _z, _logp);
290  return s;
291  }
292 
300  virtual void adapt_off() {
302  std::vector<double> result;
303  _da.xbar(result);
304  set_epsilon(exp(result[0]));
305  }
306 
312  virtual void get_parameters(std::vector<double>& params) {
313  params.assign(1, _epsilon);
314  }
315 
316  };
317 
318  }
319 
320 }
321 
322 #endif
Implements Nesterov's dual averaging algorithm.
Definition: dualaverage.hpp:28
void setx0(const std::vector< double > &x0)
Set the point towards which each iterate is shrunk.
Definition: dualaverage.hpp:75
void update(const std::vector< double > &g, std::vector< double > &xk)
Produces the next iterate xk given the current gradient g.
Definition: dualaverage.hpp:53
void xbar(std::vector< double > &xbar)
Get the exponentially weighted moving average of all previous iterates.
Definition: dualaverage.hpp:86
Adaptive "constant distance" Hamiltonian Monte Carlo (CDHMC) sampler.
adaptive_cdhmc(stan::model::prob_grad &model, double epsilonL, double delta=0.651, double epsilon=-1, unsigned int random_seed=static_cast< unsigned int >(std::time(0)))
Constructs an adaptive "constant distance" Hamiltonian Monte Carlo (CDHMC) sampler for the specified ...
virtual sample next_impl()
Returns the next sample.
void set_epsilon(double epsilon)
Set the step size epsilon.
virtual ~adaptive_cdhmc()
Destructor.
virtual void find_reasonable_parameters()
Searches for a roughly reasonable (within a factor of 2) setting of the step size epsilon.
void set_params_r(const std::vector< double > &x)
Sets the model's real parameters to the specified values and update gradients and log probability to ...
virtual void get_parameters(std::vector< double > &params)
Returns the value of epsilon.
virtual void adapt_off()
Turn off parameter adaptation.
virtual void set_params(const std::vector< double > &x, const std::vector< int > &z)
Sets the model's real and integer parameters to the specified values.
void set_params_i(const std::vector< int > &z)
Sets the model integer parameters to the specified values and update gradients and log probability to...
An abstract base class for adaptive samplers.
bool adapting()
Return whether or not parameter adaptation is on.
int n_steps()
Return the number of iterations for this sampler.
void update_mean_stat(double avg_eta, double adapt_stat)
Updates the mean statistic given the specified adaptation statistic and weighting.
void nfevals_plus_eq(int n)
Add the specified number of evaluations to the number of function evaluations.
virtual void adapt_off()
Turn off parameter adaptation.
Representation of a MCMC sample.
Definition: sampler.hpp:16
The prob_grad class represents densities with fixed numbers of discrete and scalar parameters and the...
Definition: prob_grad.hpp:24
virtual void init(std::vector< double > &params_r, std::vector< int > &params_i)
Definition: prob_grad.hpp:80
virtual size_t num_params_r()
Definition: prob_grad.hpp:52
virtual size_t num_params_i()
Definition: prob_grad.hpp:56
virtual double log_prob_star(size_t idx, int val, std::vector< double > &params_r, std::vector< int > &params_i, std::ostream *output_stream=0)
Definition: prob_grad.hpp:144
virtual double grad_log_prob(std::vector< double > &params_r, std::vector< int > &params_i, std::vector< double > &gradient, std::ostream *output_stream=0)=0
int param_range_i_lower(size_t idx)
Definition: prob_grad.hpp:72
int param_range_i_upper(size_t idx)
Definition: prob_grad.hpp:76
Reimplementing boost functionality.
var log(const var &a)
Return the natural log of the specified variable (cmath).
Definition: agrad.hpp:1730
var exp(const var &a)
Return the exponentiation of the specified variable (cmath).
Definition: agrad.hpp:1716
double epsilon()
Return minimum positive number representable.
int min(const std::vector< int > &x)
Returns the minimum coefficient in the specified column vector.
Definition: matrix.hpp:917
double dot_self(const Eigen::Matrix< double, R, C > &v)
Returns the dot product of the specified vector with itself.
Definition: matrix.hpp:846
double leapfrog(stan::model::prob_grad &model, std::vector< int > z, std::vector< double > &x, std::vector< double > &m, std::vector< double > &g, double epsilon, std::ostream *error_msgs=0, std::ostream *output_msgs=0)
Computes the log probability for a single leapfrog step in Hamiltonian Monte Carlo.
Definition: util.hpp:57
int sample_unnorm_log(std::vector< double > probs, boost::uniform_01< boost::mt19937 & > &rand_uniform_01)
Definition: util.hpp:103
Probability, optimization and sampling library.
Definition: agrad.cpp:6
Template specification of functions in std for Stan.
Definition: agrad.hpp:2306

     [ Stan Home Page ] © 2011–2012, Stan Development Team.