Stan  1.0
probability, sampling & optimization
 All Classes Namespaces Files Functions Variables Typedefs Enumerator Friends Macros Pages
adaptive_cdhmc.hpp
Go to the documentation of this file.
1 #ifndef __STAN__MCMC__ADAPTIVE_CDHMC_H__
2 #define __STAN__MCMC__ADAPTIVE_CDHMC_H__
3 
4 #include <ctime>
5 #include <cstddef>
6 #include <vector>
7 #include <boost/random/normal_distribution.hpp>
8 #include <boost/random/mersenne_twister.hpp>
9 #include <boost/random/variate_generator.hpp>
10 #include <boost/random/uniform_01.hpp>
11 
12 #include <stan/math/util.hpp>
15 #include <stan/model/prob_grad.hpp>
16 #include <stan/mcmc/util.hpp>
17 
18 namespace stan {
19 
20  namespace mcmc {
21 
40  private:
41  // Provides the target distribution we're trying to sample from
42  stan::model::prob_grad& _model;
43 
44  // The most recent setting of the real-valued parameters
45  std::vector<double> _x;
46  // The most recent setting of the discrete parameters
47  std::vector<int> _z;
48  // The most recent gradient with respect to the real parameters
49  std::vector<double> _g;
50  // The most recent log-likelihood
51  double _logp;
52 
53  // The step size used in the Hamiltonian simulation
54  double _epsilon;
55  // The number of steps used in the Hamiltonian simulation
56  unsigned int _L;
57  // The desired value of epsilon*L
58  double _epsilonL;
59  // The desired value of E[acceptance probability]
60  double _delta;
61 
62  boost::mt19937 _rand_int;
63  boost::variate_generator<boost::mt19937&, boost::normal_distribution<> > _rand_unit_norm;
64  boost::uniform_01<boost::mt19937&> _rand_uniform_01;
65 
66  // Class implementing Nesterov's primal-dual averaging
67  DualAverage _da;
68  // Gamma parameter for dual averaging.
69  inline static double da_gamma() { return 0.05; }
70 
71  public:
72 
80  void set_epsilon(double epsilon) {
81  _epsilon = epsilon;
82  _L = lround(_epsilonL / epsilon);
83  }
84 
108  double epsilonL,
109  double delta = 0.651,
110  double epsilon = -1,
111  unsigned int random_seed = static_cast<unsigned int>(std::time(0)))
112  : adaptive_sampler(epsilon < 0.0),
113  _model(model),
114  _x(model.num_params_r()),
115  _z(model.num_params_i()),
116  _g(model.num_params_r()),
117 
118  _epsilonL(epsilonL),
119  _delta(delta),
120 
121  _rand_int(random_seed),
122  _rand_unit_norm(_rand_int,
123  boost::normal_distribution<>()),
124  _rand_uniform_01(_rand_int),
125 
126  _da(da_gamma(), std::vector<double>(1, 0)) {
127  set_epsilon(_epsilon);
128  model.init(_x,_z);
129  _logp = model.grad_log_prob(_x,_z,_g);
130  if (_epsilon <= 0)
132  _da.setx0(std::vector<double>(1, log(_epsilon)));
133  }
134 
138  virtual ~adaptive_cdhmc() {
139  }
140 
151  virtual void set_params(const std::vector<double>& x,
152  const std::vector<int>& z) {
153  if (x.size() != _x.size())
154  throw std::invalid_argument("adaptive_cdhmc::set_params: double params must match in size");
155  if (z.size() != _z.size())
156  throw std::invalid_argument("adaptive_cdhmc::set_params: int params must match in size");
157  _x = x;
158  _z = z;
159  _logp = _model.grad_log_prob(_x,_z,_g);
160  }
161 
173  void set_params_r(const std::vector<double>& x) {
174  if (x.size() != _model.num_params_r())
175  throw std::invalid_argument("x.size() must match the number of parameters of the model.");
176  _x = x;
177  _logp = _model.grad_log_prob(_x,_z,_g);
178  }
179 
191  void set_params_i(const std::vector<int>& z) {
192  if (z.size() != _model.num_params_i())
193  throw std::invalid_argument("z.size() must match the number of parameters of the model.");
194  _z = z;
195  _logp = _model.grad_log_prob(_x,_z,_g);
196  }
197 
202  virtual void find_reasonable_parameters() {
203  _epsilon = 1;
204  std::vector<double> x = _x;
205  std::vector<double> m(_model.num_params_r());
206  for (size_t i = 0; i < m.size(); ++i)
207  m[i] = _rand_unit_norm();
208  std::vector<double> g = _g;
209  double lastlogp = _logp;
210  double logp = leapfrog(_model, _z, x, m, g, _epsilon);
211  double H = logp - lastlogp;
212  int direction = H > log(0.5) ? 1 : -1;
213  // fprintf(stderr, "epsilon = %f. initial logp = %f, lf logp = %f\n",
214  // _epsilon, lastlogp, logp);
215  while (1) {
216  x = _x;
217  g = _g;
218  for (size_t i = 0; i < m.size(); ++i)
219  m[i] = _rand_unit_norm();
220  logp = leapfrog(_model, _z, x, m, g, _epsilon);
221  H = logp - lastlogp;
222 // fprintf(stderr, "epsilon = %f. initial logp = %f, lf logp = %f\n",
223 // _epsilon, lastlogp, logp);
224  if ((direction == 1) && (H < log(0.5)))
225  break;
226  else if ((direction == -1) && (H > log(0.5)))
227  break;
228  else
229  _epsilon = direction == 1 ? 2 * _epsilon : 0.5 * _epsilon;
230  }
231  set_epsilon(_epsilon);
232  }
233 
239  virtual sample next_impl() {
240  // Gibbs for discrete
241  std::vector<double> probs;
242  for (size_t m = 0; m < _model.num_params_i(); ++m) {
243  probs.resize(0);
244  for (int k = _model.param_range_i_lower(m);
245  k < _model.param_range_i_upper(m);
246  ++k)
247  probs.push_back(_model.log_prob_star(m,k,_x,_z));
248  _z[m] = sample_unnorm_log(probs,_rand_uniform_01);
249  }
250 
251  // HMC for continuous
252  std::vector<double> m(_model.num_params_r());
253  for (size_t i = 0; i < m.size(); ++i)
254  m[i] = _rand_unit_norm();
255  double H = -(stan::math::dot_self(m) / 2.0) + _logp;
256 
257  std::vector<double> g_new(_g);
258  std::vector<double> x_new(_x);
259  double logp_new = -1e100;
260  for (unsigned int l = 0; l < _L; ++l)
261  logp_new = leapfrog(_model, _z, x_new, m, g_new, _epsilon);
262  nfevals_plus_eq(_L);
263 
264  double H_new = -(stan::math::dot_self(m) / 2.0) + logp_new;
265  double dH = H_new - H;
266  if (_rand_uniform_01() < exp(dH)) {
267  _x = x_new;
268  _g = g_new;
269  _logp = logp_new;
270  }
271 
272  // Now we just have to update epsilon, if adaptation is on.
273  double adapt_stat = stan::math::min(1, exp(dH));
274  if (adapt_stat != adapt_stat)
275  adapt_stat = 0;
276  if (adapting()) {
277  double adapt_g = adapt_stat - _delta;
278  std::vector<double> gvec(1, -adapt_g);
279  std::vector<double> result;
280  _da.update(gvec, result);
281  set_epsilon(exp(result[0]));
282  }
283  std::vector<double> result;
284  _da.xbar(result);
285 // fprintf(stderr, "xbar = %f\n", exp(result[0]));
286  double avg_eta = 1.0 / n_steps();
287  update_mean_stat(avg_eta,adapt_stat);
288 
289  mcmc::sample s(_x, _z, _logp);
290  return s;
291  }
292 
300  virtual void adapt_off() {
302  std::vector<double> result;
303  _da.xbar(result);
304  set_epsilon(exp(result[0]));
305  }
306 
312  virtual void get_parameters(std::vector<double>& params) {
313  params.assign(1, _epsilon);
314  }
315 
316  };
317 
318  }
319 
320 }
321 
322 #endif

     [ Stan Home Page ] © 2011–2012, Stan Development Team.