Stan  1.0
probability, sampling & optimization
prob_grad.hpp
Go to the documentation of this file.
1 #ifndef __STAN__MODEL__PROB_GRAD_HPP__
2 #define __STAN__MODEL__PROB_GRAD_HPP__
3 
#include <cmath>
#include <iomanip>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <stdio.h>
#include <utility>
#include <vector>

#include <stan/io/csv_writer.hpp>
13 
14 namespace stan {
15 
16  namespace model {
17 
24  class prob_grad {
25  protected:
27  std::vector<std::pair<int,int> > param_ranges_i__;
28 
29  public:
30 
33  param_ranges_i__(std::vector<std::pair<int,int> >(0)) {
34  }
35 
37  std::vector<std::pair<int,int> >& param_ranges_i)
39  param_ranges_i__(param_ranges_i) {
40  }
41 
42  virtual ~prob_grad() { }
43 
46  }
47 
48  void setparam_ranges_i__(std::vector<std::pair<int,int> > param_ranges_i) {
49  param_ranges_i__ = param_ranges_i;
50  }
51 
52  virtual size_t num_params_r() {
53  return num_params_r__;
54  }
55 
56  virtual size_t num_params_i() {
57  return param_ranges_i__.size();
58  }
59 
60  inline std::pair<int,int> param_range_i(size_t idx) {
61  return param_ranges_i__[idx];
62  }
63 
64  inline void set_param_range_i_lower(size_t idx, int low) {
65  param_ranges_i__[idx].first = low;
66  }
67 
68  inline void set_param_range_i_upper(size_t idx, int up) {
69  param_ranges_i__[idx].second = up;
70  }
71 
72  inline int param_range_i_lower(size_t idx) {
73  return param_ranges_i__[idx].first;
74  }
75 
76  inline int param_range_i_upper(size_t idx) {
77  return param_ranges_i__[idx].second;
78  }
79 
80  virtual void init(std::vector<double>& params_r,
81  std::vector<int>& params_i) {
82  for (size_t i = 0; i < num_params_r(); ++i)
83  params_r[i] = 0.0;
84  for (size_t j = 0; j < num_params_i(); ++j)
85  params_i[j] = param_range_i_lower(j);
86  }
87 
88  virtual double grad_log_prob(std::vector<double>& params_r,
89  std::vector<int>& params_i,
90  std::vector<double>& gradient,
91  std::ostream* output_stream = 0) = 0;
92 
93  virtual double log_prob(std::vector<double>& params_r,
94  std::vector<int>& params_i,
95  std::ostream* output_stream = 0) = 0;
96 
112  virtual double grad_hess_log_prob(std::vector<double>& params_r,
113  std::vector<int>& params_i,
114  std::vector<double>& gradient,
115  std::vector<double>& hessian,
116  std::ostream* output_stream = 0) {
117  const double epsilon = 1e-3;
118  const int order = 4;
119  const double perturbations[order] = {-2*epsilon, -1*epsilon, epsilon, 2*epsilon};
120  const double coefficients[order] = {1.0/12.0,-2.0/3.0,2.0/3.0,-1.0/12.0};
121 
122  double result = grad_log_prob(params_r, params_i, gradient,
123  output_stream);
124 
125  hessian.assign(params_r.size() * params_r.size(), 0);
126  std::vector<double> temp_grad(params_r.size());
127  std::vector<double> perturbed_params(params_r.begin(), params_r.end());
128  for (size_t d = 0; d < params_r.size(); d++) {
129  double* row = &hessian[d*params_r.size()];
130  for (int i = 0; i < order; i++) {
131  perturbed_params[d] = params_r[d] + perturbations[i];
132  grad_log_prob(perturbed_params, params_i, temp_grad);
133  for (size_t dd = 0; dd < params_r.size(); dd++) {
134  row[dd] += 0.5 * coefficients[i] * temp_grad[dd] / epsilon;
135  hessian[d + dd*params_r.size()] += 0.5 * coefficients[i] * temp_grad[dd] / epsilon;
136  }
137  }
138  perturbed_params[d] = params_r[d];
139  }
140 
141  return result;
142  }
143 
144  virtual double log_prob_star(size_t idx,
145  int val,
146  std::vector<double>& params_r,
147  std::vector<int>& params_i,
148  std::ostream* output_stream = 0) {
149  if (idx >= num_params_i()) // || idx < 0
150  throw std::runtime_error ("idx >= num_params_i()");
151  if (val >= param_range_i(idx).first) //
152  throw std::runtime_error ("val <= param_range_i(idx) lower");
153  if (val >= param_range_i(idx).second) //
154  throw std::runtime_error ("val >= param_range_i(idx) upper");
155 
156  int original_val = params_i[idx];
157  params_i[idx] = val;
158  double result = log_prob(params_r,params_i,output_stream);
159  params_i[idx] = original_val;
160  return result;
161  }
162 
163 
177  virtual void write_csv(std::vector<double>& params_r,
178  std::vector<int>& params_i,
179  std::ostream& o,
180  std::ostream* output_stream = 0) {
181  stan::io::csv_writer writer(o);
182  for (size_t i = 0; i < params_i.size(); ++i)
183  writer.write(params_i[i]);
184  for (size_t i = 0; i < params_r.size(); ++i)
185  writer.write(params_r[i]);
186  writer.newline();
187  }
188 
189 
201  void finite_diff_grad(std::vector<double>& params_r,
202  std::vector<int>& params_i,
203  std::vector<double>& grad,
204  double epsilon = 1e-6,
205  std::ostream* output_stream = 0) {
206  std::vector<double> perturbed(params_r);
207  grad_log_prob(params_r,params_i,grad,output_stream);
208  grad.resize(params_r.size());
209  for (size_t k = 0; k < params_r.size(); k++) {
210  perturbed[k] += epsilon;
211  double logp_plus = log_prob(perturbed,params_i,output_stream);
212  perturbed[k] = params_r[k] - epsilon;
213  double logp_minus = log_prob(perturbed,params_i,output_stream);
214  double gradest = (logp_plus - logp_minus) / (2*epsilon);
215  grad[k] = gradest;
216  perturbed[k] = params_r[k];
217  }
218  }
219 
236  int test_gradients(std::vector<double>& params_r,
237  std::vector<int>& params_i,
238  double epsilon = 1e-6,
239  double error = 1e-6,
240  std::ostream& o = std::cout,
241  std::ostream* output_stream = 0) {
242  std::vector<double> grad;
243  double lp = grad_log_prob(params_r,params_i,grad,output_stream);
244 
245  std::vector<double> grad_fd;
246  finite_diff_grad(params_r,params_i,grad_fd,epsilon,output_stream);
247 
248  int num_failed = 0;
249 
250  o << std::endl
251  << " Log probability=" << lp
252  << std::endl;
253 
254  o << std::endl
255  << std::setw(10) << "param idx"
256  << std::setw(16) << "value"
257  << std::setw(16) << "model"
258  << std::setw(16) << "finite diff"
259  << std::setw(16) << "error"
260  << std::endl;
261  for (size_t k = 0; k < params_r.size(); k++) {
262  o << std::setw(10) << k
263  << std::setw(16) << params_r[k]
264  << std::setw(16) << grad[k]
265  << std::setw(16) << grad_fd[k]
266  << std::setw(16) << (grad[k] - grad_fd[k])
267  << std::endl;
268  if (std::fabs(grad[k] - grad_fd[k]) > error)
269  num_failed++;
270  }
271  return num_failed;
272  }
273 
274  };
275  }
276 }
277 
278 #endif
Writes Stan variables in comma-separated-value format to an output stream.
Definition: csv_writer.hpp:19
void newline()
Write a newline character.
Definition: csv_writer.hpp:59
void write(int n)
Write a value.
Definition: csv_writer.hpp:72
The prob_grad class represents densities with fixed numbers of discrete and scalar parameters, providing log-density, gradient, and finite-difference Hessian calculations.
Definition: prob_grad.hpp:24
std::vector< std::pair< int, int > > param_ranges_i__
Definition: prob_grad.hpp:27
virtual void init(std::vector< double > &params_r, std::vector< int > &params_i)
Definition: prob_grad.hpp:80
virtual size_t num_params_r()
Definition: prob_grad.hpp:52
virtual void write_csv(std::vector< double > &params_r, std::vector< int > &params_i, std::ostream &o, std::ostream *output_stream=0)
Write the parameters on a single line in CSV format.
Definition: prob_grad.hpp:177
std::pair< int, int > param_range_i(size_t idx)
Definition: prob_grad.hpp:60
virtual size_t num_params_i()
Definition: prob_grad.hpp:56
virtual double log_prob_star(size_t idx, int val, std::vector< double > &params_r, std::vector< int > &params_i, std::ostream *output_stream=0)
Definition: prob_grad.hpp:144
void set_num_params_r(size_t num_params_r)
Definition: prob_grad.hpp:44
prob_grad(size_t num_params_r, std::vector< std::pair< int, int > > &param_ranges_i)
Definition: prob_grad.hpp:36
prob_grad(size_t num_params_r)
Definition: prob_grad.hpp:31
void set_param_range_i_lower(size_t idx, int low)
Definition: prob_grad.hpp:64
int test_gradients(std::vector< double > &params_r, std::vector< int > &params_i, double epsilon=1e-6, double error=1e-6, std::ostream &o=std::cout, std::ostream *output_stream=0)
Test the grad_log_prob() function's ability to produce accurate gradients using finite differences.
Definition: prob_grad.hpp:236
virtual double grad_log_prob(std::vector< double > &params_r, std::vector< int > &params_i, std::vector< double > &gradient, std::ostream *output_stream=0)=0
void setparam_ranges_i__(std::vector< std::pair< int, int > > param_ranges_i)
Definition: prob_grad.hpp:48
void finite_diff_grad(std::vector< double > &params_r, std::vector< int > &params_i, std::vector< double > &grad, double epsilon=1e-6, std::ostream *output_stream=0)
Compute the gradient using finite differences for the specified parameters, writing the result into the specified gradient vector.
Definition: prob_grad.hpp:201
virtual double log_prob(std::vector< double > &params_r, std::vector< int > &params_i, std::ostream *output_stream=0)=0
virtual double grad_hess_log_prob(std::vector< double > &params_r, std::vector< int > &params_i, std::vector< double > &gradient, std::vector< double > &hessian, std::ostream *output_stream=0)
Evaluate the log-probability, its gradient, and its Hessian at params_r.
Definition: prob_grad.hpp:112
void set_param_range_i_upper(size_t idx, int up)
Definition: prob_grad.hpp:68
int param_range_i_lower(size_t idx)
Definition: prob_grad.hpp:72
int param_range_i_upper(size_t idx)
Definition: prob_grad.hpp:76
var fabs(const var &a)
Return the absolute value of the variable (cmath).
Definition: agrad.hpp:2023
static void grad(chainable *vi)
Compute the gradient for all variables starting from the specified root variable implementation.
Definition: agrad.hpp:2187
double epsilon()
Return minimum positive number representable.
double e()
Return the base of the natural logarithm.
Eigen::Matrix< T, 1, Eigen::Dynamic > row(const Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > &m, size_t i)
Return the specified row of the specified matrix, using start-at-1 indexing.
Definition: matrix.hpp:1584
Probability, optimization and sampling library.
Definition: agrad.cpp:6
Template specification of functions in std for Stan.
Definition: agrad.hpp:2306

     [ Stan Home Page ] © 2011–2012, Stan Development Team.