Go to the documentation of this file.
13 #ifndef MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_TREE_HPP
14 #define MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_TREE_HPP
55 template<
typename FitnessFunction = GiniImpurity,
56 template<
typename>
class NumericSplitType =
58 template<
typename>
class CategoricalSplitType =
59 HoeffdingCategoricalSplit
93 template<
typename MatType>
96 const arma::Row<size_t>& labels,
97 const size_t numClasses,
98 const bool batchTraining =
true,
99 const double successProbability = 0.95,
100 const size_t maxSamples = 0,
101 const size_t checkInterval = 100,
102 const size_t minSamples = 100,
103 const CategoricalSplitType<FitnessFunction>& categoricalSplitIn
104 = CategoricalSplitType<FitnessFunction>(0, 0),
105 const NumericSplitType<FitnessFunction>& numericSplitIn =
106 NumericSplitType<FitnessFunction>(0));
131 const size_t numClasses,
132 const double successProbability = 0.95,
133 const size_t maxSamples = 0,
134 const size_t checkInterval = 100,
135 const size_t minSamples = 100,
136 const CategoricalSplitType<FitnessFunction>& categoricalSplitIn
137 = CategoricalSplitType<FitnessFunction>(0, 0),
138 const NumericSplitType<FitnessFunction>& numericSplitIn =
139 NumericSplitType<FitnessFunction>(0),
140 std::unordered_map<
size_t, std::pair<size_t, size_t>>*
141 dimensionMappings = NULL,
142 const bool copyDatasetInfo =
true);
171 template<
typename MatType>
173 const arma::Row<size_t>& labels,
174 const bool batchTraining =
true);
180 template<
typename MatType>
183 const arma::Row<size_t>& labels,
184 const bool batchTraining =
true);
192 template<
typename VecType>
193 void Train(
const VecType& point,
const size_t label);
250 template<
typename VecType>
260 template<
typename VecType>
277 template<
typename VecType>
278 void Classify(
const VecType& point,
size_t& prediction,
double& probability)
288 template<
typename MatType>
289 void Classify(
const MatType& data, arma::Row<size_t>& predictions)
const;
302 template<
typename MatType>
304 arma::Row<size_t>& predictions,
305 arma::rowvec& probabilities)
const;
313 template<
typename Archive>
320 std::vector<NumericSplitType<FitnessFunction>> numericSplits;
322 std::vector<CategoricalSplitType<FitnessFunction>> categoricalSplits;
325 std::unordered_map<size_t, std::pair<size_t, size_t>>* dimensionMappings;
336 size_t checkInterval;
344 double successProbability;
349 size_t splitDimension;
351 size_t majorityClass;
354 double majorityProbability;
356 typename CategoricalSplitType<FitnessFunction>::SplitInfo categoricalSplit;
358 typename NumericSplitType<FitnessFunction>::SplitInfo numericSplit;
360 std::vector<HoeffdingTree*> children;
366 #include "hoeffding_tree_impl.hpp"
size_t MaxSamples() const
Get the maximum number of samples before a split is forced.
size_t NumDescendants() const
Get the size of the Hoeffding Tree.
void Train(const MatType &data, const arma::Row< size_t > &labels, const bool batchTraining=true)
Train on a set of points, either in streaming mode or in batch mode, with the given labels.
The core includes that mlpack expects; standard C++ includes and Armadillo.
size_t CalculateDirection(const VecType &point) const
Given a point and that this node is not a leaf, calculate the index of the child node this point would go towards.
HoeffdingTree()
Construct a Hoeffding tree with no data and no information.
size_t MajorityClass() const
Get the majority class.
void MinSamples(const size_t minSamples)
Modify the minimum number of samples for a split.
HoeffdingNumericSplit< FitnessFunction, double > HoeffdingDoubleNumericSplit
Convenience typedef.
void MaxSamples(const size_t maxSamples)
Modify the maximum number of samples before a split is forced.
~HoeffdingTree()
Clean up memory.
void Train(const MatType &data, const data::DatasetInfo &info, const arma::Row< size_t > &labels, const bool batchTraining=true)
Train on a set of points, either in streaming mode or in batch mode, with the given labels and the given DatasetInfo.
size_t Classify(const VecType &point) const
Classify the given point, using this node and the entire (sub)tree beneath it.
size_t SplitDimension() const
Get the splitting dimension (size_t(-1) if no split).
size_t NumChildren() const
Get the number of children.
size_t SplitCheck()
Check if a split would satisfy the conditions of the Hoeffding bound with the node's specified success probability.
void Classify(const VecType &point, size_t &prediction, double &probability) const
Classify the given point and also return an estimate of the probability that the prediction is correct.
Linear algebra utility functions, generally performed on matrices or vectors.
size_t CheckInterval() const
Get the number of samples before a split check is performed.
HoeffdingTree & Child(const size_t i)
Modify a child.
size_t MinSamples() const
Get the minimum number of samples for a split.
void Classify(const MatType &data, arma::Row< size_t > &predictions, arma::rowvec &probabilities) const
Classify the given points, using this node and the entire (sub)tree beneath it.
void serialize(Archive &ar, const unsigned int)
Serialize the tree.
double SuccessProbability() const
Get the confidence required for a split.
CategoricalSplitType< FitnessFunction > CategoricalSplit
Allow access to the categorical split type.
void CheckInterval(const size_t checkInterval)
Modify the number of samples before a split check is performed.
void SuccessProbability(const double successProbability)
Modify the confidence required for a split.
size_t & MajorityClass()
Modify the majority class.
void Classify(const MatType &data, arma::Row< size_t > &predictions) const
Classify the given points, using this node and the entire (sub)tree beneath it.
HoeffdingTree(const HoeffdingTree &other)
Copy another tree (warning: this will duplicate the tree entirely, and may use a lot of memory).
void Train(const VecType &point, const size_t label)
Train on a single point in streaming mode, with the given label.
HoeffdingTree(const data::DatasetInfo &datasetInfo, const size_t numClasses, const double successProbability=0.95, const size_t maxSamples=0, const size_t checkInterval=100, const size_t minSamples=100, const CategoricalSplitType< FitnessFunction > &categoricalSplitIn=CategoricalSplitType< FitnessFunction >(0, 0), const NumericSplitType< FitnessFunction > &numericSplitIn=NumericSplitType< FitnessFunction >(0), std::unordered_map< size_t, std::pair< size_t, size_t >> *dimensionMappings=NULL, const bool copyDatasetInfo=true)
Construct the Hoeffding tree with the given parameters, but training on no data.
double MajorityProbability() const
Get the probability of the majority class (based on training samples).
const HoeffdingTree & Child(const size_t i) const
Get a child.
The HoeffdingTree object represents all of the necessary information for a Hoeffding-bound-based decision tree.
void CreateChildren()
Given that this node should split, create the children.
HoeffdingTree(const MatType &data, const data::DatasetInfo &datasetInfo, const arma::Row< size_t > &labels, const size_t numClasses, const bool batchTraining=true, const double successProbability=0.95, const size_t maxSamples=0, const size_t checkInterval=100, const size_t minSamples=100, const CategoricalSplitType< FitnessFunction > &categoricalSplitIn=CategoricalSplitType< FitnessFunction >(0, 0), const NumericSplitType< FitnessFunction > &numericSplitIn=NumericSplitType< FitnessFunction >(0))
Construct the Hoeffding tree with the given parameters and given training data.
double & MajorityProbability()
Modify the probability of the majority class.
NumericSplitType< FitnessFunction > NumericSplit
Allow access to the numeric split type.
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the datatype of each dimension.