mlpack  2.0.1
dual_tree_kmeans.hpp
Go to the documentation of this file.
1 
17 #ifndef __MLPACK_METHODS_KMEANS_DUAL_TREE_KMEANS_HPP
18 #define __MLPACK_METHODS_KMEANS_DUAL_TREE_KMEANS_HPP
19 
23 
25 
26 namespace mlpack {
27 namespace kmeans {
28 
36 template<
37  typename MetricType,
38  typename MatType,
39  template<typename TreeMetricType,
40  typename TreeStatType,
41  typename TreeMatType>
42  class TreeType = tree::KDTree>
44 {
45  public:
47  typedef TreeType<MetricType, DualTreeKMeansStatistic, MatType> Tree;
48 
49  template<typename TreeMetricType,
50  typename IgnoredStatType,
51  typename TreeMatType>
52  using NNSTreeType =
53  TreeType<TreeMetricType, DualTreeKMeansStatistic, TreeMatType>;
54 
59  DualTreeKMeans(const MatType& dataset, MetricType& metric);
60 
65 
74  double Iterate(const arma::mat& centroids,
75  arma::mat& newCentroids,
76  arma::Col<size_t>& counts);
77 
79  size_t DistanceCalculations() const { return distanceCalculations; }
82 
83  private:
85  const MatType& datasetOrig; // Maybe not necessary.
87  Tree* tree;
89  const MatType& dataset;
91  MetricType metric;
92 
96  size_t iteration;
97 
99  arma::vec upperBounds;
101  arma::vec lowerBounds;
103  std::vector<bool> prunedPoints;
104 
105  arma::Row<size_t> assignments;
106 
107  std::vector<bool> visited; // Was the point visited this iteration?
108 
109  arma::mat lastIterationCentroids; // For sanity checks.
110 
111  arma::vec clusterDistances; // The amount the clusters moved last iteration.
112 
113  arma::mat interclusterDistances; // Static storage for intercluster distances.
114 
117  void UpdateTree(Tree& node,
118  const arma::mat& centroids,
119  const double parentUpperBound = 0.0,
120  const double adjustedParentUpperBound = DBL_MAX,
121  const double parentLowerBound = DBL_MAX,
122  const double adjustedParentLowerBound = 0.0);
123 
125  void ExtractCentroids(Tree& node,
126  arma::mat& newCentroids,
127  arma::Col<size_t>& newCounts,
128  arma::mat& centroids);
129 
130  void CoalesceTree(Tree& node, const size_t child = 0);
131  void DecoalesceTree(Tree& node);
132 };
133 
136 template<typename TreeType>
137 void HideChild(TreeType& node,
138  const size_t child,
139  const typename boost::disable_if_c<
140  tree::TreeTraits<TreeType>::BinaryTree>::type* junk = 0);
141 
145 template<typename TreeType>
146 void HideChild(TreeType& node,
147  const size_t child,
148  const typename boost::enable_if_c<
149  tree::TreeTraits<TreeType>::BinaryTree>::type* junk = 0);
150 
152 template<typename TreeType>
153 void RestoreChildren(TreeType& node,
154  const typename boost::disable_if_c<tree::TreeTraits<
155  TreeType>::BinaryTree>::type* junk = 0);
156 
158 template<typename TreeType>
159 void RestoreChildren(TreeType& node,
160  const typename boost::enable_if_c<tree::TreeTraits<
161  TreeType>::BinaryTree>::type* junk = 0);
162 
165 template<typename MetricType, typename MatType>
167 
170 template<typename MetricType, typename MatType>
171 using CoverTreeDualTreeKMeans = DualTreeKMeans<MetricType, MatType,
173 
174 } // namespace kmeans
175 } // namespace mlpack
176 
177 #include "dual_tree_kmeans_impl.hpp"
178 
179 #endif
BinarySpaceTree< MetricType, StatisticType, MatType, bound::HRectBound, MidpointSplit > KDTree
The standard midpoint-split kd-tree.
Definition: typedef.hpp:65
void HideChild(TreeType &node, const size_t child, const typename boost::disable_if_c< tree::TreeTraits< TreeType >::BinaryTree >::type *junk=0)
Utility function for hiding children.
TreeType< MetricType, DualTreeKMeansStatistic, MatType > Tree
Convenience typedef.
Linear algebra utility functions, generally performed on matrices or vectors.
arma::vec upperBounds
Upper bounds on nearest centroid.
~DualTreeKMeans()
Delete the tree constructed by the DualTreeKMeans object.
size_t distanceCalculations
Track distance calculations.
MetricType metric
The metric.
An algorithm for an exact Lloyd iteration which simply uses dual-tree nearest-neighbor search to find...
void ExtractCentroids(Tree &node, arma::mat &newCentroids, arma::Col< size_t > &newCounts, arma::mat &centroids)
Extract the centroids of the clusters.
void RestoreChildren(TreeType &node, const typename boost::disable_if_c< tree::TreeTraits< TreeType >::BinaryTree >::type *junk=0)
Utility function for restoring children to a non-binary tree.
Tree * tree
The tree built on the points.
void UpdateTree(Tree &node, const arma::mat &centroids, const double parentUpperBound=0.0, const double adjustedParentUpperBound=DBL_MAX, const double parentLowerBound=DBL_MAX, const double adjustedParentLowerBound=0.0)
Update the bounds in the tree before the next iteration.
arma::vec lowerBounds
Lower bounds on second closest cluster distance for each point.
const MatType & datasetOrig
The original dataset reference.
void CoalesceTree(Tree &node, const size_t child=0)
TreeType< TreeMetricType, DualTreeKMeansStatistic, TreeMatType > NNSTreeType
double Iterate(const arma::mat &centroids, arma::mat &newCentroids, arma::Col< size_t > &counts)
Run a single iteration of the dual-tree nearest neighbor algorithm for k-means, updating the given ce...
void DecoalesceTree(Tree &node)
DualTreeKMeans(const MatType &dataset, MetricType &metric)
Construct the DualTreeKMeans object, which will construct a tree on the points.
const MatType & dataset
The dataset we are using.
std::vector< bool > prunedPoints
Indicator of whether or not the point is pruned.
The TreeTraits class provides compile-time information on the characteristics of a given tree type...
Definition: tree_traits.hpp:79
size_t DistanceCalculations() const
Return the number of distance calculations.
size_t & DistanceCalculations()
Modify the number of distance calculations.
size_t iteration
Track iteration number.
A cover tree is a tree specifically designed to speed up nearest-neighbor computation in high-dimensi...
Definition: cover_tree.hpp:98