39 #include <boost/unordered/unordered_set.hpp>
44 #ifndef OPENMS_COMPARISON_CLUSTERING_HIERARCHICALCLUSTERING_H
45 #define OPENMS_COMPARISON_CLUSTERING_HIERARCHICALCLUSTERING_H
66 template <
typename Po
intRef>
81 public std::pair<PointCoordinate, PointCoordinate>
94 return this->second - this->first;
104 lit = this->first.begin(); rit = rhs.first.begin();
105 for (; lit != this->first.end(); ++lit, ++rit) *lit = std::min(*lit, *rit);
108 lit = this->second.begin(); rit = rhs.second.begin();
109 for (; lit != this->second.end(); ++lit, ++rit) *lit = std::max(*lit, *rit);
134 public boost::unordered_multimap<PointCoordinate, PointRef>
173 coord(coord), bbox(bbox),
174 left(left), right(right),
181 typedef std::map<typename Grid::CellIndex, std::pair<typename Grid::CellContent *, bool> >
ClusterCells;
192 distance(distance), left(left), right(right)
203 typedef std::priority_queue<TreeDistance, std::vector<TreeDistance>, std::greater<TreeDistance> >
TreeDistanceQueue;
211 grid(cluster_dimension)
223 it->second.insert(std::make_pair(d, ref));
233 std::vector<typename Grid::CellIndex> cells;
234 for (
typename Grid::const_grid_iterator it =
grid.grid_begin(); it !=
grid.grid_end(); ++it)
235 cells.push_back(it->first);
237 for (
typename std::vector<typename Grid::CellIndex>::const_iterator it = cells.begin(); it != cells.end(); ++it)
250 return grid.insert(std::make_pair(p, Cluster(p)));
276 void gridCell_(
const typename Grid::CellIndex & cur,
ClusterCells & cells,
bool center =
false,
bool ignore_missing =
true)
280 cells.insert(std::make_pair(cur, std::make_pair(&
grid.grid_at(cur), center)));
282 catch (std::out_of_range &)
284 if (!ignore_missing)
throw;
294 DoubleReal dist_min = std::numeric_limits<DoubleReal>::infinity();
295 typename ClusterTrees::const_iterator dist_it = trees.end();
298 for (
typename ClusterTrees::const_iterator it = trees.begin(); it != trees.end(); ++it)
300 if (tree == *it)
continue;
310 if (dist_it != trees.end()) dists.push(TreeDistance(dist_min, tree, *dist_it));
324 const BoundingBox bbox = left->bbox | right->bbox;
327 return std::numeric_limits<DoubleReal>::infinity();
343 if (tree->left && tree->right)
350 cluster.insert(std::make_pair(tree->bbox.first, tree->ref));
363 if (tree->left && tree->right)
381 for (; it != ret.
end(); ++it, ++lit) *it = *lit / rhs;
390 for (; it != ret.
end(); ++it, ++lit, ++rit) *it = *lit / *rit;
397 for (; lit != lhs.
end(); ++lit, ++rit)
399 if (*lit > *rit)
return true;
409 for (; it != p.
end(); ++it) ret += std::pow(*it, 2.);
410 return std::sqrt(ret);
415 template <
typename I>
425 gridCells5x5_(cur, cells);
427 catch (std::out_of_range &)
433 for (
typename ClusterCells::iterator cell_it = cells.begin(); cell_it != cells.end(); ++cell_it)
435 typename Grid::CellContent & cell_cur = *cell_it->second.first;
436 const bool & cell_center = cell_it->second.second;
439 typename Grid::cell_iterator cluster_tmp_it = cell_cur.begin();
440 while (cluster_tmp_it != cell_cur.end())
442 typename Grid::cell_iterator cluster_it = cluster_tmp_it;
446 if (cluster_it->second.size() == 1)
449 for (
typename Cluster::const_iterator point_it = cluster_it->second.begin(); point_it != cluster_it->second.end(); ++point_it)
453 addTreeDistance_(tree, trees, dists);
457 cell_cur.erase(cluster_it);
463 while (!dists.empty())
465 const typename TreeDistanceQueue::value_type cur_dist = dists.top();
466 TreeNode * tree_left(cur_dist.left), *tree_right(cur_dist.right);
470 Size count_left = trees.count(tree_left), count_right = trees.count(tree_right);
471 if (count_left && count_right)
473 trees.erase(tree_left);
474 trees.erase(tree_right);
480 const UInt & left_points = tree_left->points, & right_points = tree_right->
points;
481 const PointCoordinate coord = coordScalarDiv_(left * left_points + right * right_points, left_points + right_points);
485 addTreeDistance_(tree, trees, dists);
490 addTreeDistance_(tree_left, trees, dists);
491 else if (count_right)
492 addTreeDistance_(tree_right, trees, dists);
496 for (
typename ClusterTrees::iterator tree_it = trees.begin(); tree_it != trees.end(); ++tree_it)
499 if ((**tree_it).center)
501 Cluster & cluster = insertCluster_((**tree_it).bbox)->second;
502 tree2Cluster_(*tree_it, cluster);
507 tree2Points_(*tree_it);
513 template <
typename I>
517 gridCell_(base, cells,
true,
false);
519 typename Grid::CellIndex cur = base;
522 cur[1] -= 2; gridCell_(cur, cells);
524 cur[1] += 1; gridCell_(cur, cells);
526 cur[1] += 1; gridCell_(cur, cells);
528 cur[1] += 1; gridCell_(cur, cells);
530 cur[1] += 1; gridCell_(cur, cells);
532 cur = base; cur[0] -= 1;
534 cur[1] -= 2; gridCell_(cur, cells);
536 cur[1] += 1; gridCell_(cur, cells,
true);
538 cur[1] += 1; gridCell_(cur, cells,
true);
540 cur[1] += 1; gridCell_(cur, cells,
true);
542 cur[1] += 1; gridCell_(cur, cells);
546 cur[1] -= 2; gridCell_(cur, cells);
548 cur[1] += 1; gridCell_(cur, cells,
true);
552 cur[1] += 1; gridCell_(cur, cells,
true);
554 cur[1] += 1; gridCell_(cur, cells);
556 cur = base; cur[0] += 1;
558 cur[1] -= 2; gridCell_(cur, cells);
560 cur[1] += 1; gridCell_(cur, cells,
true);
562 cur[1] += 1; gridCell_(cur, cells,
true);
564 cur[1] += 1; gridCell_(cur, cells,
true);
566 cur[1] += 1; gridCell_(cur, cells);
568 cur = base; cur[0] += 2;
570 cur[1] -= 2; gridCell_(cur, cells);
572 cur[1] += 1; gridCell_(cur, cells);
574 cur[1] += 1; gridCell_(cur, cells);
576 cur[1] += 1; gridCell_(cur, cells);
578 cur[1] += 1; gridCell_(cur, cells);
Container for (2-dimensional coordinate, value) pairs.
Definition: HashGrid.h:62
void tree2Points_(const TreeNode *tree)
Recursively add the points of an unfinished cluster back to the grid. All points are saved in the lea...
Definition: HierarchicalClustering.h:361
DoubleReal treeDistance_(TreeNode *left, TreeNode *right)
Returns distance of two tree nodes Returns the euclidean distance of the coordinates of the two trees...
Definition: HierarchicalClustering.h:322
BoundingBox(const PointCoordinate &p)
Definition: HierarchicalClustering.h:84
const CoordinateType * const_iterator
Definition: DPosition.h:75
DoubleReal distance
Definition: HierarchicalClustering.h:188
Grid grid
The hash grid.
Definition: HierarchicalClustering.h:154
Generic 2-dimensional hierarchical clustering with geometric hashing.
Definition: HierarchicalClustering.h:67
void gridCell_(const typename Grid::CellIndex &cur, ClusterCells &cells, bool center=false, bool ignore_missing=true)
Collect one cell.
Definition: HierarchicalClustering.h:276
static PointCoordinate coordScalarDiv_(const PointCoordinate &lhs, const DoubleReal &rhs)
Definition: HierarchicalClustering.h:376
const BoundingBox bbox
Definition: HierarchicalClustering.h:162
boost::unordered_set< TreeNode * > ClusterTrees
Definition: HierarchicalClustering.h:182
BoundingBox & operator|=(const BoundingBox &rhs)
Intersection of bounding box.
Definition: HierarchicalClustering.h:98
BoundingBox(const BoundingBox &b)
Definition: HierarchicalClustering.h:88
Set of points. Describes a cluster on the grid. A point consists of a PointCoordinate and a PointRef...
Definition: HierarchicalClustering.h:133
const PointCoordinate coord
Definition: HierarchicalClustering.h:161
Grid::cell_iterator insertPoint(const PointCoordinate &d, const PointRef &ref)
Insert new PointCoordinate into grid.
Definition: HierarchicalClustering.h:220
const PointRef ref
Definition: HierarchicalClustering.h:166
static PointCoordinate coordElemDiv_(const PointCoordinate &lhs, const PointCoordinate &rhs)
Definition: HierarchicalClustering.h:385
ConstIterator end() const
Non-mutable end iterator.
Definition: DPosition.h:389
CoordinateType * iterator
Definition: DPosition.h:74
Wrapper class for two trees and the corresponding distance.
Definition: HierarchicalClustering.h:185
TreeNode(const PointCoordinate &coord, const PointRef &ref, bool center)
Definition: HierarchicalClustering.h:168
PointCoordinate size() const
Definition: HierarchicalClustering.h:92
Representation of a coordinate in D-dimensional space.
Definition: DPosition.h:52
DPosition< 2, DoubleReal > PointCoordinate
Coordinate of a point to be clustered.
Definition: HierarchicalClustering.h:74
Grid::cell_iterator insertCluster_(const P &p)
Insert new Cluster into grid.
Definition: HierarchicalClustering.h:248
BoundingBox operator|(const BoundingBox &rhs) const
Intersection of bounding box.
Definition: HierarchicalClustering.h:115
static DoubleReal coordDist_(const PointCoordinate &lhs, const PointCoordinate &rhs)
Definition: HierarchicalClustering.h:404
void clusterIndex_(const typename Grid::CellIndex &p)
Perform clustering at given cell index.
Definition: HierarchicalClustering.h:416
TreeNode * right
Definition: HierarchicalClustering.h:189
void tree2Cluster_(const TreeNode *tree, Cluster &cluster)
Recursively add the points of a finished cluster into the hash grid. All points are saved in the leaf...
Definition: HierarchicalClustering.h:341
UInt points
Definition: HierarchicalClustering.h:164
void cluster()
Perform clustering of all existing points.
Definition: HierarchicalClustering.h:230
TreeNode(const PointCoordinate &coord, const BoundingBox &bbox, TreeNode *left, TreeNode *right)
Definition: HierarchicalClustering.h:172
HierarchicalClustering(const PointCoordinate &cluster_dimension)
Constructor.
Definition: HierarchicalClustering.h:210
HashGrid< Cluster > Grid
The hash grid data type.
Definition: HierarchicalClustering.h:147
std::priority_queue< TreeDistance, std::vector< TreeDistance >, std::greater< TreeDistance > > TreeDistanceQueue
Priority queue queue used to find minimum distances.
Definition: HierarchicalClustering.h:203
void addTreeDistance_(TreeNode *tree, ClusterTrees &trees, TreeDistanceQueue &dists)
Add a new tree to the set of trees and distance queue.
Definition: HierarchicalClustering.h:291
Tree node used for clustering.
Definition: HierarchicalClustering.h:158
const bool center
Definition: HierarchicalClustering.h:165
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:144
std::map< typename Grid::CellIndex, std::pair< typename Grid::CellContent *, bool > > ClusterCells
Definition: HierarchicalClustering.h:181
TreeNode * left
Definition: HierarchicalClustering.h:189
TreeNode * left
Definition: HierarchicalClustering.h:163
bool operator>(const TreeDistance &rhs) const
Definition: HierarchicalClustering.h:195
void gridCells5x5_(typename Grid::CellIndex cur, ClusterCells &cells)
Collect all cells used to cluster at given cell index.
Definition: HierarchicalClustering.h:514
TreeNode * right
Definition: HierarchicalClustering.h:163
ConstIterator begin() const
Non-mutable begin iterator.
Definition: DPosition.h:383
Cluster(const BoundingBox &bbox)
Definition: HierarchicalClustering.h:139
TreeDistance(const DoubleReal &distance, TreeNode *left, TreeNode *right)
Definition: HierarchicalClustering.h:191
static bool coordElemGreater_(const PointCoordinate &lhs, const PointCoordinate &rhs)
Definition: HierarchicalClustering.h:394
Bounding box of cluster.
Definition: HierarchicalClustering.h:80
BoundingBox bbox
Definition: HierarchicalClustering.h:137