Writing: start about other papers concerning the topic.
[fic.git] / kdTree.h
blob46cdc3d742086e7ac84b86a393811f3820e49ba5
1 #ifndef KDTREE_HEADER_
2 #define KDTREE_HEADER_
4 #include "util.h"
6 namespace NOSPACE {
7 using namespace std;
10 /** Routines for computing with T[length] number fields */
11 namespace FieldMath {
12 /** Calls transformer(x,y) for every x from [i1..iEnd1) and corresponding y from [i2..) */
13 template<class I1,class I2,class Transf>
14 Transf transform2( I1 i1, I1 iEnd1, I2 i2, Transf transformer ) {
15 for (; i1!=iEnd1; ++i1,++i2)
16 transformer(*i1,*i2);
17 return transformer;
19 /** Analogous to ::transform2 */
20 template<class I1,class I2,class I3,class Transf>
21 Transf transform3( I1 i1, I1 iEnd1, I2 i2, I3 i3, Transf transformer ) {
22 for (; i1!=iEnd1; ++i1,++i2,++i3)
23 transformer(*i1,*i2,*i3);
24 return transformer;
26 /** Analogous to ::transform2 */
27 template<class I1,class I2,class I3,class I4,class Transf>
28 Transf transform4( I1 i1, I1 iEnd1, I2 i2, I3 i3, I4 i4, Transf transformer ) {
29 for (; i1!=iEnd1; ++i1,++i2,++i3,++i4)
30 transformer(*i1,*i2,*i3,*i4);
31 return transformer;
34 /** Means b[i]=a[i]; */
35 template<class T> T* assign(const T *a,int length,T *b) {
36 //copy(a,a+length,b);
38 memcpy(b,a,length*sizeof(T));
40 return b;
43 namespace NOSPACE {
44 template<class T,bool CheckNaNs> struct MoveToBounds {
45 T sqrError; // TODO: double?
47 MoveToBounds()
48 : sqrError(0) {}
50 void operator()(const T &point,const T bounds[2],T &result) {
51 if ( CheckNaNs && isnan(point) )
52 return;
53 if ( point < bounds[0] ) {
54 sqrError+= sqr(bounds[0]-point);
55 result= bounds[0];
56 } else
57 if ( point > bounds[1] ) {
58 sqrError+= sqr(point-bounds[1]);
59 result= bounds[1];
60 } else
61 result= point;
65 /** Copy_moves a vector (\p point) to the nearest point (\p result)
66 * within bounds (\p bounds) and returns SE (distance^2) */
67 template<class T,bool CheckNaNs>
68 T moveToBounds_copy(const T *point,const T (*bounds)[2],int length,T *result) {
69 return transform3( point, point+length, bounds, result
70 , MoveToBounds<T,CheckNaNs>() ) .sqrError;
72 // namespace NOSPACE {
73 // template<class T> struct Min {
74 // const T& operator()(const T &a,const T &b) const
75 // { return min(a,b); }
76 // };
77 // template<class T> struct Max {
78 // const T& operator()(const T &a,const T &b) const
79 // { return max(a,b); }
80 // };
81 // }
82 // /** Means b[i]=min(a[i],b[i]) */
83 // template<class T> T* min(const T *a,int length,T *b) {
84 // transform( a, a+length, b, b, Min<T>() );
85 // return b;
86 // }
87 // /** Means b[i]=max(a[i],b[i]) */
88 // template<class T> T* max(const T *a,int length,T *b) {
89 // transform( a, a+length, b, b, Max<T>() );
90 // return b;
91 // }
93 // /** Returns whether a[i]<=b[i] */
94 // template<class T> bool isOrdered(const T *a,const T *b,int length) {
95 // const T *aEnd=a+length;
96 // do {
97 // if ( *a++ > *b++ )
98 // return false;
99 // } while (a!=aEnd);
100 // return true;
101 // }
103 namespace NOSPACE {
104 template<class T> struct Adder {
105 void operator()(const T &a,const T &b,T &c)
106 { c=a+b; }
108 template<class T> struct Subtractor {
109 void operator()(const T &a,const T &b,T &c)
110 { c=a-b; }
113 template<class T> T* add(const T *a,const T *b,int length,T *c) {
114 transform3( a, a+length, b, c, Adder<T>() );
116 template<class T> T* subtract(const T *a,const T *b,int length,T *c) {
117 transform3( a, a+length, b, c, Subtractor<T>() );
122 template<class T> class KDBuilder;
123 /** A generic KD-tree */
124 template<class T> class KDTree {
125 public:
126 friend class KDBuilder<T>;
127 typedef KDBuilder<T> Builder;
128 protected:
129 /** Represents one node of the KD-tree */
130 struct Node {
131 int coord; ///< The coordinate along which the tree splits in this node
132 T threshold;///< The threshold of the split (left[#coord] <= #threshold <= right[#coord])
134 typedef T (*Bounds)[2];
136 public:
137 const int depth /// The depth of the tree = ::log2ceil(#count)
138 , length /// The length of the vectors
139 , count; ///< The number of the vectors
140 protected:
141 Node *nodes; ///< The array of the tree-nodes
142 int *dataIDs; ///< Data IDs for children of "leaf" nodes
143 Bounds bounds; ///< The bounding box for all the data
145 /** Builds a new KD-tree from \p count_ vectors of \p length_ elements stored in \data */
146 KDTree(int length_,int count_)
147 : depth( log2ceil(count_) ), length(length_), count(count_)
148 , nodes( new Node[count_] ), dataIDs( new int[count_] ), bounds( new T[length_][2] ) {
151 /** Copy constructor with swapping (or rather moving) semantics */
152 KDTree(KDTree &other)
153 : depth(other.depth), length(other.length), count(other.count)
154 , nodes(other.nodes), dataIDs(other.dataIDs), bounds(other.bounds) {
155 other.nodes= 0;
156 other.dataIDs= 0;
157 other.bounds= 0;
160 public:
161 /** Only frees the memory */
162 ~KDTree() {
163 // clean up
164 delete[] nodes;
165 delete[] dataIDs;
166 delete[] bounds;
168 /** Takes an index of "leaf" (nonexistent) node and returns the appropriate data ID */
169 int leafID2dataID(int leafID) const {
170 ASSERT( count<=leafID && leafID<2*count );
171 int index= leafID-powers[depth];
172 if (index<0)
173 index+= count;
174 ASSERT( 0<=index && index<count );
175 return dataIDs[index];
178 /** Manages a heap from nodes of a KDTree.
179 * It returns vectors (their indices) in the order according to distance (SE)
180 * from a given fixed point. It can return a lower bound of the SEs of the remaining
181 * vectors at any time. */
182 class PointHeap {
183 /** One element of the heap representing a node in the KDTree */
184 struct HeapNode {
185 int nodeIndex; ///< Index of the node in #kd
186 T *data; ///< Stores the SE and the coordinates of the nearest point
188 /** No-init constructor */
189 HeapNode() {}
190 /** Initializes the members from the parameters */
191 HeapNode(int nodeIndex_,T *data_): nodeIndex(nodeIndex_), data(data_) {}
193 /** Returns reference to the SE of the nearest point to this node's bounding box */
194 T& getSE() { return *data; }
195 /** Returns the SE of the nearest point to this node's bounding box */
196 T getSE() const { return *data; }
197 /** Returns pointer to the nearest point to this node's bounding box */
198 T* getNearest() { return data+1; }
200 /** Defines the order of #heap - ascending according to #getSE */
201 struct HeapOrder {
202 bool operator()(const HeapNode &a,const HeapNode &b)
203 { return a.getSE() > b.getSE(); }
206 const KDTree &kd; ///< Reference to the KDTree we operate on
207 const T* const point; ///< Pointer to the point we are trying to approach
208 vector<HeapNode> heap; ///< The current heap of the tree nodes
209 BulkAllocator<T> allocator; ///< The allocator for HeapNode::data
210 public:
211 /** Build the heap from the KDTree \p tree and vector \p point_
212 * (they've got to remain valid until destruction) */
213 PointHeap(const KDTree &tree,const T *point_,bool checkNaNs)
214 : kd(tree), point(point_) {
215 ASSERT(point);
216 // create the root heap-node
217 HeapNode rootNode( 1, allocator.makeField(kd.length+1) );
218 // compute the nearest point within the global bounds and corresponding SE
219 using namespace FieldMath;
220 rootNode.getSE()= checkNaNs
221 ? moveToBounds_copy<T,true> ( point, kd.bounds, kd.length, rootNode.getNearest() )
222 : moveToBounds_copy<T,false>( point, kd.bounds, kd.length, rootNode.getNearest() );
223 // push it onto the heap (and reserve more to speed up the first leaf-gettings)
224 heap.reserve(kd.depth*2);
225 heap.push_back(rootNode);
228 /** Returns whether the heap is empty ( !isEmpty() is needed for all other methods) */
229 bool isEmpty()
230 { return heap.empty(); }
232 /** Returns the SE of the top node (always equals the SE of the next leaf) */
233 T getTopSE() {
234 ASSERT( !isEmpty() );
235 return heap[0].getSE();
238 /** Removes a leaf, returns the matching vector's index */
239 template<bool CheckNaNs> int popLeaf() {
240 // ensure a leaf is on the top of the heap and get its ID
241 makeTopLeaf<CheckNaNs>();
242 int result= kd.leafID2dataID( heap.front().nodeIndex );
243 // remove the top from the heap (no need to free the memory - #allocator)
244 pop_heap( heap.begin(), heap.end(), HeapOrder() );
245 heap.pop_back();
246 return result;
248 protected:
249 /** Divides the top nodes until there's a leaf on the top */
250 template<bool CheckNaNs> void makeTopLeaf();
252 }; // PointHeap class
253 }; // KDTree class
256 template<class T> template<bool CheckNaNs>
257 void KDTree<T>::PointHeap::makeTopLeaf() {
258 ASSERT( !isEmpty() );
259 // exit if there's a leaf on the top already
260 if ( !(heap[0].nodeIndex<kd.count) )
261 return;
262 size_t oldHeapSize= heap.size();
263 HeapNode heapRoot= heap[0]; // making a local working copy of the top of the heap
265 do { // while heapRoot isn't leaf ... while ( heapRoot.nodeIndex<kd.count )
266 const Node &node= kd.nodes[heapRoot.nodeIndex];
267 // now replace the node with its two children:
268 // one of them will have the same SE (replaces its parent on the top),
269 // the other one can have higher SE (push_back-ed on the heap)
271 // create a new heap-node and allocate it's data
272 HeapNode newHNode;
273 newHNode.data= allocator.makeField(kd.length+1);
274 // assign index of the left child for both the heap-nodes (for now)
275 newHNode.nodeIndex= (heapRoot.nodeIndex*= 2);
276 // the nearest point of the new heap-node only differs in one coordinate
277 FieldMath::assign( heapRoot.getNearest(), kd.length, newHNode.getNearest() );
279 if ( !CheckNaNs || !isnan(point[node.coord]) ) { // valid coordinate -> normal processing
280 newHNode.getNearest()[node.coord]= node.threshold;
282 // the SE of the child heap-nodes can be computed from the parent heap-node
283 Real oldDistance= Real(point[node.coord]) - heapRoot.getNearest()[node.coord];
284 Real newDistance= Real(point[node.coord]) - node.threshold;
285 newHNode.getSE()= heapRoot.getSE() - sqr(oldDistance) + sqr(newDistance);
286 ASSERT( newHNode.getSE() >= heapRoot.getSE() );
287 /* another way to do this, according to: A^2-B^2 = (A-B)*(A+B)
288 Real nearestInCoord= heapRoot.getNearest()[node.coord];
289 newHNode.getSE()= heapRoot.getSE() + (nearestInCoord-node.threshold)
290 * ( ldexp((Real)point[node.coord],1) - nearestInCoord - node.threshold );
292 // correct the nodeIndex of the new children
293 if ( newDistance <= 0 )
294 // the point is in the left part -> the left child will be on the top
295 ++newHNode.nodeIndex;
296 else
297 // the point is in the right part -> the right child will be on the top
298 ++heapRoot.nodeIndex;
300 } else { // the coordinate doesn't have any effect
301 newHNode.getSE()= heapRoot.getSE();
302 ++newHNode.nodeIndex;
305 // add the new node to the back, restore the heap-property later
306 heap.push_back(newHNode);
307 } while ( heapRoot.nodeIndex<kd.count );
309 heap[0]= heapRoot; // restoring the working copy of the heap's top node
310 // restore the heap-property on the added nodes
311 typename vector<HeapNode>::iterator it= heap.begin()+oldHeapSize;
313 push_heap( heap.begin(), it, HeapOrder() );
314 while ( it++ != heap.end() );
315 } // KDTree<T>::PointHeap::makeTopLeaf<CheckNaNs>() method
318 template<class T> class KDBuilder: public KDTree<T> {
319 public:
320 typedef KDTree<T> Tree;
321 typedef T BoundsPair[2];
322 typedef typename Tree::Bounds Bounds;
323 typedef int (KDBuilder::*CoordChooser)
324 (int nodeIndex,int *beginIDs,int *endIDs,int depthLeft) const;
325 protected:
326 using Tree::depth; using Tree::length; using Tree::count;
327 using Tree::nodes; using Tree::dataIDs; using Tree::bounds;
329 const T *data;
330 const CoordChooser chooser;
331 mutable Bounds chooserTmp;
333 KDBuilder(const T *data_,int length,int count,CoordChooser chooser_)
334 : Tree(length,count), data(data_), chooser(chooser_), chooserTmp(0) {
335 ASSERT( length>0 && count>1 && chooser && data );
336 // create the index-vector, coumpute the bounding box, build the tree
337 for (int i=0; i<count; ++i)
338 dataIDs[i]= i;
339 getBounds(bounds);
340 buildNode(1,dataIDs,dataIDs+count,depth);
341 delete[] chooserTmp;
342 DEBUG_ONLY( chooserTmp= 0; )
345 /** Creates bounds containing one value */
346 struct NewBounds {
347 void operator()(const T &val,BoundsPair &bounds) const
348 { bounds[0]= bounds[1]= val; }
350 /** Expands a valid bounding box to contain a point (one coordinate at once) */
351 struct BoundsExpander {
352 void operator()(const T &val,BoundsPair &bounds) const {
353 if ( val < bounds[0] ) // lower bound
354 bounds[0]= val; else
355 if ( val > bounds[1] ) // upper bound
356 bounds[1]= val;
359 /** Computes the bounding box of #count vectors with length #length stored in #data.
360 * The vectors in #data are stored linearly, /p boundsRes should be preallocated */
361 void getBounds(Bounds boundsRes) const {
362 using namespace FieldMath;
363 ASSERT(length>0);
364 // make the initial bounds only contain the first point
365 transform2(data,data+length,boundsRes,NewBounds());
366 int count= Tree::count;
367 const T *nowData= data;
368 // expand the bounds by every point (except for the first one)
369 while (--count) {
370 nowData+= length;
371 transform2( nowData, nowData+length, boundsRes, BoundsExpander() );
374 /** Like #getBounds, but it only works on vectors with indices from [\p beginIDs..\p endIDs)
375 * instead of [0..#count), \p boundsRes should be preallocated to store the result */
376 void getBounds(const int *beginIDs,const int *endIDs,Bounds boundsRes) const {
377 using namespace FieldMath;
378 ASSERT(endIDs>beginIDs);
379 // make the initial bounds only contain the first point
380 const T *begin= data + *beginIDs*length;
381 transform2(begin,begin+length,boundsRes,NewBounds());
382 // expand the bounds by every point (except for the first one)
383 while ( ++beginIDs != endIDs ) {
384 begin= data + *beginIDs*length;
385 transform2( begin, begin+length, boundsRes, BoundsExpander() );
389 /** Recursively builds node \p nodeIndex and its subtree of depth \p depthLeft
390 * (including leaves), operates on data \p data in the range [\p beginIDs,\p endIDs) */
391 void buildNode(int nodeIndex,int *beginIDs,int *endIDs,int depthLeft);
393 public:
394 /** CoordChooser choosing the longest coordinate of the bounding box of the current interval */
395 int choosePrecise(int nodeIndex,int *beginIDs,int *endIDs,int /*depthLeft*/) const;
396 /** CoordChooser choosing the coordinate only according to the depth */
397 int chooseFast(int /*nodeIndex*/,int* /*beginIDs*/,int* /*endIDs*/,int depthLeft) const
398 { return depthLeft%length; }
399 /** CoordChooser choosing a random coordinate */
400 int chooseRand(int /*nodeIndex*/,int* /*beginIDs*/,int* /*endIDs*/,int /*depthLeft*/) const
401 { return rand()%length; }
402 int chooseApprox(int nodeIndex,int* /*beginIDs*/,int* /*endIDs*/,int depthLeft) const;
404 static Tree* makeTree(const T *data,int length,int count,CoordChooser chooser) {
405 KDBuilder builder(data,length,count,chooser);
406 return new Tree(builder);
408 }; // KDBuilder class
412 namespace NOSPACE {
413 /** Finds the longest coordinate of a bounding box */
414 template<class T> struct MaxDiffCoord {
415 typedef typename KDBuilder<T>::BoundsPair BoundsPair;
417 T maxDiff;
418 int bestIndex, nextIndex;
420 MaxDiffCoord(const BoundsPair& bounds0)
421 : maxDiff(bounds0[1]-bounds0[0]), bestIndex(0), nextIndex(1) {}
423 void operator()(const BoundsPair& bounds_i) {
424 T diff= bounds_i[1]-bounds_i[0];
425 if (diff>maxDiff) {
426 bestIndex= nextIndex;
427 maxDiff= diff;
429 ++nextIndex;
433 template<class T> int KDBuilder<T>
434 ::choosePrecise(int nodeIndex,int *beginIDs,int *endIDs,int) const {
435 ASSERT( nodeIndex>0 && beginIDs && endIDs && beginIDs<endIDs );
436 // temporary storage for computed bounding box
437 BoundsPair boundsStorage[length];
438 const BoundsPair *localBounds;
440 if ( nodeIndex>1 ) { // compute the bounding box
441 localBounds= boundsStorage;
442 getBounds( beginIDs, endIDs, boundsStorage );
443 } else // we are in the root -> we can use already computed bounds
444 localBounds= bounds;
445 // find and return the longest coordinate
446 MaxDiffCoord<T> mdc= for_each
447 ( localBounds+1, localBounds+length, MaxDiffCoord<T>(localBounds[0]) );
448 ASSERT( mdc.nextIndex == length );
449 return mdc.bestIndex;
451 template<class T> int KDBuilder<T>::chooseApprox(int nodeIndex,int*,int*,int) const {
452 using namespace FieldMath;
453 ASSERT(nodeIndex>0);
455 int myDepth= log2ceil(nodeIndex+1)-1;
456 if (!myDepth) { // I'm in the root - copy the bounds
457 ASSERT(nodeIndex==1);
458 chooserTmp= new BoundsPair[length*(depth+1)]; // allocate my temporary bound-array
459 assign( bounds, length, chooserTmp );
462 Bounds myBounds= chooserTmp+length*myDepth;
463 if (myDepth) { // I'm not the root - copy parent's bounds and modify them
464 const typename Tree::Node &parent= nodes[nodeIndex/2];
465 Bounds parentBounds= myBounds-length;
466 if (nodeIndex%2) { // I'm the right son -> bounds on this level not initialized
467 assign( parentBounds, length, myBounds );
468 myBounds[parent.coord][0]= parent.threshold; // adjusting the lower bound
469 } else // I'm the left son
470 if ( nodeIndex+1 < count ) { // almost the same as brother -> only adjust the coordinate
471 myBounds[parent.coord][0]= parentBounds[parent.coord][0];
472 myBounds[parent.coord][1]= parent.threshold;
473 } else { // I've got no brother
474 ASSERT( nodeIndex+1 == count );
475 assign( parentBounds, length, myBounds );
476 myBounds[parent.coord][1]= parent.threshold;
479 // find out the widest dimension
480 MaxDiffCoord<T> mdc= for_each( myBounds+1, myBounds+length, MaxDiffCoord<T>(myBounds[0]) );
481 ASSERT( mdc.nextIndex == length );
482 return mdc.bestIndex;
485 namespace NOSPACE {
486 /** Compares vectors (given by their indices) according to a given coordinate */
487 template<class T> class IndexComparator {
488 const T *data;
489 int length;
490 public:
491 IndexComparator(const T *data_,int length_,int index_)
492 : data(data_+index_), length(length_) {}
493 bool operator()(int a,int b) const
494 { return data[a*length] < data[b*length]; }
497 template<class T> void KDBuilder<T>
498 ::buildNode(int nodeIndex,int *beginIDs,int *endIDs,int depthLeft) {
499 int count= endIDs-beginIDs; // owershadowing Tree::count
500 // check we've got at least one vector and the depth&count are adequate to each other
501 ASSERT( count>=2 && powers[depthLeft-1]<count && count<=powers[depthLeft] );
502 --depthLeft;
503 bool shallowRight= ( count <= powers[depthLeft]+powers[depthLeft-1] );
504 int *middle= shallowRight
505 ? endIDs-powers[depthLeft-1]
506 : beginIDs+powers[depthLeft];
507 // find out the dividing coordinate and find the median in this coordinate
508 int coord= (this->*chooser)(nodeIndex,beginIDs,endIDs,depthLeft);
509 nth_element( beginIDs, middle , endIDs, IndexComparator<T>(data,length,coord) );
510 // fill the node's data (dividing coordinate and its threshold)
511 nodes[nodeIndex].coord= coord;
512 nodes[nodeIndex].threshold= data[*middle*length+coord];
513 // recurse on both halves (if needed; fall-through switch)
514 switch (count) {
515 default: // we've got enough nodes - build both subtrees (fall through)
516 // build the right subtree
517 buildNode( 2*nodeIndex+1, middle, endIDs, depthLeft-shallowRight );
518 case 3: // only a pair in the first half
519 // build the left subtree
520 buildNode( 2*nodeIndex, beginIDs, middle, depthLeft );
521 case 2: // nothing needs to be sorted
526 #endif // KDTREE_HEADER_