/*********************************************************************** * Software License Agreement (BSD License) * * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved. * * THE BSD LICENSE * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************************/ #ifndef OPENCV_FLANN_KDTREE_INDEX_H_ #define OPENCV_FLANN_KDTREE_INDEX_H_ #include <algorithm> #include <map> #include <cassert> #include <cstring> #include "general.h" #include "nn_index.h" #include "dynamic_bitset.h" #include "matrix.h" #include "result_set.h" #include "heap.h" #include "allocator.h" #include "random.h" #include "saving.h" namespace cvflann { struct KDTreeIndexParams : public IndexParams { KDTreeIndexParams(int trees = 4) { (*this)["algorithm"] = FLANN_INDEX_KDTREE; (*this)["trees"] = trees; } }; /** * Randomized kd-tree index * * Contains the k-d trees and other information for indexing a set of points * for nearest-neighbor matching. */ template <typename Distance> class KDTreeIndex : public NNIndex<Distance> { public: typedef typename Distance::ElementType ElementType; typedef typename Distance::ResultType DistanceType; /** * KDTree constructor * * Params: * inputData = dataset with the input features * params = parameters passed to the kdtree algorithm */ KDTreeIndex(const Matrix<ElementType>& inputData, const IndexParams& params = KDTreeIndexParams(), Distance d = Distance() ) : dataset_(inputData), index_params_(params), distance_(d) { size_ = dataset_.rows; veclen_ = dataset_.cols; trees_ = get_param(index_params_,"trees",4); tree_roots_ = new NodePtr[trees_]; // Create a permutable array of indices to the input vectors. vind_.resize(size_); for (size_t i = 0; i < size_; ++i) { vind_[i] = int(i); } mean_ = new DistanceType[veclen_]; var_ = new DistanceType[veclen_]; } KDTreeIndex(const KDTreeIndex&); KDTreeIndex& operator=(const KDTreeIndex&); /** * Standard destructor */ ~KDTreeIndex() { if (tree_roots_!=NULL) { delete[] tree_roots_; } delete[] mean_; delete[] var_; } /** * Builds the index */ void buildIndex() { /* Construct the randomized trees. */ for (int i = 0; i < trees_; i++) { /* Randomize the order of vectors to allow for unbiased sampling. */ std::random_shuffle(vind_.begin(), vind_.end()); tree_roots_[i] = divideTree(&vind_[0], int(size_) ); } } flann_algorithm_t getType() const { return FLANN_INDEX_KDTREE; } void saveIndex(FILE* stream) { save_value(stream, trees_); for (int i=0; i<trees_; ++i) { save_tree(stream, tree_roots_[i]); } } void loadIndex(FILE* stream) { load_value(stream, trees_); if (tree_roots_!=NULL) { delete[] tree_roots_; } tree_roots_ = new NodePtr[trees_]; for (int i=0; i<trees_; ++i) { load_tree(stream,tree_roots_[i]); } index_params_["algorithm"] = getType(); index_params_["trees"] = tree_roots_; } /** * Returns size of index. */ size_t size() const { return size_; } /** * Returns the length of an index feature. */ size_t veclen() const { return veclen_; } /** * Computes the inde memory usage * Returns: memory used by the index */ int usedMemory() const { return int(pool_.usedMemory+pool_.wastedMemory+dataset_.rows*sizeof(int)); // pool memory and vind array memory } /** * Find set of nearest neighbors to vec. Their indices are stored inside * the result object. * * Params: * result = the result object in which the indices of the nearest-neighbors are stored * vec = the vector for which to search the nearest neighbors * maxCheck = the maximum number of restarts (in a best-bin-first manner) */ void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) { int maxChecks = get_param(searchParams,"checks", 32); float epsError = 1+get_param(searchParams,"eps",0.0f); if (maxChecks==FLANN_CHECKS_UNLIMITED) { getExactNeighbors(result, vec, epsError); } else { getNeighbors(result, vec, maxChecks, epsError); } } IndexParams getParameters() const { return index_params_; } private: /*--------------------- Internal Data Structures --------------------------*/ struct Node { /** * Dimension used for subdivision. */ int divfeat; /** * The values used for subdivision. */ DistanceType divval; /** * The child nodes. */ Node* child1, * child2; }; typedef Node* NodePtr; typedef BranchStruct<NodePtr, DistanceType> BranchSt; typedef BranchSt* Branch; void save_tree(FILE* stream, NodePtr tree) { save_value(stream, *tree); if (tree->child1!=NULL) { save_tree(stream, tree->child1); } if (tree->child2!=NULL) { save_tree(stream, tree->child2); } } void load_tree(FILE* stream, NodePtr& tree) { tree = pool_.allocate<Node>(); load_value(stream, *tree); if (tree->child1!=NULL) { load_tree(stream, tree->child1); } if (tree->child2!=NULL) { load_tree(stream, tree->child2); } } /** * Create a tree node that subdivides the list of vecs from vind[first] * to vind[last]. The routine is called recursively on each sublist. * Place a pointer to this new tree node in the location pTree. * * Params: pTree = the new node to create * first = index of the first vector * last = index of the last vector */ NodePtr divideTree(int* ind, int count) { NodePtr node = pool_.allocate<Node>(); // allocate memory /* If too few exemplars remain, then make this a leaf node. */ if ( count == 1) { node->child1 = node->child2 = NULL; /* Mark as leaf node. */ node->divfeat = *ind; /* Store index of this vec. */ } else { int idx; int cutfeat; DistanceType cutval; meanSplit(ind, count, idx, cutfeat, cutval); node->divfeat = cutfeat; node->divval = cutval; node->child1 = divideTree(ind, idx); node->child2 = divideTree(ind+idx, count-idx); } return node; } /** * Choose which feature to use in order to subdivide this set of vectors. * Make a random choice among those with the highest variance, and use * its variance as the threshold value. */ void meanSplit(int* ind, int count, int& index, int& cutfeat, DistanceType& cutval) { memset(mean_,0,veclen_*sizeof(DistanceType)); memset(var_,0,veclen_*sizeof(DistanceType)); /* Compute mean values. Only the first SAMPLE_MEAN values need to be sampled to get a good estimate. */ int cnt = std::min((int)SAMPLE_MEAN+1, count); for (int j = 0; j < cnt; ++j) { ElementType* v = dataset_[ind[j]]; for (size_t k=0; k<veclen_; ++k) { mean_[k] += v[k]; } } for (size_t k=0; k<veclen_; ++k) { mean_[k] /= cnt; } /* Compute variances (no need to divide by count). */ for (int j = 0; j < cnt; ++j) { ElementType* v = dataset_[ind[j]]; for (size_t k=0; k<veclen_; ++k) { DistanceType dist = v[k] - mean_[k]; var_[k] += dist * dist; } } /* Select one of the highest variance indices at random. */ cutfeat = selectDivision(var_); cutval = mean_[cutfeat]; int lim1, lim2; planeSplit(ind, count, cutfeat, cutval, lim1, lim2); if (lim1>count/2) index = lim1; else if (lim2<count/2) index = lim2; else index = count/2; /* If either list is empty, it means that all remaining features * are identical. Split in the middle to maintain a balanced tree. */ if ((lim1==count)||(lim2==0)) index = count/2; } /** * Select the top RAND_DIM largest values from v and return the index of * one of these selected at random. */ int selectDivision(DistanceType* v) { int num = 0; size_t topind[RAND_DIM]; /* Create a list of the indices of the top RAND_DIM values. */ for (size_t i = 0; i < veclen_; ++i) { if ((num < RAND_DIM)||(v[i] > v[topind[num-1]])) { /* Put this element at end of topind. */ if (num < RAND_DIM) { topind[num++] = i; /* Add to list. */ } else { topind[num-1] = i; /* Replace last element. */ } /* Bubble end value down to right location by repeated swapping. */ int j = num - 1; while (j > 0 && v[topind[j]] > v[topind[j-1]]) { std::swap(topind[j], topind[j-1]); --j; } } } /* Select a random integer in range [0,num-1], and return that index. */ int rnd = rand_int(num); return (int)topind[rnd]; } /** * Subdivide the list of points by a plane perpendicular on axe corresponding * to the 'cutfeat' dimension at 'cutval' position. * * On return: * dataset[ind[0..lim1-1]][cutfeat]<cutval * dataset[ind[lim1..lim2-1]][cutfeat]==cutval * dataset[ind[lim2..count]][cutfeat]>cutval */ void planeSplit(int* ind, int count, int cutfeat, DistanceType cutval, int& lim1, int& lim2) { /* Move vector indices for left subtree to front of list. */ int left = 0; int right = count-1; for (;; ) { while (left<=right && dataset_[ind[left]][cutfeat]<cutval) ++left; while (left<=right && dataset_[ind[right]][cutfeat]>=cutval) --right; if (left>right) break; std::swap(ind[left], ind[right]); ++left; --right; } lim1 = left; right = count-1; for (;; ) { while (left<=right && dataset_[ind[left]][cutfeat]<=cutval) ++left; while (left<=right && dataset_[ind[right]][cutfeat]>cutval) --right; if (left>right) break; std::swap(ind[left], ind[right]); ++left; --right; } lim2 = left; } /** * Performs an exact nearest neighbor search. The exact search performs a full * traversal of the tree. */ void getExactNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, float epsError) { // checkID -= 1; /* Set a different unique ID for each search. */ if (trees_ > 1) { fprintf(stderr,"It doesn't make any sense to use more than one tree for exact search"); } if (trees_>0) { searchLevelExact(result, vec, tree_roots_[0], 0.0, epsError); } assert(result.full()); } /** * Performs the approximate nearest-neighbor search. The search is approximate * because the tree traversal is abandoned after a given number of descends in * the tree. */ void getNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, int maxCheck, float epsError) { int i; BranchSt branch; int checkCount = 0; Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_); DynamicBitset checked(size_); /* Search once through each tree down to root. */ for (i = 0; i < trees_; ++i) { searchLevel(result, vec, tree_roots_[i], 0, checkCount, maxCheck, epsError, heap, checked); } /* Keep searching other branches from heap until finished. */ while ( heap->popMin(branch) && (checkCount < maxCheck || !result.full() )) { searchLevel(result, vec, branch.node, branch.mindist, checkCount, maxCheck, epsError, heap, checked); } delete heap; assert(result.full()); } /** * Search starting from a given node of the tree. Based on any mismatches at * higher levels, all exemplars below this level must have a distance of * at least "mindistsq". */ void searchLevel(ResultSet<DistanceType>& result_set, const ElementType* vec, NodePtr node, DistanceType mindist, int& checkCount, int maxCheck, float epsError, Heap<BranchSt>* heap, DynamicBitset& checked) { if (result_set.worstDist()<mindist) { // printf("Ignoring branch, too far\n"); return; } /* If this is a leaf node, then do check and return. */ if ((node->child1 == NULL)&&(node->child2 == NULL)) { /* Do not check same node more than once when searching multiple trees. Once a vector is checked, we set its location in vind to the current checkID. */ int index = node->divfeat; if ( checked.test(index) || ((checkCount>=maxCheck)&& result_set.full()) ) return; checked.set(index); checkCount++; DistanceType dist = distance_(dataset_[index], vec, veclen_); result_set.addPoint(dist,index); return; } /* Which child branch should be taken first? */ ElementType val = vec[node->divfeat]; DistanceType diff = val - node->divval; NodePtr bestChild = (diff < 0) ? node->child1 : node->child2; NodePtr otherChild = (diff < 0) ? node->child2 : node->child1; /* Create a branch record for the branch not taken. Add distance of this feature boundary (we don't attempt to correct for any use of this feature in a parent node, which is unlikely to happen and would have only a small effect). Don't bother adding more branches to heap after halfway point, as cost of adding exceeds their value. */ DistanceType new_distsq = mindist + distance_.accum_dist(val, node->divval, node->divfeat); // if (2 * checkCount < maxCheck || !result.full()) { if ((new_distsq*epsError < result_set.worstDist())|| !result_set.full()) { heap->insert( BranchSt(otherChild, new_distsq) ); } /* Call recursively to search next level down. */ searchLevel(result_set, vec, bestChild, mindist, checkCount, maxCheck, epsError, heap, checked); } /** * Performs an exact search in the tree starting from a node. */ void searchLevelExact(ResultSet<DistanceType>& result_set, const ElementType* vec, const NodePtr node, DistanceType mindist, const float epsError) { /* If this is a leaf node, then do check and return. */ if ((node->child1 == NULL)&&(node->child2 == NULL)) { int index = node->divfeat; DistanceType dist = distance_(dataset_[index], vec, veclen_); result_set.addPoint(dist,index); return; } /* Which child branch should be taken first? */ ElementType val = vec[node->divfeat]; DistanceType diff = val - node->divval; NodePtr bestChild = (diff < 0) ? node->child1 : node->child2; NodePtr otherChild = (diff < 0) ? node->child2 : node->child1; /* Create a branch record for the branch not taken. Add distance of this feature boundary (we don't attempt to correct for any use of this feature in a parent node, which is unlikely to happen and would have only a small effect). Don't bother adding more branches to heap after halfway point, as cost of adding exceeds their value. */ DistanceType new_distsq = mindist + distance_.accum_dist(val, node->divval, node->divfeat); /* Call recursively to search next level down. */ searchLevelExact(result_set, vec, bestChild, mindist, epsError); if (new_distsq*epsError<=result_set.worstDist()) { searchLevelExact(result_set, vec, otherChild, new_distsq, epsError); } } private: enum { /** * To improve efficiency, only SAMPLE_MEAN random values are used to * compute the mean and variance at each level when building a tree. * A value of 100 seems to perform as well as using all values. */ SAMPLE_MEAN = 100, /** * Top random dimensions to consider * * When creating random trees, the dimension on which to subdivide is * selected at random from among the top RAND_DIM dimensions with the * highest variance. A value of 5 works well. */ RAND_DIM=5 }; /** * Number of randomized trees that are used */ int trees_; /** * Array of indices to vectors in the dataset. */ std::vector<int> vind_; /** * The dataset used by this index */ const Matrix<ElementType> dataset_; IndexParams index_params_; size_t size_; size_t veclen_; DistanceType* mean_; DistanceType* var_; /** * Array of k-d trees used to find neighbours. */ NodePtr* tree_roots_; /** * Pooled memory allocator. * * Using a pooled memory allocator is more efficient * than allocating memory directly when there is a large * number small of memory allocations. */ PooledAllocator pool_; Distance distance_; }; // class KDTreeForest } #endif //OPENCV_FLANN_KDTREE_INDEX_H_