| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513 | 
							- /***********************************************************************
 
-  * Software License Agreement (BSD License)
 
-  *
 
-  * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
 
-  * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
 
-  *
 
-  * THE BSD LICENSE
 
-  *
 
-  * Redistribution and use in source and binary forms, with or without
 
-  * modification, are permitted provided that the following conditions
 
-  * are met:
 
-  *
 
-  * 1. Redistributions of source code must retain the above copyright
 
-  *    notice, this list of conditions and the following disclaimer.
 
-  * 2. Redistributions in binary form must reproduce the above copyright
 
-  *    notice, this list of conditions and the following disclaimer in the
 
-  *    documentation and/or other materials provided with the distribution.
 
-  *
 
-  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 
-  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 
-  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 
-  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 
-  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 
-  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 
-  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 
-  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 
-  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 
-  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-  *************************************************************************/
 
- /***********************************************************************
 
-  * Author: Vincent Rabaud
 
-  *************************************************************************/
 
- #ifndef OPENCV_FLANN_LSH_TABLE_H_
 
- #define OPENCV_FLANN_LSH_TABLE_H_
 
- #include <algorithm>
 
- #include <iostream>
 
- #include <iomanip>
 
- #include <limits.h>
 
- // TODO as soon as we use C++0x, use the code in USE_UNORDERED_MAP
 
- #ifdef __GXX_EXPERIMENTAL_CXX0X__
 
- #  define USE_UNORDERED_MAP 1
 
- #else
 
- #  define USE_UNORDERED_MAP 0
 
- #endif
 
- #if USE_UNORDERED_MAP
 
- #include <unordered_map>
 
- #else
 
- #include <map>
 
- #endif
 
- #include <math.h>
 
- #include <stddef.h>
 
- #include "dynamic_bitset.h"
 
- #include "matrix.h"
 
- namespace cvflann
 
- {
 
- namespace lsh
 
- {
 
- ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
- /** What is stored in an LSH bucket
 
-  */
 
- typedef uint32_t FeatureIndex;
 
- /** The id from which we can get a bucket back in an LSH table
 
-  */
 
- typedef unsigned int BucketKey;
 
- /** A bucket in an LSH table
 
-  */
 
- typedef std::vector<FeatureIndex> Bucket;
 
- ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
- /** POD for stats about an LSH table
 
-  */
 
- struct LshStats
 
- {
 
-     std::vector<unsigned int> bucket_sizes_;
 
-     size_t n_buckets_;
 
-     size_t bucket_size_mean_;
 
-     size_t bucket_size_median_;
 
-     size_t bucket_size_min_;
 
-     size_t bucket_size_max_;
 
-     size_t bucket_size_std_dev;
 
-     /** Each contained vector contains three value: beginning/end for interval, number of elements in the bin
 
-      */
 
-     std::vector<std::vector<unsigned int> > size_histogram_;
 
- };
 
- /** Overload the << operator for LshStats
 
-  * @param out the streams
 
-  * @param stats the stats to display
 
-  * @return the streams
 
-  */
 
- inline std::ostream& operator <<(std::ostream& out, const LshStats& stats)
 
- {
 
-     int w = 20;
 
-     out << "Lsh Table Stats:\n" << std::setw(w) << std::setiosflags(std::ios::right) << "N buckets : "
 
-     << stats.n_buckets_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "mean size : "
 
-     << std::setiosflags(std::ios::left) << stats.bucket_size_mean_ << "\n" << std::setw(w)
 
-     << std::setiosflags(std::ios::right) << "median size : " << stats.bucket_size_median_ << "\n" << std::setw(w)
 
-     << std::setiosflags(std::ios::right) << "min size : " << std::setiosflags(std::ios::left)
 
-     << stats.bucket_size_min_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "max size : "
 
-     << std::setiosflags(std::ios::left) << stats.bucket_size_max_;
 
-     // Display the histogram
 
-     out << std::endl << std::setw(w) << std::setiosflags(std::ios::right) << "histogram : "
 
-     << std::setiosflags(std::ios::left);
 
-     for (std::vector<std::vector<unsigned int> >::const_iterator iterator = stats.size_histogram_.begin(), end =
 
-              stats.size_histogram_.end(); iterator != end; ++iterator) out << (*iterator)[0] << "-" << (*iterator)[1] << ": " << (*iterator)[2] << ",  ";
 
-     return out;
 
- }
 
- ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
- /** Lsh hash table. As its key is a sub-feature, and as usually
 
-  * the size of it is pretty small, we keep it as a continuous memory array.
 
-  * The value is an index in the corpus of features (we keep it as an unsigned
 
-  * int for pure memory reasons, it could be a size_t)
 
-  */
 
- template<typename ElementType>
 
- class LshTable
 
- {
 
- public:
 
-     /** A container of all the feature indices. Optimized for space
 
-      */
 
- #if USE_UNORDERED_MAP
 
-     typedef std::unordered_map<BucketKey, Bucket> BucketsSpace;
 
- #else
 
-     typedef std::map<BucketKey, Bucket> BucketsSpace;
 
- #endif
 
-     /** A container of all the feature indices. Optimized for speed
 
-      */
 
-     typedef std::vector<Bucket> BucketsSpeed;
 
-     /** Default constructor
 
-      */
 
-     LshTable()
 
-     {
 
-         key_size_ = 0;
 
-         feature_size_ = 0;
 
-         speed_level_ = kArray;
 
-     }
 
-     /** Default constructor
 
-      * Create the mask and allocate the memory
 
-      * @param feature_size is the size of the feature (considered as a ElementType[])
 
-      * @param key_size is the number of bits that are turned on in the feature
 
-      */
 
-     LshTable(unsigned int feature_size, unsigned int key_size)
 
-     {
 
-         feature_size_ = feature_size;
 
-         (void)key_size;
 
-         std::cerr << "LSH is not implemented for that type" << std::endl;
 
-         assert(0);
 
-     }
 
-     /** Add a feature to the table
 
-      * @param value the value to store for that feature
 
-      * @param feature the feature itself
 
-      */
 
-     void add(unsigned int value, const ElementType* feature)
 
-     {
 
-         // Add the value to the corresponding bucket
 
-         BucketKey key = (lsh::BucketKey)getKey(feature);
 
-         switch (speed_level_) {
 
-         case kArray:
 
-             // That means we get the buckets from an array
 
-             buckets_speed_[key].push_back(value);
 
-             break;
 
-         case kBitsetHash:
 
-             // That means we can check the bitset for the presence of a key
 
-             key_bitset_.set(key);
 
-             buckets_space_[key].push_back(value);
 
-             break;
 
-         case kHash:
 
-         {
 
-             // That means we have to check for the hash table for the presence of a key
 
-             buckets_space_[key].push_back(value);
 
-             break;
 
-         }
 
-         }
 
-     }
 
-     /** Add a set of features to the table
 
-      * @param dataset the values to store
 
-      */
 
-     void add(Matrix<ElementType> dataset)
 
-     {
 
- #if USE_UNORDERED_MAP
 
-         buckets_space_.rehash((buckets_space_.size() + dataset.rows) * 1.2);
 
- #endif
 
-         // Add the features to the table
 
-         for (unsigned int i = 0; i < dataset.rows; ++i) add(i, dataset[i]);
 
-         // Now that the table is full, optimize it for speed/space
 
-         optimize();
 
-     }
 
-     /** Get a bucket given the key
 
-      * @param key
 
-      * @return
 
-      */
 
-     inline const Bucket* getBucketFromKey(BucketKey key) const
 
-     {
 
-         // Generate other buckets
 
-         switch (speed_level_) {
 
-         case kArray:
 
-             // That means we get the buckets from an array
 
-             return &buckets_speed_[key];
 
-             break;
 
-         case kBitsetHash:
 
-             // That means we can check the bitset for the presence of a key
 
-             if (key_bitset_.test(key)) return &buckets_space_.find(key)->second;
 
-             else return 0;
 
-             break;
 
-         case kHash:
 
-         {
 
-             // That means we have to check for the hash table for the presence of a key
 
-             BucketsSpace::const_iterator bucket_it, bucket_end = buckets_space_.end();
 
-             bucket_it = buckets_space_.find(key);
 
-             // Stop here if that bucket does not exist
 
-             if (bucket_it == bucket_end) return 0;
 
-             else return &bucket_it->second;
 
-             break;
 
-         }
 
-         }
 
-         return 0;
 
-     }
 
-     /** Compute the sub-signature of a feature
 
-      */
 
-     size_t getKey(const ElementType* /*feature*/) const
 
-     {
 
-         std::cerr << "LSH is not implemented for that type" << std::endl;
 
-         assert(0);
 
-         return 1;
 
-     }
 
-     /** Get statistics about the table
 
-      * @return
 
-      */
 
-     LshStats getStats() const;
 
- private:
 
-     /** defines the speed fo the implementation
 
-      * kArray uses a vector for storing data
 
-      * kBitsetHash uses a hash map but checks for the validity of a key with a bitset
 
-      * kHash uses a hash map only
 
-      */
 
-     enum SpeedLevel
 
-     {
 
-         kArray, kBitsetHash, kHash
 
-     };
 
-     /** Initialize some variables
 
-      */
 
-     void initialize(size_t key_size)
 
-     {
 
-         const size_t key_size_lower_bound = 1;
 
-         //a value (size_t(1) << key_size) must fit the size_t type so key_size has to be strictly less than size of size_t
 
-         const size_t key_size_upper_bound = (std::min)(sizeof(BucketKey) * CHAR_BIT + 1, sizeof(size_t) * CHAR_BIT);
 
-         if (key_size < key_size_lower_bound || key_size >= key_size_upper_bound)
 
-         {
 
-             CV_Error(cv::Error::StsBadArg, cv::format("Invalid key_size (=%d). Valid values for your system are %d <= key_size < %d.", (int)key_size, (int)key_size_lower_bound, (int)key_size_upper_bound));
 
-         }
 
-         speed_level_ = kHash;
 
-         key_size_ = (unsigned)key_size;
 
-     }
 
-     /** Optimize the table for speed/space
 
-      */
 
-     void optimize()
 
-     {
 
-         // If we are already using the fast storage, no need to do anything
 
-         if (speed_level_ == kArray) return;
 
-         // Use an array if it will be more than half full
 
-         if (buckets_space_.size() > ((size_t(1) << key_size_) / 2)) {
 
-             speed_level_ = kArray;
 
-             // Fill the array version of it
 
-             buckets_speed_.resize(size_t(1) << key_size_);
 
-             for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) buckets_speed_[key_bucket->first] = key_bucket->second;
 
-             // Empty the hash table
 
-             buckets_space_.clear();
 
-             return;
 
-         }
 
-         // If the bitset is going to use less than 10% of the RAM of the hash map (at least 1 size_t for the key and two
 
-         // for the vector) or less than 512MB (key_size_ <= 30)
 
-         if (((std::max(buckets_space_.size(), buckets_speed_.size()) * CHAR_BIT * 3 * sizeof(BucketKey)) / 10
 
-              >= (size_t(1) << key_size_)) || (key_size_ <= 32)) {
 
-             speed_level_ = kBitsetHash;
 
-             key_bitset_.resize(size_t(1) << key_size_);
 
-             key_bitset_.reset();
 
-             // Try with the BucketsSpace
 
-             for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) key_bitset_.set(key_bucket->first);
 
-         }
 
-         else {
 
-             speed_level_ = kHash;
 
-             key_bitset_.clear();
 
-         }
 
-     }
 
-     /** The vector of all the buckets if they are held for speed
 
-      */
 
-     BucketsSpeed buckets_speed_;
 
-     /** The hash table of all the buckets in case we cannot use the speed version
 
-      */
 
-     BucketsSpace buckets_space_;
 
-     /** What is used to store the data */
 
-     SpeedLevel speed_level_;
 
-     /** If the subkey is small enough, it will keep track of which subkeys are set through that bitset
 
-      * That is just a speedup so that we don't look in the hash table (which can be mush slower that checking a bitset)
 
-      */
 
-     DynamicBitset key_bitset_;
 
-     /** The size of the sub-signature in bits
 
-      */
 
-     unsigned int key_size_;
 
-     unsigned int feature_size_;
 
-     // Members only used for the unsigned char specialization
 
-     /** The mask to apply to a feature to get the hash key
 
-      * Only used in the unsigned char case
 
-      */
 
-     std::vector<size_t> mask_;
 
- };
 
- ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
- // Specialization for unsigned char
 
- template<>
 
- inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size)
 
- {
 
-     feature_size_ = feature_size;
 
-     initialize(subsignature_size);
 
-     // Allocate the mask
 
-     mask_ = std::vector<size_t>((feature_size * sizeof(char) + sizeof(size_t) - 1) / sizeof(size_t), 0);
 
-     // A bit brutal but fast to code
 
-     std::vector<int> indices(feature_size * CHAR_BIT);
 
-     for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) indices[i] = (int)i;
 
- #ifndef OPENCV_FLANN_USE_STD_RAND
 
-     cv::randShuffle(indices);
 
- #else
 
-     std::random_shuffle(indices.begin(), indices.end());
 
- #endif
 
-     // Generate a random set of order of subsignature_size_ bits
 
-     for (unsigned int i = 0; i < key_size_; ++i) {
 
-         size_t index = indices[i];
 
-         // Set that bit in the mask
 
-         size_t divisor = CHAR_BIT * sizeof(size_t);
 
-         size_t idx = index / divisor; //pick the right size_t index
 
-         mask_[idx] |= size_t(1) << (index % divisor); //use modulo to find the bit offset
 
-     }
 
-     // Set to 1 if you want to display the mask for debug
 
- #if 0
 
-     {
 
-         size_t bcount = 0;
 
-         BOOST_FOREACH(size_t mask_block, mask_){
 
-             out << std::setw(sizeof(size_t) * CHAR_BIT / 4) << std::setfill('0') << std::hex << mask_block
 
-                 << std::endl;
 
-             bcount += __builtin_popcountll(mask_block);
 
-         }
 
-         out << "bit count : " << std::dec << bcount << std::endl;
 
-         out << "mask size : " << mask_.size() << std::endl;
 
-         return out;
 
-     }
 
- #endif
 
- }
 
- /** Return the Subsignature of a feature
 
-  * @param feature the feature to analyze
 
-  */
 
- template<>
 
- inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) const
 
- {
 
-     // no need to check if T is dividable by sizeof(size_t) like in the Hamming
 
-     // distance computation as we have a mask
 
-     // FIXIT: This is bad assumption, because we reading tail bytes after of the allocated features buffer
 
-     const size_t* feature_block_ptr = reinterpret_cast<const size_t*> ((const void*)feature);
 
-     // Figure out the subsignature of the feature
 
-     // Given the feature ABCDEF, and the mask 001011, the output will be
 
-     // 000CEF
 
-     size_t subsignature = 0;
 
-     size_t bit_index = 1;
 
-     for (unsigned i = 0; i < feature_size_; i += sizeof(size_t)) {
 
-         // get the mask and signature blocks
 
-         size_t feature_block;
 
-         if (i <= feature_size_ - sizeof(size_t))
 
-         {
 
-             feature_block = *feature_block_ptr;
 
-         }
 
-         else
 
-         {
 
-             size_t tmp = 0;
 
-             memcpy(&tmp, feature_block_ptr, feature_size_ - i); // preserve bytes order
 
-             feature_block = tmp;
 
-         }
 
-         size_t mask_block = mask_[i / sizeof(size_t)];
 
-         while (mask_block) {
 
-             // Get the lowest set bit in the mask block
 
-             size_t lowest_bit = mask_block & (-(ptrdiff_t)mask_block);
 
-             // Add it to the current subsignature if necessary
 
-             subsignature += (feature_block & lowest_bit) ? bit_index : 0;
 
-             // Reset the bit in the mask block
 
-             mask_block ^= lowest_bit;
 
-             // increment the bit index for the subsignature
 
-             bit_index <<= 1;
 
-         }
 
-         // Check the next feature block
 
-         ++feature_block_ptr;
 
-     }
 
-     return subsignature;
 
- }
 
- template<>
 
- inline LshStats LshTable<unsigned char>::getStats() const
 
- {
 
-     LshStats stats;
 
-     stats.bucket_size_mean_ = 0;
 
-     if ((buckets_speed_.empty()) && (buckets_space_.empty())) {
 
-         stats.n_buckets_ = 0;
 
-         stats.bucket_size_median_ = 0;
 
-         stats.bucket_size_min_ = 0;
 
-         stats.bucket_size_max_ = 0;
 
-         return stats;
 
-     }
 
-     if (!buckets_speed_.empty()) {
 
-         for (BucketsSpeed::const_iterator pbucket = buckets_speed_.begin(); pbucket != buckets_speed_.end(); ++pbucket) {
 
-             stats.bucket_sizes_.push_back((lsh::FeatureIndex)pbucket->size());
 
-             stats.bucket_size_mean_ += pbucket->size();
 
-         }
 
-         stats.bucket_size_mean_ /= buckets_speed_.size();
 
-         stats.n_buckets_ = buckets_speed_.size();
 
-     }
 
-     else {
 
-         for (BucketsSpace::const_iterator x = buckets_space_.begin(); x != buckets_space_.end(); ++x) {
 
-             stats.bucket_sizes_.push_back((lsh::FeatureIndex)x->second.size());
 
-             stats.bucket_size_mean_ += x->second.size();
 
-         }
 
-         stats.bucket_size_mean_ /= buckets_space_.size();
 
-         stats.n_buckets_ = buckets_space_.size();
 
-     }
 
-     std::sort(stats.bucket_sizes_.begin(), stats.bucket_sizes_.end());
 
-     //  BOOST_FOREACH(int size, stats.bucket_sizes_)
 
-     //          std::cout << size << " ";
 
-     //  std::cout << std::endl;
 
-     stats.bucket_size_median_ = stats.bucket_sizes_[stats.bucket_sizes_.size() / 2];
 
-     stats.bucket_size_min_ = stats.bucket_sizes_.front();
 
-     stats.bucket_size_max_ = stats.bucket_sizes_.back();
 
-     // TODO compute mean and std
 
-     /*float mean, stddev;
 
-        stats.bucket_size_mean_ = mean;
 
-        stats.bucket_size_std_dev = stddev;*/
 
-     // Include a histogram of the buckets
 
-     unsigned int bin_start = 0;
 
-     unsigned int bin_end = 20;
 
-     bool is_new_bin = true;
 
-     for (std::vector<unsigned int>::iterator iterator = stats.bucket_sizes_.begin(), end = stats.bucket_sizes_.end(); iterator
 
-          != end; )
 
-         if (*iterator < bin_end) {
 
-             if (is_new_bin) {
 
-                 stats.size_histogram_.push_back(std::vector<unsigned int>(3, 0));
 
-                 stats.size_histogram_.back()[0] = bin_start;
 
-                 stats.size_histogram_.back()[1] = bin_end - 1;
 
-                 is_new_bin = false;
 
-             }
 
-             ++stats.size_histogram_.back()[2];
 
-             ++iterator;
 
-         }
 
-         else {
 
-             bin_start += 20;
 
-             bin_end += 20;
 
-             is_new_bin = true;
 
-         }
 
-     return stats;
 
- }
 
- // End the two namespaces
 
- }
 
- }
 
- ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
- #endif /* OPENCV_FLANN_LSH_TABLE_H_ */
 
 
  |