dense_bitset.hpp

00001 /*
00002 This file is part of GraphLab.
00003 
00004 GraphLab is free software: you can redistribute it and/or modify
00005 it under the terms of the GNU Lesser General Public License as 
00006 published by the Free Software Foundation, either version 3 of 
00007 the License, or (at your option) any later version.
00008 
00009 GraphLab is distributed in the hope that it will be useful,
00010 but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012 GNU Lesser General Public License for more details.
00013 
00014 You should have received a copy of the GNU Lesser General Public 
00015 License along with GraphLab.  If not, see <http://www.gnu.org/licenses/>.
00016 */
00017 
00018 #ifndef GRAPHLAB_DENSE_BITSET_HPP
00019 #define GRAPHLAB_DENSE_BITSET_HPP
00020 
00021 #include <cstdio>
00022 #include <cstdlib>
00023 #include <cstring>
00024 #include <stdint.h>
00025 #include <graphlab/logger/logger.hpp>
00026 #include <graphlab/serialization/serialization_includes.hpp>
00027 
00028 namespace graphlab {
00029   
00030   /**  \ingroup util
00031    *  Implements an atomic dense bitset
00032    */
00033   class dense_bitset {
00034   public:
00035 
00036     /// Constructs a bitset of 0 length
00037     dense_bitset() : array(NULL), len(0), arrlen(0) {
00038     }
00039 
00040     /// Constructs a bitset with 'size' bits. All bits will be cleared.
00041     dense_bitset(size_t size) : array(NULL), len(size) {
00042       resize(size);
00043       clear();
00044     }
00045 
00046     /// Make a copy of the bitset db
00047     dense_bitset(const dense_bitset &db) {
00048       array = NULL;
00049       len = 0;
00050       arrlen = 0;
00051       *this = db;
00052     }
00053     
00054     /// destructor
00055     ~dense_bitset() {free(array);}
00056   
00057     /// Make a copy of the bitset db
00058     inline dense_bitset& operator=(const dense_bitset& db) {
00059       resize(db.size());
00060       len = db.len;
00061       arrlen = db.arrlen;
00062       memcpy(array, db.array, sizeof(size_t) * arrlen);
00063       return *this;
00064     }
00065   
00066     /** Resizes the current bitset to hold n bits.
00067     Existing bits will not be changed. If the array size is increased,
00068     the value of the new bits are undefined
00069     */
00070     inline void resize(size_t n) {
00071       len = n;
00072       //need len bits
00073       arrlen = next_powerof2(n) / sizeof(size_t) + 1;
00074       array = (size_t*)realloc(array, sizeof(size_t) * arrlen);
00075     }
00076   
00077     /// Sets all bits to 0
00078     inline void clear() {
00079       for (size_t i = 0;i < arrlen; ++i) array[i] = 0;
00080     }
00081     
00082     /// Sets all bits to 1
00083     inline void fill() {
00084       for (size_t i = 0;i < arrlen; ++i) array[i] = (size_t)-1;
00085     }
00086 
00087     /// Prefetches the word containing the bit b
00088     inline void prefetch(uint32_t b) const{
00089       __builtin_prefetch(&(array[b / (8 * sizeof(size_t))]));
00090     }
00091     
00092     /// Returns the value of the bit b
00093     inline bool get(uint32_t b) const{
00094       uint32_t arrpos, bitpos;
00095       bit_to_pos(b, arrpos, bitpos);
00096       return array[arrpos] & (size_t(1) << size_t(bitpos));
00097     }
00098 
00099     //! Atomically sets the bit at position b to true returning the old value
00100     inline bool set_bit(uint32_t b) {
00101       // use CAS to set the bit
00102       uint32_t arrpos, bitpos;
00103       bit_to_pos(b, arrpos, bitpos);
00104       const size_t mask(size_t(1) << size_t(bitpos)); 
00105       return __sync_fetch_and_or(array + arrpos, mask) & mask;
00106     }
00107     
00108     /** Set the bit at position b to true returning the old value.
00109         Unlike set_bit(), this uses a non-atomic set which is faster,
00110         but is unsafe if accessed by multiple threads.
00111     */
00112     inline bool set_bit_unsync(uint32_t b) {
00113       // use CAS to set the bit
00114       uint32_t arrpos, bitpos;
00115       bit_to_pos(b, arrpos, bitpos);
00116       const size_t mask(size_t(1) << size_t(bitpos)); 
00117       bool ret = array[arrpos] & mask;
00118       array[arrpos] |= mask;
00119       return ret;
00120     }
00121 
00122     //! Atomically sets the state of the bit to the new value returning the old value
00123     inline bool set(uint32_t b, bool value) {
00124       if (value) return set_bit(b);
00125       else return clear_bit(b);
00126     }
00127 
00128     /** Set the state of the bit returning the old value.
00129       This version uses a non-atomic set which is faster, but
00130       is unsafe if accessed by multiple threads.
00131     */
00132     inline bool set_unsync(uint32_t b, bool value) {
00133       if (value) return set_bit_unsync(b);
00134       else return clear_bit_unsync(b);
00135     }
00136 
00137 
00138     //! Atomically set the bit at b to false returning the old value
00139     inline bool clear_bit(uint32_t b) {
00140       // use CAS to set the bit
00141       uint32_t arrpos, bitpos;
00142       bit_to_pos(b, arrpos, bitpos);
00143       const size_t test_mask(size_t(1) << size_t(bitpos)); 
00144       const size_t clear_mask(~test_mask); 
00145       return __sync_fetch_and_and(array + arrpos, clear_mask) & test_mask;
00146     }
00147 
00148     /** Clears the state of the bit returning the old value.
00149       This version uses a non-atomic set which is faster, but
00150       is unsafe if accessed by multiple threads.
00151     */
00152     inline bool clear_bit_unsync(uint32_t b) {
00153       // use CAS to set the bit
00154       uint32_t arrpos, bitpos;
00155       bit_to_pos(b, arrpos, bitpos);
00156       const size_t test_mask(size_t(1) << size_t(bitpos)); 
00157       const size_t clear_mask(~test_mask); 
00158       bool ret = array[arrpos] & test_mask;
00159       array[arrpos] &= clear_mask;
00160       return ret;
00161     }
00162 
00163     /** Returns true with b containing the position of the 
00164         first bit set to true.
00165         If such a bit does not exist, this function returns false.
00166     */
00167     inline bool first_bit(uint32_t &b) {
00168       for (size_t i = 0; i < arrlen; ++i) {
00169         if (array[i]) {
00170           b = (uint32_t)(i * (sizeof(size_t) * 8)) + first_bit_in_block(array[i]);
00171           return true;
00172         }
00173       }
00174       return false;
00175     }
00176 
00177     /** Where b is a bit index, this function will return in b,
00178         the position of the next bit set to true, and return true.
00179         If all bits after b are false, this function returns false.
00180     */
00181     inline bool next_bit(uint32_t &b) {
00182       // use CAS to set the bit
00183       uint32_t arrpos, bitpos;
00184       bit_to_pos(b, arrpos, bitpos);
00185       //try to find the next bit in this block
00186       bitpos = next_bit_in_block(bitpos, array[arrpos]);
00187       if (bitpos != 0) {
00188         b = (uint32_t)(arrpos * (sizeof(size_t) * 8)) + bitpos;
00189         return true;
00190       }
00191       else {
00192         // we have to loop through the rest of the array
00193         for (size_t i = arrpos + 1; i < arrlen; ++i) {
00194           if (array[i]) {
00195             b = (uint32_t)(i * (sizeof(size_t) * 8)) + first_bit_in_block(array[i]);
00196             return true;
00197           }
00198         }
00199       }
00200       return false;
00201     }
00202 
00203     ///  Returns the number of bits in this bitset
00204     inline size_t size() const {
00205       return len;
00206     }
00207     
00208     /// Serializes this bitset to an archive
00209     inline void save(oarchive& oarc) const {
00210       oarc <<len << arrlen;
00211       if (arrlen > 0) serialize(oarc, array, arrlen*sizeof(size_t));
00212     }
00213 
00214     /// Deserializes this bitset from an archive
00215     inline void load(iarchive& iarc) {
00216       if (array != NULL) free(array);
00217       array = NULL;
00218       iarc >> len >> arrlen;
00219       if (arrlen > 0) {
00220         array = (size_t*)malloc(arrlen*sizeof(size_t));
00221         deserialize(iarc, array, arrlen*sizeof(size_t));
00222       }
00223     }
00224 
00225   private:
00226    
00227     inline size_t next_powerof2(size_t val) {
00228       --val;
00229       val = val | (val >> 1);
00230       val = val | (val >> 2);
00231       val = val | (val >> 4);
00232       val = val | (val >> 8);
00233       val = val | (val >> 16);
00234 #ifdef _LP64
00235       val = val | (val >> 32);
00236 #endif
00237       return val + 1; 
00238     }
00239   
00240  
00241     inline static void bit_to_pos(uint32_t b, uint32_t& arrpos, uint32_t& bitpos) {
00242       // the compiler better optimize this...
00243       arrpos = b / (8 * sizeof(size_t));
00244       bitpos = b & (8 * sizeof(size_t) - 1);
00245     }
00246   
00247     // returns 0 on failure
00248     inline uint32_t next_bit_in_block(const uint32_t& b, const size_t& block) {
00249       // use CAS to set the bit
00250       size_t belowselectedbit = size_t(-1) - (((size_t(1) << b) - 1)|(size_t(1)<<b));
00251       size_t x = block & belowselectedbit ;
00252       if (x == 0) return 0;
00253       else return (uint32_t)__builtin_ctzl(x);
00254     }
00255 
00256     // returns 0 on failure
00257     inline uint32_t first_bit_in_block(const size_t& block) {
00258       // use CAS to set the bit
00259       if (block == 0) return 0;
00260       else return (uint32_t)__builtin_ctzl(block);
00261     }
00262 
00263     size_t* array;
00264     size_t len;
00265     size_t arrlen;
00266 
00267     template <int len>
00268     friend class fixed_dense_bitset;
00269   };
00270 
00271   
00272   
00273   /**
00274   Like bitset, but of a fixed length as defined by the template parameter
00275   */
00276   template <int len>
00277   class fixed_dense_bitset {
00278   public:
00279     /// Constructs a bitset of 0 length
00280     fixed_dense_bitset() {
00281       clear();
00282     }
00283     
00284    /// Make a copy of the bitset db
00285     fixed_dense_bitset(const fixed_dense_bitset &db) {
00286       *this = db;
00287     }
00288     
00289     /// destructor
00290     ~fixed_dense_bitset() {}
00291   
00292     /// Make a copy of the bitset db
00293     inline fixed_dense_bitset& operator=(const fixed_dense_bitset& db) {
00294       memcpy(array, db.array, sizeof(size_t) * arrlen);
00295       return *this;
00296     }
00297   
00298     /// Sets all bits to 0
00299     inline void clear() {
00300       for (size_t i = 0;i < arrlen; ++i) array[i] = 0;
00301     }
00302     
00303     /// Sets all bits to 1
00304     inline void fill() {
00305       for (size_t i = 0;i < arrlen; ++i) array[i] = -1;
00306     }
00307 
00308     /// Prefetches the word containing the bit b
00309     inline void prefetch(uint32_t b) const{
00310       __builtin_prefetch(&(array[b / (8 * sizeof(size_t))]));
00311     }
00312     
00313     /// Returns the value of the bit b
00314     inline bool get(uint32_t b) const{
00315       uint32_t arrpos, bitpos;
00316       bit_to_pos(b, arrpos, bitpos);
00317       return array[arrpos] & (size_t(1) << size_t(bitpos));
00318     }
00319 
00320     //! Atomically sets the bit at b to true returning the old value
00321     inline bool set_bit(uint32_t b) {
00322       // use CAS to set the bit
00323       uint32_t arrpos, bitpos;
00324       bit_to_pos(b, arrpos, bitpos);
00325       const size_t mask(size_t(1) << size_t(bitpos)); 
00326       return __sync_fetch_and_or(array + arrpos, mask) & mask;
00327     }
00328     
00329     /** Set the bit at position b to true returning the old value.
00330         Unlike set_bit(), this uses a non-atomic set which is faster,
00331         but is unsafe if accessed by multiple threads.
00332     */
00333     inline bool set_bit_unsync(uint32_t b) {
00334       // use CAS to set the bit
00335       uint32_t arrpos, bitpos;
00336       bit_to_pos(b, arrpos, bitpos);
00337       const size_t mask(size_t(1) << size_t(bitpos)); 
00338       bool ret = array[arrpos] & mask;
00339       array[arrpos] |= mask;
00340       return ret;
00341     }
00342 
00343     /** Set the state of the bit returning the old value.
00344       This version uses a non-atomic set which is faster, but
00345       is unsafe if accessed by multiple threads.
00346     */
00347     inline bool set(uint32_t b, bool value) {
00348       if (value) return set_bit(b);
00349       else return clear_bit(b);
00350     }
00351 
00352     /** Set the state of the bit returning the old value.
00353       This version uses a non-atomic set which is faster, but
00354       is unsafe if accessed by multiple threads.
00355     */
00356     inline bool set_unsync(uint32_t b, bool value) {
00357       if (value) return set_bit_unsync(b);
00358       else return clear_bit_unsync(b);
00359     }
00360 
00361 
00362     //! Atomically set the bit at b to false returning the old value
00363     inline bool clear_bit(uint32_t b) {
00364       // use CAS to set the bit
00365       uint32_t arrpos, bitpos;
00366       bit_to_pos(b, arrpos, bitpos);
00367       const size_t test_mask(size_t(1) << size_t(bitpos)); 
00368       const size_t clear_mask(~test_mask); 
00369       return __sync_fetch_and_and(array + arrpos, clear_mask) & test_mask;
00370     }
00371 
00372     /** Clears the state of the bit returning the old value.
00373       This version uses a non-atomic set which is faster, but
00374       is unsafe if accessed by multiple threads.
00375     */
00376     inline bool clear_bit_unsync(uint32_t b) {
00377       // use CAS to set the bit
00378       uint32_t arrpos, bitpos;
00379       bit_to_pos(b, arrpos, bitpos);
00380       const size_t test_mask(size_t(1) << size_t(bitpos)); 
00381       const size_t clear_mask(~test_mask); 
00382       bool ret = array[arrpos] & test_mask;
00383       array[arrpos] &= clear_mask;
00384       return ret;
00385     }
00386 
00387     /** Returns true with b containing the position of the 
00388         first bit set to true.
00389         If such a bit does not exist, this function returns false.
00390     */
00391     inline bool first_bit(uint32_t &b) {
00392       for (size_t i = 0; i < arrlen; ++i) {
00393         if (array[i]) {
00394           b = (uint32_t)(i * (sizeof(size_t) * 8)) + first_bit_in_block(array[i]);
00395           return true;
00396         }
00397       }
00398       return false;
00399     }
00400 
00401     /** Where b is a bit index, this function will return in b,
00402         the position of the next bit set to true, and return true.
00403         If all bits after b are false, this function returns false.
00404     */
00405     inline bool next_bit(uint32_t &b) {
00406       // use CAS to set the bit
00407       uint32_t arrpos, bitpos;
00408       bit_to_pos(b, arrpos, bitpos);
00409       //try to find the next bit in this block
00410       bitpos = next_bit_in_block(bitpos, array[arrpos]);
00411       if (bitpos != 0) {
00412         b = (uint32_t)(arrpos * (sizeof(size_t) * 8)) + bitpos;
00413         return true;
00414       }
00415       else {
00416         // we have to loop through the rest of the array
00417         for (size_t i = arrpos + 1; i < arrlen; ++i) {
00418           if (array[i]) {
00419             b = (uint32_t)(i * (sizeof(size_t) * 8)) + first_bit_in_block(array[i]);
00420             return true;
00421           }
00422         }
00423       }
00424       return false;
00425     }
00426     
00427     ///  Returns the number of bits in this bitset
00428     inline size_t size() const {
00429       return len;
00430     }
00431     
00432     /// Serializes this bitset to an archive
00433     inline void save(oarchive& oarc) const {
00434       oarc <<len << arrlen;
00435       if (arrlen > 0) serialize(oarc, array, arrlen*sizeof(size_t));
00436     }
00437 
00438     /// Deserializes this bitset from an archive
00439     inline void load(iarchive& iarc) {
00440       if (array != NULL) free(array);
00441       array = NULL;
00442       size_t l;
00443       size_t arl;
00444       iarc >> l >> arl;
00445       ASSERT_EQ(l, len);
00446       ASSERT_EQ(arl, arrlen);
00447       if (arrlen > 0) {
00448         deserialize(iarc, array, arrlen*sizeof(size_t));
00449       }
00450     }
00451 
00452   private:
00453    
00454     inline size_t next_powerof2(size_t val) {
00455       --val;
00456       val = val | (val >> 1);
00457       val = val | (val >> 2);
00458       val = val | (val >> 4);
00459       val = val | (val >> 8);
00460       val = val | (val >> 16);
00461 #ifdef _LP64
00462       val = val | (val >> 32);
00463 #endif
00464       return val + 1; 
00465     }
00466   
00467  
00468     inline static void bit_to_pos(uint32_t b, uint32_t &arrpos, uint32_t &bitpos) {
00469       // the compiler better optimize this...
00470       arrpos = b / (8 * sizeof(size_t));
00471       bitpos = b & (8 * sizeof(size_t) - 1);
00472     }
00473   
00474 
00475     // returns 0 on failure
00476     inline uint32_t next_bit_in_block(const uint32_t &b, const size_t &block) {
00477       // use CAS to set the bit
00478       size_t belowselectedbit = size_t(-1) - (((size_t(1) << b) - 1)|(size_t(1)<<b));
00479       size_t x = block & belowselectedbit ;
00480       if (x == 0) return 0;
00481       else return (uint32_t)__builtin_ctzl(x);
00482     }
00483 
00484     // returns 0 on failure
00485     inline uint32_t first_bit_in_block(const size_t &block) {
00486       // use CAS to set the bit
00487       if (block == 0) return 0;
00488       else return (uint32_t)__builtin_ctzl(block);
00489     }
00490 
00491     static const size_t arrlen;
00492     size_t array[len / sizeof(size_t) + 1];
00493   };
00494 
00495   template<int len>
00496   const size_t fixed_dense_bitset<len>::arrlen = len / sizeof(size_t) + 1;
00497 }
00498 #endif
00499