stl_util.hpp

00001 /*
00002 This file is part of GraphLab.
00003 
00004 GraphLab is free software: you can redistribute it and/or modify
00005 it under the terms of the GNU Lesser General Public License as 
00006 published by the Free Software Foundation, either version 3 of 
00007 the License, or (at your option) any later version.
00008 
00009 GraphLab is distributed in the hope that it will be useful,
00010 but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012 GNU Lesser General Public License for more details.
00013 
00014 You should have received a copy of the GNU Lesser General Public 
00015 License along with GraphLab.  If not, see <http://www.gnu.org/licenses/>.
00016 */
00017 
00018 // Probabilistic Reasoning Library (PRL)
00019 // Copyright 2009 (see AUTHORS.txt for a list of contributors)
00020 //
00021 // This library is free software; you can redistribute it and/or
00022 // modify it under the terms of the GNU Lesser General Public
00023 // License as published by the Free Software Foundation; either
00024 // version 2.1 of the License, or (at your option) any later version.
00025 //
00026 // This library is distributed in the hope that it will be useful,
00027 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00028 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00029 // Lesser General Public License for more details.
00030 //
00031 // You should have received a copy of the GNU Lesser General Public
00032 // License along with this library; if not, write to the Free Software
00033 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00034 
00035 #ifndef GRAPHLAB_STL_UTIL_HPP
00036 #define GRAPHLAB_STL_UTIL_HPP
00037 
00038 
00039 #include <set>
00040 #include <map>
00041 #include <vector>
00042 #include <algorithm>
00043 #include <iterator>
00044 #include <sstream>
00045 #include <iostream>
00046 #include <iomanip>
00047 
00048 
00049 #include <graphlab/serialization/serialize.hpp>
00050 #include <graphlab/serialization/set.hpp>
00051 #include <graphlab/serialization/map.hpp>
00052 
00053 #include <graphlab/macros_def.hpp>
00054 
00055 namespace graphlab {
00056 
00057   // Functions on sets
00058   //============================================================================
00059 
00060   /**
00061    * computes the union of two sets.
00062    */
00063   template <typename T>
00064   std::set<T> set_union(const std::set<T>& a, const std::set<T>& b) {
00065     std::set<T> output;
00066     std::set_union(a.begin(), a.end(), 
00067                    b.begin(), b.end(),
00068                    std::inserter(output, output.begin()));
00069     return output;
00070   }
00071   
00072   template <typename T>
00073   std::set<T> set_union(const std::set<T>& a, const T& b) {
00074     std::set<T> output = a;
00075     output.insert(b);
00076     return output;
00077   }
00078 
00079   template <typename T>
00080   std::set<T> set_intersect(const std::set<T>& a, const std::set<T>& b) {
00081     std::set<T> output;
00082     std::set_intersection(a.begin(), a.end(), 
00083                           b.begin(), b.end(),
00084                           std::inserter(output, output.begin()));
00085     return output;
00086   }
00087 
00088   template <typename T>
00089   std::set<T> set_difference(const std::set<T>& a, const std::set<T>& b) {
00090     std::set<T> output;
00091     std::set_difference(a.begin(), a.end(), 
00092                         b.begin(), b.end(),
00093                         std::inserter(output, output.begin()));
00094     return output;
00095   }
00096 
00097 
00098   template <typename T>
00099   std::set<T> set_difference(const std::set<T>& a, const T& b) {
00100     std::set<T> output = a;
00101     output.erase(b);
00102     return output;
00103   }
00104 
00105   //! @return 2 sets: <s in partition, s not in partition>
00106   template <typename T>
00107   std::pair<std::set<T>,std::set<T> > 
00108   set_partition(const std::set<T>& s, const std::set<T>& partition) {
00109     std::set<T> a, b;
00110     a = set_intersect(s, partition);
00111     b = set_difference(s, partition);
00112     return std::make_pair(a, b);
00113   }
00114 
00115   template <typename T>
00116   bool set_disjoint(const std::set<T>& a, const std::set<T>& b) {
00117     return (intersection_size(a,b) == 0);
00118   }
00119   
00120   template <typename T>
00121   bool set_equal(const std::set<T>& a, const std::set<T>& b) {
00122     if (a.size() != b.size()) return false;
00123     return a == b; // defined in <set>
00124   }
00125   
00126   template <typename T>
00127   bool includes(const std::set<T>& a, const std::set<T>& b) {
00128     return std::includes(a.begin(), a.end(), b.begin(), b.end());
00129   }
00130 
00131   /** 
00132    * Returns true if $a \subseteq b$
00133    */
00134   template <typename T>
00135   bool is_subset(const std::set<T>& a, const std::set<T>& b) {
00136     return includes(b, a);
00137   }
00138 
00139   template <typename T>
00140   bool is_superset(const std::set<T>& a,const std::set<T>& b) {
00141     return includes(a, b);
00142   }
00143   
00144   //! Writes a human representation of the set to the supplied stream.
00145   template <typename T>
00146   std::ostream& operator<<(std::ostream& out, const std::set<T>& s) {
00147     return print_range(out, s, '{', ' ', '}');
00148   }
00149 
00150   // Functions on maps
00151   //============================================================================
00152 
00153   /**
00154    * constant lookup in a map. assertion failure of key not found in map
00155    */
00156   template <typename Key, typename T>
00157   const T& safe_get(const std::map<Key, T>& map,
00158                     const Key& key) {
00159     typedef typename std::map<Key, T>::const_iterator iterator;
00160     iterator iter = map.find(key);
00161     ASSERT_TRUE(iter != map.end());
00162     return iter->second;
00163   } // end of safe_get
00164 
00165   /**
00166    * constant lookup in a map. If key is not found in map, 
00167    * 'default_value' is returned. Note that this can't return a reference
00168    * and must return a copy
00169    */
00170   template <typename Key, typename T>
00171   const T safe_get(const std::map<Key, T>& map,
00172                     const Key& key, const T default_value) {
00173     typedef typename std::map<Key, T>::const_iterator iterator;
00174     iterator iter = map.find(key);
00175     if (iter == map.end())   return default_value;
00176     else return iter->second;
00177   } // end of safe_get
00178 
00179   /**
00180    * Transform each key in the map using the key_map
00181    * transformation. The resulting map will have the form
00182    * output[key_map[i]] = map[i]
00183    */
00184   template <typename OldKey, typename NewKey, typename T>
00185   std::map<NewKey, T>
00186   rekey(const std::map<OldKey, T>& map,
00187         const std::map<OldKey, NewKey>& key_map) {
00188     std::map<NewKey, T> output;
00189     typedef std::pair<OldKey, T> pair_type;
00190     foreach(const pair_type& pair, map) {
00191       output[safe_get(key_map, pair.first)] = pair.second;
00192     }
00193     return output;
00194   }
00195 
00196   /**
00197    * Transform each key in the map using the key_map
00198    * transformation. The resulting map will have the form
00199    output[i] = remap[map[i]]
00200   */
00201   template <typename Key, typename OldT, typename NewT>
00202   std::map<Key, NewT>
00203   remap(const std::map<Key, OldT>& map,
00204         const std::map<OldT, NewT>& val_map) {
00205     std::map<Key, NewT> output;
00206     typedef std::pair<Key, OldT> pair_type;
00207     foreach(const pair_type& pair, map) {
00208       output[pair.first] = safe_get(val_map, pair.second);
00209     }
00210     return output;
00211   }
00212 
00213   /**
00214    * Inplace version of remap
00215    */
00216   template <typename Key, typename T>
00217   void remap(std::map<Key, T>& map,
00218              const std::map<T, T>& val_map) {
00219     typedef std::pair<Key, T> pair_type;
00220     foreach(pair_type& pair, map) {
00221       pair.second = safe_get(val_map, pair.second);
00222     }
00223   }
00224 
00225   /**
00226    * Computes the union of two maps
00227    */
00228   template <typename Key, typename T>
00229   std::map<Key, T> 
00230   map_union(const std::map<Key, T>& a,
00231             const std::map<Key, T>& b) {
00232     // Initialize the output map
00233     std::map<Key, T> output;
00234     std::set_union(a.begin(), a.end(),
00235                    b.begin(), b.end(),
00236                    std::inserter(output, output.begin()),
00237                    output.value_comp());
00238     return output;
00239   }
00240   
00241   /**
00242    * Computes the intersection of two maps
00243    */
00244   template <typename Key, typename T>
00245   std::map<Key, T> 
00246   map_intersect(const std::map<Key, T>& a,
00247                 const std::map<Key, T>& b) {
00248     // Initialize the output map
00249     std::map<Key, T> output;
00250     // compute the intersection
00251     std::set_intersection(a.begin(), a.end(),
00252                           b.begin(), b.end(),
00253                           std::inserter(output, output.begin()),
00254                           output.value_comp());
00255     return output;
00256   }
00257   
00258   /**
00259    * Returns the entries of a map whose keys show up in the set keys
00260    */
00261   template <typename Key, typename T>
00262   std::map<Key, T> 
00263   map_intersect(const std::map<Key, T>& m,
00264                 const std::set<Key>& keys) {
00265     std::map<Key, T> output;
00266     foreach(const Key& key, keys) {
00267       typename std::map<Key,T>::const_iterator it = m.find(key);
00268       if (it != m.end())
00269         output[key] = it->second;
00270     }
00271     return output;
00272   }
00273 
00274   /**
00275    * Computes the difference between two maps
00276    */
00277   template <typename Key, typename T>
00278   std::map<Key, T> 
00279   map_difference(const std::map<Key, T>& a,
00280                  const std::map<Key, T>& b) {
00281     // Initialize the output map
00282     std::map<Key, T> output;
00283     // compute the intersection
00284     std::set_difference(a.begin(), a.end(),
00285                         b.begin(), b.end(),
00286                         std::inserter(output, output.begin()),
00287                         output.value_comp());
00288     return output;
00289   }
00290 
00291 
00292   /**
00293    * Returns the set of keys in a map
00294    */
00295   template <typename Key, typename T>
00296   std::set<Key> keys(const std::map<Key, T>& map) {
00297     std::set<Key> output;
00298     typedef std::pair<Key, T> pair_type;
00299     foreach(const pair_type& pair, map) {
00300       output.insert(pair.first);
00301     }
00302     return output;
00303   }
00304 
00305   /**
00306    * Get teh set of keys in a map as a vector
00307    */
00308   template <typename Key, typename T>
00309   std::vector<Key> keys_as_vector(const std::map<Key, T>& map) {
00310     std::vector<Key> output(map.size());   
00311     typedef std::pair<Key, T> pair_type;
00312     size_t i = 0;
00313     foreach(const pair_type& pair, map) {
00314       output[i++] = pair.first;
00315     }
00316     return output;
00317   }
00318 
00319 
00320   /**
00321    * Gets the values from a map
00322    */
00323   template <typename Key, typename T>
00324   std::set<T> values(const std::map<Key, T>& map) {
00325     std::set<T> output;
00326     typedef std::pair<Key, T> pair_type;
00327     foreach(const pair_type& pair, map) {
00328       output.insert(pair.second);
00329     }
00330     return output;
00331   }
00332   
00333   template <typename Key, typename T>
00334   std::vector<T> values(const std::map<Key, T>& m, 
00335                         const std::set<Key>& keys) {
00336     std::vector<T> output;
00337 
00338     foreach(const Key &i, keys) {
00339       output.push_back(safe_get(m, i));
00340     }
00341     return output;
00342   }
00343   
00344   template <typename Key, typename T>
00345   std::vector<T> values(const std::map<Key, T>& m, 
00346                         const std::vector<Key>& keys) {
00347     std::vector<T> output;
00348     foreach(const Key &i, keys) {
00349       output.push_back(safe_get(m, i));
00350     }
00351     return output;
00352   }
00353   
00354   //! Creates an identity map (a map from elements to themselves)
00355   template <typename Key>
00356   std::map<Key, Key> make_identity_map(const std::set<Key>& keys) {
00357     std::map<Key, Key> m;
00358     foreach(const Key& key, keys) 
00359       m[key] = key;
00360     return m;
00361   }
00362 
00363   //! Writes a map to the supplied stream.
00364   template <typename Key, typename T>
00365   std::ostream& operator<<(std::ostream& out, const std::map<Key, T>& m) {
00366     out << "{";
00367     for (typename std::map<Key, T>::const_iterator it = m.begin(); 
00368          it != m.end();) {
00369       out << it->first << "-->" << it->second;
00370       if (++it != m.end()) out << " ";
00371     }
00372     out << "}";
00373     return out;
00374   }
00375 
00376   /** Removes white space (space and tabs) from the beginning and end of str,
00377       returning the resultant string
00378   */
00379   inline std::string trim(const std::string& str) {
00380     std::string::size_type pos1 = str.find_first_not_of(" \t");
00381     std::string::size_type pos2 = str.find_last_not_of(" \t");
00382     return str.substr(pos1 == std::string::npos ? 0 : pos1,
00383                       pos2 == std::string::npos ? str.size()-1 : pos2-pos1+1);
00384   }
00385 
00386   /**
00387   * Convenience function for using std streams to convert anything to a string
00388   */
00389   template<typename T>
00390   std::string tostr(const T& t) {
00391     std::stringstream strm;
00392     strm << t;
00393     return strm.str();
00394   }
00395 
00396   /**
00397   * Convenience function for using std streams to convert a string to anything
00398   */
00399   template<typename T>
00400   T fromstr(const std::string& str) {
00401     std::stringstream strm(str);
00402     T elem; 
00403     strm >> elem;
00404     assert(!strm.fail());
00405     return elem;
00406   }
00407 
00408   /**
00409   Returns a string representation of the number,
00410   padded to 'npad' characters using the pad_value character
00411   */
00412   inline std::string pad_number(const size_t number,
00413                                 const size_t npad,
00414                                 const char pad_value = '0') {
00415     std::stringstream strm;
00416     strm << std::setw((int)npad) << std::setfill(pad_value)
00417          << number;
00418     return strm.str();
00419   }
00420 
00421 
00422   // inline std::string change_suffix(const std::string& fname,
00423   //                                  const std::string& new_suffix) {             
00424   //   size_t pos = fname.rfind('.');
00425   //   assert(pos != std::string::npos); 
00426   //   const std::string new_base(fname.substr(0, pos));
00427   //   return new_base + new_suffix;
00428   // } // end of change_suffix
00429 
00430 
00431   /**
00432   Using splitchars as delimiters, splits the string into a vector of strings.
00433   if auto_trim is true, trim() is called on all the extracted strings
00434   before returning.
00435   */
00436   inline std::vector<std::string> strsplit(const std::string& str, 
00437                                            const std::string& splitchars,
00438                                            const bool auto_trim = false) {
00439     std::vector<std::string> tokens;
00440     for(size_t beg = 0, end = 0; end != std::string::npos; beg = end+1) {
00441       end = str.find_first_of(splitchars, beg);
00442       if(auto_trim) {
00443         if(end - beg > 0) {
00444           std::string tmp = trim(str.substr(beg, end - beg));
00445           if(!tmp.empty()) tokens.push_back(tmp);
00446         }
00447       } else tokens.push_back(str.substr(beg, end - beg));
00448     }
00449     return tokens;
00450     // size_t pos = 0;
00451     // while(1) {
00452     //   size_t nextpos = s.find_first_of(splitchars, pos);
00453     //   if (nextpos != std::string::npos) {
00454     //     ret.push_back(s.substr(pos, nextpos - pos));
00455     //     pos = nextpos + 1;
00456     //   } else {
00457     //     ret.push_back(s.substr(pos));
00458     //     break;
00459     //   }
00460     // }
00461     // return ret;
00462   }
00463 }; // end of namespace graphlab
00464 
00465 #include <graphlab/macros_undef.hpp>
00466 
00467 #endif
00468