00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef GRAPHLAB_FACTORS_HPP
00019 #define GRAPHLAB_FACTORS_HPP
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include <cassert>
00036 #include <cmath>
00037
00038 #include <iostream>
00039 #include <algorithm>
00040 #include <limits>
00041 #include <vector>
00042 #include <set>
00043
00044
00045
00046 #include <graphlab/util/random.hpp>
00047
00048 #include <graphlab/serialization/serialization_includes.hpp>
00049
00050 #include <graphlab/factors/discrete_variable.hpp>
00051 #include <graphlab/factors/discrete_domain.hpp>
00052 #include <graphlab/factors/discrete_assignment.hpp>
00053
00054
00055
00056
00057 #include <graphlab/macros_def.hpp>
00058
00059 namespace graphlab {
00060
00061
00062
00063
00064
00065 static const double EPSILON = std::numeric_limits<double>::min();
00066 static const double LOG_EPSILON = log(EPSILON);
00067 static const double MAX_DOUBLE = std::numeric_limits<double>::max();
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077 template<size_t MAX_DIM>
00078 class table_factor {
00079 public:
00080 typedef discrete_variable variable_type;
00081 typedef discrete_domain<MAX_DIM> domain_type;
00082 typedef discrete_assignment<MAX_DIM> assignment_type;
00083
00084 table_factor() { }
00085
00086 table_factor(const domain_type& dom) :
00087 _args(dom), _data(dom.size()) {
00088
00089 }
00090
00091 table_factor(const table_factor& other) :
00092 _args(other._args), _data(other._data) { }
00093
00094 table_factor& operator=(const table_factor& other) {
00095 _args = other._args;
00096 _data = other._data;
00097 return *this;
00098 }
00099
00100 void set_args(const domain_type& args) {
00101 _args = args;
00102 _data.resize(args.size());
00103
00104 }
00105
00106 const domain_type& args() const {
00107 return _args;
00108 }
00109
00110 const double& logP(size_t index) const {
00111 assert(index < _args.size());
00112 return _data[index];
00113 }
00114
00115 double& logP(size_t index) {
00116 assert(index < _args.size());
00117 return _data[index];
00118 }
00119
00120 const double& logP(const assignment_type& asg) const {
00121 if(asg.args() == args()) {
00122
00123 size_t index = asg.linear_index();
00124 assert(index < _data.size());
00125 return _data[index];
00126 } else {
00127
00128 assignment_type sub_asg = asg.restrict(_args);
00129 return _data[sub_asg.linear_index()];
00130 }
00131 }
00132
00133
00134 double& logP(const assignment_type& asg) {
00135 if(asg.args() == args()) {
00136
00137 size_t index = asg.linear_index();
00138 assert(index < _data.size());
00139 return _data[index];
00140 } else {
00141
00142 assignment_type sub_asg = asg.restrict(_args);
00143 return _data[sub_asg.linear_index()];
00144 }
00145 }
00146
00147 size_t size() const { return _args.size(); }
00148
00149 size_t num_vars() const { return _args.num_vars(); }
00150
00151 double* begin() { return _data.begin(); }
00152 const double* begin() const { return _data.begin(); }
00153
00154
00155
00156 double* end() { return _data.end(); }
00157 const double* end() const { return _data.end(); }
00158
00159 void zero() { std::fill(_data.begin(), _data.end(), 0); }
00160
00161 void uniform() {
00162 std::fill(_data.begin(), _data.end(),
00163 log(1.0/_data.size()));
00164 }
00165
00166 void uniform(double value) {
00167 std::fill(_data.begin(), _data.end(), value);
00168 }
00169
00170
00171
00172
00173 void normalize() {
00174
00175 double max_value = logP(0);
00176 for(size_t asg = 0; asg < size(); ++asg)
00177 max_value = std::max(max_value, logP(asg));
00178
00179
00180
00181 double Z = 0.0;
00182 for(size_t asg = 0; asg < size(); ++asg) {
00183 logP(asg) -= max_value;
00184 Z += exp(logP(asg));
00185 }
00186
00187
00188
00189 double logZ = log(Z);
00190 assert( !std::isinf(logZ) );
00191 assert( !std::isnan(logZ) );
00192
00193 for(size_t asg = 0; asg < size(); ++asg) {
00194 logP(asg) -= logZ;
00195 if(logP(asg) < LOG_EPSILON) logP(asg) = -MAX_DOUBLE;
00196 }
00197 }
00198
00199
00200
00201 inline table_factor& operator+=(const table_factor& other) {
00202 for(assignment_type asg = args().begin(); asg < args().end(); ++asg) {
00203 logP(asg.linear_index()) =
00204 log( exp(logP(asg.linear_index())) + exp(other.logP(asg)) );
00205
00206
00207
00208 }
00209 return *this;
00210 }
00211
00212
00213
00214
00215 inline table_factor& operator*=(const table_factor& other) {
00216 for(assignment_type asg = args().begin(); asg < args().end(); ++asg) {
00217 logP(asg.linear_index()) += other.logP(asg);
00218 if(std::isinf( logP(asg.linear_index()) ) ||
00219 std::isnan( logP(asg.linear_index()) ) ) {
00220 logP(asg.linear_index()) = -MAX_DOUBLE;
00221 }
00222
00223
00224 }
00225 return *this;
00226 }
00227
00228
00229 table_factor operator*(const table_factor& other) const {
00230 table_factor factor = *this;
00231 return factor *= other;
00232 }
00233
00234
00235 inline table_factor& operator/=(const table_factor& other) {
00236 for(assignment_type asg = args().begin(); asg < args().end(); ++asg) {
00237 logP(asg.linear_index()) -= other.logP(asg);
00238 assert( !std::isinf( logP(asg.linear_index()) ) );
00239 assert( !std::isnan( logP(asg.linear_index()) ) );
00240 }
00241 return *this;
00242 }
00243
00244
00245 table_factor operator/(const table_factor& other) const {
00246 table_factor factor = *this;
00247 return factor /= other;
00248 }
00249
00250
00251
00252
00253 inline void convolve(const table_factor& joint,
00254 const table_factor& other) {
00255
00256 assert(args() + other.args() == joint.args());
00257
00258
00259 uniform(0);
00260 for(assignment_type asg = joint.args().begin();
00261 asg < joint.args().end(); ++asg) {
00262 const double value =
00263 exp(joint.logP(asg.linear_index()) + other.logP(asg));
00264 assert( !std::isinf(value) );
00265 assert( !std::isnan(value) );
00266 logP(asg) += value;
00267 }
00268
00269 for(size_t i = 0; i < _data.size(); ++i) {
00270 double& sum = _data[i];
00271 assert(sum >= 0.0);
00272 if(sum == 0) sum = -MAX_DOUBLE;
00273 else sum = std::log(sum);
00274 }
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293 }
00294
00295
00296
00297 inline void condition(const table_factor& other,
00298 const assignment_type& asg) {
00299 assert(args() == other.args() - asg.args());
00300 for(assignment_type xasg = args().begin();
00301 xasg < args().end(); ++xasg) {
00302 assignment_type joint = xasg & asg;
00303 assert(joint.args() == other.args());
00304 logP(xasg.linear_index()) = other.logP(joint.linear_index());
00305 }
00306 }
00307
00308
00309
00310 inline void times_condition(const table_factor& other,
00311 const assignment_type& asg) {
00312
00313 if(asg.num_vars() == 0) {
00314 *this *= other;
00315 } else {
00316 for(assignment_type xasg = args().begin();
00317 xasg < args().end(); ++xasg) {
00318 assignment_type joint = xasg & asg;
00319 assert(joint.args() == other.args());
00320 logP(xasg.linear_index()) += other.logP(joint.linear_index());
00321 }
00322 }
00323 }
00324
00325
00326
00327 inline void marginalize(const table_factor& joint) {
00328
00329 if(args() == joint.args()) {
00330
00331 *this = joint;
00332 return;
00333 }
00334
00335 domain_type ydom = joint.args() - args();
00336 assert(ydom.num_vars() > 0);
00337
00338
00339 for(assignment_type xasg = args().begin();
00340 xasg < args().end(); ++xasg) {
00341 double sum = 0;
00342 for(assignment_type yasg = ydom.begin();
00343 yasg < ydom.end(); ++yasg) {
00344 assignment_type joint_asg = xasg & yasg;
00345 sum += exp(joint.logP(joint_asg.linear_index()));
00346 }
00347 assert( !std::isinf(sum) );
00348 assert( !std::isnan(sum) );
00349 assert(sum >= 0.0);
00350 if(sum == 0) logP(xasg.linear_index()) = -MAX_DOUBLE;
00351 else logP(xasg.linear_index()) = log(sum);
00352 }
00353 }
00354
00355
00356 inline void damp(const table_factor& other, double damping) {
00357
00358
00359 assert(args() == other.args());
00360 if(damping == 0) return;
00361 assert(damping >= 0.0);
00362 assert(damping < 1.0);
00363 for(size_t i = 0; i < args().size(); ++i) {
00364 double val = damping * exp(other.logP(i)) +
00365 (1-damping) * exp(logP(i));
00366 assert(val >= 0);
00367 if(val == 0) logP(i) = -MAX_DOUBLE;
00368 else logP(i) = log(val);
00369 assert( !std::isinf(logP(i)) );
00370 assert( !std::isnan(logP(i)) );
00371 }
00372 }
00373
00374
00375
00376 inline double l1_diff(const table_factor& other) const {
00377
00378
00379 assert(args() == other.args());
00380 double sum = 0;
00381 for(size_t i = 0; i < args().size(); ++i) {
00382 sum += fabs(exp(other.logP(i)) - exp(logP(i)));
00383 }
00384 return sum / args().size();
00385 }
00386
00387
00388
00389 inline double l1_logdiff(const table_factor& other) const {
00390 assert(args() == other.args());
00391 double sum = 0;
00392 for(size_t i = 0; i < args().size(); ++i) {
00393 sum += fabs(other.logP(i) - logP(i));
00394 }
00395 return sum / args().size();
00396 }
00397
00398
00399 inline assignment_type max_asg() const {
00400 assignment_type max_asg = args().begin();
00401 double max_value = logP(max_asg.linear_index());
00402 for(assignment_type asg = args().begin();
00403 asg < args().end(); ++asg) {
00404 if(logP(asg.linear_index()) > max_value) {
00405 max_value = logP(asg.linear_index());
00406 max_asg = asg;
00407 }
00408 }
00409 return max_asg;
00410 }
00411
00412 inline void expectation(std::vector<double>& values) const {
00413 values.clear();
00414 values.resize(num_vars(), 0);
00415 double sum = 0;
00416 for(assignment_type asg = args().begin();
00417 asg < args().end(); ++asg) {
00418 double scale = exp(logP(asg.linear_index()));
00419 sum += scale;
00420 for(size_t i = 0; i < num_vars(); ++i) {
00421 values[i] += asg.asg_at(i) * scale;
00422 }
00423 }
00424
00425 for(size_t i = 0; i < num_vars(); ++i) values[i] /= sum;
00426 }
00427
00428
00429 inline assignment_type sample() const {
00430 assert(size() > 0);
00431
00432 const double t = random::rand01();
00433 assert( t >= 0 && t < 1);
00434 double sum = 0;
00435 for(assignment_type asg = args().begin();
00436 asg < args().end(); ++asg) {
00437 sum += exp(logP(asg.linear_index()));
00438 if(t <= sum) return asg;
00439 assert(sum < 1);
00440 }
00441
00442 std::cout << "{";
00443 for(size_t i = 0; i < num_vars(); ++i) {
00444 std::cout << args().var(i).id() << " ";
00445 }
00446 std::cout << "}" << std::endl;
00447 assert(false);
00448 }
00449
00450 void set_as_agreement(double lambda) {
00451 assert(num_vars() == 2);
00452 for(assignment_type asg = args().begin();
00453 asg < args().end(); ++asg) {
00454 int diff = abs( int(asg.asg(0)) - int(asg.asg(1)) );
00455 if( diff > 0) logP(asg.linear_index()) = -lambda;
00456 else logP(asg.linear_index()) = 0;
00457 }
00458 }
00459
00460
00461
00462 void set_as_laplace(double lambda) {
00463 assert(num_vars() == 2);
00464 for(assignment_type asg = args().begin();
00465 asg < args().end(); ++asg) {
00466 int diff = abs( int(asg.asg_at(0)) - int(asg.asg_at(1)) );
00467 logP(asg.linear_index()) = -diff * lambda;
00468 }
00469 }
00470
00471 void load(iarchive& arc) {
00472 arc >> _args;
00473 arc >> _data;
00474 }
00475
00476 void save(oarchive& arc) const {
00477 arc << _args;
00478 arc << _data;
00479 }
00480
00481 private:
00482 domain_type _args;
00483 std::vector<double> _data;
00484 };
00485
00486
00487 };
00488
00489
00490
00491
00492
00493
00494
00495 template<size_t MAX_DIM>
00496 std::ostream& operator<<(std::ostream& out,
00497 const graphlab::table_factor<MAX_DIM>& factor) {
00498 typedef graphlab::table_factor<MAX_DIM> factor_type;
00499 typedef typename factor_type::assignment_type assignment_type;
00500 out << "Table Factor: " << factor.args() << "{" << std::endl;
00501 for(assignment_type asg = factor.args().begin();
00502 asg < factor.args().end(); ++asg) {
00503 out << "\tLogP(" << asg << ")=" << factor.logP(asg) << std::endl;
00504 }
00505 return out << "}";
00506 }
00507
00508
00509
00510
00511
00512 #include <graphlab/macros_undef.hpp>
00513 #endif
00514
00515
00516
00517
00518
00519
00520