00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef GRAPHLAB_FACTORS_HPP
00019 #define GRAPHLAB_FACTORS_HPP
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include <cassert>
00036 #include <cmath>
00037
00038 #include <iostream>
00039 #include <algorithm>
00040 #include <limits>
00041 #include <vector>
00042 #include <set>
00043
00044
00045
00046 #include <graphlab/util/random.hpp>
00047
00048 #include <graphlab/serialization/serialization_includes.hpp>
00049
00050 #include <graphlab/factors/discrete_variable.hpp>
00051 #include <graphlab/factors/discrete_domain.hpp>
00052 #include <graphlab/factors/discrete_assignment.hpp>
00053 #include <graphlab/factors/fast_discrete_assignment.hpp>
00054
00055
00056
00057
00058 #include <graphlab/macros_def.hpp>
00059
00060 namespace graphlab {
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078 template<size_t MAX_DIM>
00079 class table_factor {
00080 public:
00081 typedef discrete_variable variable_type;
00082 typedef discrete_domain<MAX_DIM> domain_type;
00083 typedef discrete_assignment<MAX_DIM> assignment_type;
00084
00085
00086
00087 table_factor() { }
00088
00089
00090 table_factor(const domain_type& dom) :
00091 _args(dom), _data(dom.size()) { }
00092
00093
00094 table_factor(const table_factor& other) :
00095 _args(other._args), _data(other._data) { }
00096
00097
00098 table_factor& operator=(const table_factor& other) {
00099 _args = other._args;
00100 _data = other._data;
00101 return *this;
00102 }
00103
00104 void set_args(const domain_type& args) {
00105 _args = args;
00106 _data.resize(args.size());
00107 }
00108
00109 const domain_type& args() const {
00110 return _args;
00111 }
00112
00113 const double& logP(size_t index) const {
00114 ASSERT_LT(index, _data.size());
00115 return _data[index];
00116 }
00117
00118 double& logP(size_t index) {
00119 ASSERT_LT(index, _data.size());
00120 return _data[index];
00121 }
00122
00123 const double& logP(const assignment_type& asg) const {
00124 if(asg.args() == args()) {
00125
00126 const size_t index(asg.linear_index());
00127 ASSERT_LT(index, _data.size());
00128 return _data[index];
00129 } else {
00130
00131 const assignment_type sub_asg = asg.restrict(_args);
00132 const size_t index(sub_asg.linear_index());
00133 ASSERT_LT(index, _data.size());
00134 return _data[index];
00135 }
00136 }
00137
00138
00139 double& logP(const assignment_type& asg) {
00140 if(asg.args() == args()) {
00141
00142 const size_t index(asg.linear_index());
00143 ASSERT_LT(index, _data.size());
00144 return _data[index];
00145 } else {
00146
00147 const assignment_type sub_asg = asg.restrict(_args);
00148 const size_t index(sub_asg.linear_index());
00149 ASSERT_LT(index, _data.size());
00150 return _data[sub_asg.linear_index()];
00151 }
00152 }
00153
00154 size_t size() const { return _args.size(); }
00155
00156 size_t num_vars() const { return _args.num_vars(); }
00157
00158 double* begin() { return _data.begin(); }
00159 const double* begin() const { return _data.begin(); }
00160
00161
00162
00163 double* end() { return _data.end(); }
00164 const double* end() const { return _data.end(); }
00165
00166 void zero() { std::fill(_data.begin(), _data.end(), 0); }
00167
00168 void uniform() {
00169 std::fill(_data.begin(), _data.end(),
00170 log(1.0/_data.size()));
00171 }
00172
00173 void uniform(double value) {
00174 std::fill(_data.begin(), _data.end(), value);
00175 }
00176
00177
00178
00179 void normalize() {
00180
00181 double max_value = logP(0);
00182 for(size_t i = 0; i < _data.size(); ++i)
00183 max_value = std::max(max_value, _data[i] );
00184
00185 double Z = 0.0;
00186 for(size_t i = 0; i < _data.size(); ++i)
00187 Z += exp( _data[i] -= max_value );
00188
00189
00190
00191 const double logZ(log(Z));
00192 ASSERT_FALSE( std::isinf(logZ) );
00193 ASSERT_FALSE( std::isnan(logZ) );
00194
00195 for(size_t i = 0; i < _data.size(); ++i) _data[i] -= logZ;
00196 }
00197
00198
00199
00200
00201
00202
00203 void shift_normalize() {
00204
00205 double max_value = logP(0);
00206 for(size_t i = 0; i < _data.size(); ++i)
00207 max_value = std::max(max_value, _data[i]);
00208 for(size_t i = 0; i < _data.size(); ++i) _data[i] -= max_value;
00209 }
00210
00211
00212
00213
00214
00215 bool is_finite() {
00216 for(size_t i = 0; i < _data.size(); ++i) {
00217 const bool is_inf( std::isinf( _data[i] ) );
00218 const bool is_nan( std::isnan( _data[i] ) );
00219 if( __builtin_expect( is_inf || is_nan, 0) ) return false;
00220 }
00221 return true;
00222 }
00223
00224
00225
00226 inline table_factor& operator+=(const table_factor& other) {
00227 if(args() == other.args()) {
00228 ASSERT_EQ(_data.size(), other._data.size());
00229
00230 for(size_t i = 0; i < _data.size(); ++i)
00231 _data[i] = log( exp(_data[i]) + exp(other._data[i]) );
00232 } else {
00233 for(assignment_type asg = args().begin(); asg < args().end(); ++asg) {
00234 logP(asg.linear_index()) =
00235 log( exp(logP(asg.linear_index())) + exp(other.logP(asg)) );
00236
00237
00238 }
00239 }
00240 return *this;
00241 }
00242
00243
00244
00245
00246 inline table_factor& operator*=(const table_factor& other) {
00247 if(args() == other.args()) {
00248 ASSERT_EQ(_data.size(), other._data.size());
00249
00250 for(size_t i = 0; i < _data.size(); ++i) _data[i] += other._data[i];
00251 } else {
00252 for(assignment_type asg = args().begin(); asg < args().end(); ++asg) {
00253 logP(asg.linear_index()) += other.logP(asg);
00254
00255
00256 }
00257 }
00258 return *this;
00259 }
00260
00261
00262 table_factor operator*(const table_factor& other) const {
00263 table_factor factor = *this;
00264 return factor *= other;
00265 }
00266
00267
00268 inline table_factor& operator/=(const table_factor& other) {
00269 if(args() == other.args()) {
00270 ASSERT_EQ(_data.size(), other._data.size());
00271
00272 for(size_t i = 0; i < _data.size(); ++i) _data[i] -= other._data[i];
00273 } else {
00274 for(assignment_type asg = args().begin(); asg < args().end(); ++asg) {
00275 logP(asg.linear_index()) -= other.logP(asg);
00276
00277
00278 }
00279 }
00280 return *this;
00281 }
00282
00283
00284 table_factor operator/(const table_factor& other) const {
00285 table_factor factor = *this;
00286 return factor /= other;
00287 }
00288
00289
00290
00291
00292 inline void convolve(const table_factor& joint,
00293 const table_factor& other) {
00294
00295 ASSERT_EQ(args() + other.args(), joint.args());
00296
00297
00298 uniform(0);
00299 for(assignment_type asg = joint.args().begin();
00300 asg < joint.args().end(); ++asg) {
00301 const double value =
00302 exp(joint.logP(asg.linear_index()) + other.logP(asg));
00303 ASSERT_FALSE(std::isinf( value ));
00304 ASSERT_FALSE(std::isnan( value ));
00305 logP(asg) += value;
00306 }
00307
00308 for(size_t i = 0; i < _data.size(); ++i) {
00309 double& sum = _data[i];
00310 ASSERT_GE(sum, 0.0);
00311 if(sum == 0) sum = -std::numeric_limits<double>::max();
00312 else sum = log(sum);
00313 }
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332 }
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351 inline void condition(const table_factor& other,
00352 const assignment_type& asg) {
00353 ASSERT_EQ(args(), other.args() - asg.args());
00354
00355
00356
00357 fast_discrete_assignment<MAX_DIM> fastyasg(args().begin() & asg);
00358
00359 fastyasg.transpose_to_start(args());
00360
00361 for(assignment_type xasg = args().begin();
00362 xasg < args().end(); ++xasg) {
00363 logP(xasg.linear_index()) = other.logP(fastyasg.linear_index());
00364 ++fastyasg;
00365 }
00366 }
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387 inline void times_condition(const table_factor& other,
00388 const assignment_type& asg) {
00389
00390
00391
00392
00393 fast_discrete_assignment<MAX_DIM> fastyasg(args().begin() & asg);
00394
00395 fastyasg.transpose_to_start(args());
00396 if(asg.num_vars() == 0) {
00397 *this *= other;
00398 } else {
00399 for(assignment_type xasg = args().begin();
00400 xasg < args().end(); ++xasg) {
00401 logP(xasg.linear_index()) += other.logP(fastyasg.linear_index());
00402 ++fastyasg;
00403 }
00404 }
00405 }
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439 inline void marginalize(const table_factor& joint) {
00440
00441 if(args() == joint.args()) {
00442
00443 *this = joint;
00444 return;
00445 }
00446
00447 domain_type ydom = joint.args() - args();
00448 ASSERT_GT(ydom.num_vars(), 0);
00449
00450 fast_discrete_assignment<MAX_DIM> fastyasg(joint.args());
00451 fastyasg.transpose_to_start(ydom);
00452
00453 size_t numel = 1;
00454 for (size_t i = 0;i < ydom.num_vars(); ++i) {
00455 numel *= ydom.var(i).size();
00456 }
00457
00458 for(assignment_type xasg = args().begin();
00459 xasg < args().end(); ++xasg) {
00460 double sum = 0;
00461 for(size_t i = 0;i < numel; ++i) {
00462 sum += exp(joint.logP(fastyasg.linear_index()));
00463 ++fastyasg;
00464 }
00465 ASSERT_FALSE( std::isinf(sum) );
00466 ASSERT_FALSE( std::isnan(sum) );
00467 ASSERT_GE(sum, 0.0);
00468 if(sum == 0)
00469 logP(xasg.linear_index()) = -std::numeric_limits<double>::max();
00470 else logP(xasg.linear_index()) = log(sum);
00471 }
00472 }
00473
00474
00475
00476 inline void damp(const table_factor& other, double damping) {
00477
00478
00479 if(damping == 0) return;
00480 ASSERT_EQ(args(), other.args());
00481 ASSERT_GT(damping, 0.0);
00482 ASSERT_LT(damping, 1.0);
00483 for(size_t i = 0; i < args().size(); ++i) {
00484 double val = damping * exp(other.logP(i)) +
00485 (1-damping) * exp(logP(i));
00486 ASSERT_GE(val, 0);
00487 if(val == 0) logP(i) = -std::numeric_limits<double>::max();
00488 else logP(i) = log(val);
00489 ASSERT_FALSE( std::isinf(logP(i)) );
00490 ASSERT_FALSE( std::isnan(logP(i)) );
00491 }
00492 }
00493
00494
00495
00496 inline double l1_diff(const table_factor& other) const {
00497
00498
00499 ASSERT_EQ(args(), other.args());
00500 double sum = 0;
00501 for(size_t i = 0; i < args().size(); ++i) {
00502 sum += fabs(exp(other.logP(i)) - exp(logP(i)));
00503 }
00504 return sum / args().size();
00505 }
00506
00507
00508
00509 inline double l1_logdiff(const table_factor& other) const {
00510 ASSERT_EQ(args(), other.args());
00511 double sum = 0;
00512 for(size_t i = 0; i < args().size(); ++i) {
00513 sum += fabs(other.logP(i) - logP(i));
00514 }
00515 return sum / args().size();
00516 }
00517
00518
00519 inline assignment_type max_asg() const {
00520 assignment_type max_asg = args().begin();
00521 double max_value = logP(max_asg.linear_index());
00522 for(assignment_type asg = args().begin();
00523 asg < args().end(); ++asg) {
00524 if(logP(asg.linear_index()) > max_value) {
00525 max_value = logP(asg.linear_index());
00526 max_asg = asg;
00527 }
00528 }
00529 return max_asg;
00530 }
00531
00532
00533
00534
00535 inline void expectation(std::vector<double>& values) const {
00536 values.clear();
00537 values.resize(num_vars(), 0);
00538 double sum = 0;
00539 for(assignment_type asg = args().begin();
00540 asg < args().end(); ++asg) {
00541 const double scale = exp(logP(asg.linear_index()));
00542 sum += scale;
00543 for(size_t i = 0; i < num_vars(); ++i) {
00544 values[i] += asg.asg_at(i) * scale;
00545 }
00546 }
00547
00548 for(size_t i = 0; i < num_vars(); ++i) values[i] /= sum;
00549 }
00550
00551
00552
00553
00554
00555 inline assignment_type sample() const {
00556 ASSERT_GT(size(), 0);
00557
00558 const double t = random::rand01();
00559 ASSERT_GE( t, 0 );
00560 ASSERT_LT( t, 1 );
00561 double sum = 0;
00562 for(size_t i = 0; i < _data.size(); ++i) {
00563 sum += exp( logP(i) );
00564 if(t <= sum) return assignment_type(args(), i) ;
00565 ASSERT_LT(sum, 1);
00566 }
00567
00568 throw("Invalid state reached in sample()");
00569 return assignment_type();
00570 }
00571
00572
00573
00574
00575 void set_as_agreement(double lambda) {
00576 ASSERT_EQ(num_vars(), 2);
00577 for(assignment_type asg = args().begin();
00578 asg < args().end(); ++asg) {
00579 const int diff = abs( int(asg.asg(0)) - int(asg.asg(1)) );
00580 if( diff > 0) logP(asg.linear_index()) = -lambda;
00581 else logP(asg.linear_index()) = 0;
00582 }
00583 }
00584
00585
00586
00587 void set_as_laplace(double lambda) {
00588 ASSERT_EQ(num_vars(), 2);
00589 for(assignment_type asg = args().begin();
00590 asg < args().end(); ++asg) {
00591 const int diff = abs( int(asg.asg_at(0)) - int(asg.asg_at(1)) );
00592 logP(asg.linear_index()) = -diff * lambda;
00593 }
00594 }
00595
00596 void load(iarchive& arc) {
00597 arc >> _args;
00598 arc >> _data;
00599 }
00600
00601 void save(oarchive& arc) const {
00602 arc << _args;
00603 arc << _data;
00604 }
00605
00606 private:
00607 domain_type _args;
00608 std::vector<double> _data;
00609 };
00610
00611
00612 };
00613
00614
00615
00616
00617
00618
00619
00620 template<size_t MAX_DIM>
00621 std::ostream& operator<<(std::ostream& out,
00622 const graphlab::table_factor<MAX_DIM>& factor) {
00623 typedef graphlab::table_factor<MAX_DIM> factor_type;
00624 typedef typename factor_type::assignment_type assignment_type;
00625 out << "Table Factor: " << factor.args() << "{" << std::endl;
00626 for(assignment_type asg = factor.args().begin();
00627 asg < factor.args().end(); ++asg) {
00628 out << "\tLogP(" << asg << ")=" << factor.logP(asg) << std::endl;
00629 }
00630 return out << "}";
00631 }
00632
00633
00634
00635
00636
00637 #include <graphlab/macros_undef.hpp>
00638 #endif
00639
00640
00641
00642
00643
00644
00645