IT++ Logo

mog_diag_kmeans.cpp

Go to the documentation of this file.
00001 
00031 #include <itpp/stat/mog_diag_kmeans.h>
00032 #include <iostream>
00033 
00034 namespace itpp {
00035 
00036   inline double MOG_diag_kmeans_sup::dist(const double * x, const double * y) const {
00037     double acc = 0.0;
00038     for(int d=0;d<D;d++) { double tmp = x[d]-y[d]; acc += tmp*tmp; }
00039     return(acc);
00040   }
00041 
00042   void MOG_diag_kmeans_sup::assign_to_means() {
00043 
00044     for(int k=0;k<K;k++) c_count[k] = 0;
00045 
00046     for(int n=0;n<N;n++) {
00047 
00048       int k = 0;
00049       double min_dist = dist( c_means[k], c_X[n] );
00050       int k_winner = k;
00051 
00052       for(int k=1;k<K;k++) {
00053         double tmp_dist = dist( c_means[k], c_X[n] );
00054         if(tmp_dist < min_dist) { min_dist = tmp_dist; k_winner = k; }
00055       }
00056 
00057       c_partitions[ k_winner ][ count[k_winner] ] = n;
00058       c_count[k_winner]++;
00059     }
00060   }
00061 
00062 
00063   void MOG_diag_kmeans_sup::recalculate_means() {
00064 
00065     for(int k=0;k<K;k++) {
00066 
00067       for(int d=0;d<D;d++)  c_tmpvec[d] = 0.0;
00068 
00069       int Nk = c_count[k];
00070 
00071       for(int n=0;n<Nk;n++) {
00072         double * x = c_X[ c_partitions[k][n] ];
00073         for(int d=0;d<D;d++)  c_tmpvec[d] += x[d];
00074       }
00075 
00076       if(Nk > 0) {
00077         double * c_mean = c_means[k];
00078         for(int d=0;d<D;d++)  c_mean[d] = c_tmpvec[d] / Nk;
00079       }
00080     }
00081 
00082   }
00083 
00084 
00085   bool MOG_diag_kmeans_sup::dezombify_means() {
00086 
00087     static int counter = 0;
00088 
00089     bool zombie_mean = false;
00090 
00091     int k = 0;
00092     int max_count = count[k];
00093     int k_hog = k;
00094 
00095     for(int k=1;k<K;k++)  if( c_count[k] > max_count ) { max_count = c_count[k]; k_hog = k; }
00096 
00097     for(int k=0;k<K;k++) {
00098       if( c_count[k] == 0 ) {
00099 
00100         zombie_mean = true;
00101         if(verbose)  it_warning("MOG_diag_kmeans_sup::dezombify_means(): detected zombie mean");
00102 
00103         if(k_hog == k) {
00104           it_warning("MOG_diag_kmeans_sup::dezombify_means(): weirdness: k_hog == k");
00105           return(false);
00106         }
00107 
00108         if( counter >= c_count[k_hog] )  counter = 0;
00109 
00110         double * c_mean = c_means[k];
00111         double * c_x = c_X[ c_partitions[k_hog][counter] ];
00112 
00113         for(int d=0;d<D;d++)  c_mean[d] = 0.5*( c_means[k_hog][d] + c_x[d] );
00114         counter++;
00115       }
00116 
00117     }
00118 
00119     if(zombie_mean)  assign_to_means();
00120 
00121     return(true);
00122   }
00123 
00124 
00125   double MOG_diag_kmeans_sup::measure_change() const {
00126 
00127     double tmp_dist = 0.0;
00128     for(int k=0;k<K;k++)   tmp_dist += dist( c_means[k], c_means_old[k] );
00129     return(tmp_dist);
00130   }
00131 
00132 
00133   void MOG_diag_kmeans_sup::initial_means() {
00134 
00135     for(int d=0;d<D;d++) c_tmpvec[d] = 0.0;
00136 
00137     for(int n=0;n<N;n++) {
00138       double * c_x = c_X[n];
00139       for(int d=0;d<D;d++)  c_tmpvec[d] += c_x[d];
00140     }
00141 
00142     for(int d=0;d<D;d++) c_tmpvec[d] /= N;
00143 
00144     int step = int(floor(double(N)/double(K)));
00145     for(int k=0;k<K;k++) {
00146       double * c_mean = c_means[k];
00147       double * c_x = c_X[k*step];
00148 
00149       for(int d=0;d<D;d++)  c_mean[d] = 0.5*(c_tmpvec[d] + c_x[d]);
00150     }
00151   }
00152 
00153 
00154   void MOG_diag_kmeans_sup::iterate() {
00155 
00156     for(int k=0;k<K;k++)  for(int d=0;d<D;d++)  c_means_old[k][d] = c_means[k][d];
00157 
00158     for(int i=0;i<max_iter;i++) {
00159 
00160       assign_to_means();
00161       if(!dezombify_means()) return;
00162       recalculate_means();
00163 
00164       double change = measure_change();
00165 
00166       if(verbose) std::cout << "MOG_diag_kmeans_sup::iterate(): iteration = " << i << "  change = " << change << std::endl;
00167       if(change == 0) break;
00168 
00169       for(int k=0;k<K;k++)  for(int d=0;d<D;d++)   c_means_old[k][d] = c_means[k][d];
00170     }
00171 
00172   }
00173 
00174 
00175   void MOG_diag_kmeans_sup::calc_means() {
00176     initial_means();
00177     iterate();
00178   }
00179 
00180 
00181   void MOG_diag_kmeans_sup::calc_covs() {
00182 
00183     for(int k=0;k<K;k++) {
00184       int Nk = c_count[k];
00185 
00186       if(Nk >= 2) {
00187         double * c_mean = c_means[k];
00188 
00189         for(int d=0;d<D;d++) c_tmpvec[d] = 0.0;
00190 
00191         for(int n=0;n<Nk;n++) {
00192           double * c_x = c_X[ c_partitions[k][n] ];
00193           for(int d=0;d<D;d++) { double tmp = c_x[d] - c_mean[d];  c_tmpvec[d] += tmp*tmp; }
00194         }
00195 
00196         for(int d=0;d<D;d++)  c_diag_covs[k][d] = trust*(c_tmpvec[d] / (Nk-1.0) ) + (1.0-trust)*(1.0);
00197       }
00198       else {
00199         for(int d=0;d<D;d++)  c_diag_covs[k][d] = 1.0;
00200       }
00201     }
00202 
00203   }
00204 
00205 
00206   void MOG_diag_kmeans_sup::calc_weights() {
00207     for(int k=0;k<K;k++)  c_weights[k] = trust*(c_count[k] / double(N)) + (1.0-trust)*(1.0/K);
00208     }
00209 
00210 
00211   void MOG_diag_kmeans_sup::normalise_vectors() {
00212 
00213     for(int d=0;d<D;d++) {
00214       double acc = 0.0;  for(int n=0;n<N;n++)  acc += c_X[n][d];
00215       c_norm_mu[d] = acc / N;
00216     }
00217 
00218     for(int d=0;d<D;d++) {
00219       double acc = 0.0;  for(int n=0;n<N;n++) { double tmp = c_X[n][d] - c_norm_mu[d];  acc += tmp*tmp; }
00220       c_norm_sd[d] = std::sqrt(acc/(N-1));
00221     }
00222 
00223     for(int n=0;n<N;n++) for(int d=0;d<D;d++) {
00224       c_X[n][d] -= c_norm_mu[d];
00225       if(c_norm_sd[d] > 0.0)  c_X[n][d] /= c_norm_sd[d];
00226     }
00227   }
00228 
00229 
00230   void MOG_diag_kmeans_sup::unnormalise_vectors() {
00231 
00232     for(int n=0;n<N;n++)  for(int d=0;d<D;d++) {
00233       if(c_norm_sd[d] > 0.0)  c_X[n][d] *= c_norm_sd[d];
00234       c_X[n][d] += c_norm_mu[d];
00235     }
00236   }
00237 
00238 
00239   void MOG_diag_kmeans_sup::unnormalise_means() {
00240 
00241     for(int k=0;k<K;k++) for(int d=0;d<D;d++) {
00242       if(norm_sd[d] > 0.0) c_means[k][d] *= c_norm_sd[d];
00243       c_means[k][d] += norm_mu[d];
00244     }
00245   }
00246 
00247 
00248   void MOG_diag_kmeans_sup::run(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in) {
00249 
00250     it_assert( model_in.is_valid(), "MOG_diag_kmeans_sup::run(): given model is not valid" );
00251     it_assert( (max_iter_in > 0), "MOG_diag_kmeans_sup::run(): 'max_iter' needs to be greater than zero" );
00252     it_assert( ((trust_in >= 0.0) && (trust_in <= 1.0) ), "MOG_diag_kmeans_sup::run(): 'trust' must be between 0 and 1 (inclusive)" );
00253 
00254     verbose = verbose_in;
00255 
00256     Array<vec> means_in = model_in.get_means();
00257     Array<vec> diag_covs_in = model_in.get_diag_covs();
00258     vec weights_in = model_in.get_weights();
00259 
00260     init( means_in, diag_covs_in, weights_in );
00261 
00262     means_in.set_size(0); diag_covs_in.set_size(0); weights_in.set_size(0);
00263 
00264     it_assert(check_size(X_in), "MOG_diag_kmeans_sup::run(): 'X' is empty or contains vectors of wrong dimensionality" );
00265 
00266     N = X_in.size();
00267 
00268     if(K > N)    it_warning("MOG_diag_kmeans_sup::run(): K > N");
00269     else
00270     if(K > N/10) it_warning("MOG_diag_kmeans_sup::run(): K > N/10");
00271 
00272     max_iter = max_iter_in;
00273     trust = trust_in;
00274 
00275     means_old.set_size(K);  for(int k=0;k<K;k++) means_old(k).set_size(D);
00276     partitions.set_size(K);  for(int k=0;k<K;k++) partitions(k).set_size(N);
00277     count.set_size(K);
00278     tmpvec.set_size(D);
00279     norm_mu.set_size(D);
00280     norm_sd.set_size(D);
00281 
00282     c_X = enable_c_access(X_in);
00283     c_means_old = enable_c_access(means_old);
00284     c_partitions = enable_c_access(partitions);
00285     c_count = enable_c_access(count);
00286     c_tmpvec = enable_c_access(tmpvec);
00287     c_norm_mu = enable_c_access(norm_mu);
00288     c_norm_sd = enable_c_access(norm_sd);
00289 
00290     if(normalise_in)  normalise_vectors();
00291 
00292     calc_means();  if(normalise_in) { unnormalise_vectors(); unnormalise_means(); }
00293     calc_covs();
00294     calc_weights();
00295 
00296     model_in.init(means, diag_covs, weights);
00297 
00298     disable_c_access(c_X);
00299     disable_c_access(c_means_old);
00300     disable_c_access(c_partitions);
00301     disable_c_access(c_count);
00302     disable_c_access(c_tmpvec);
00303     disable_c_access(c_norm_mu);
00304     disable_c_access(c_norm_sd);
00305 
00306     means_old.set_size(0);
00307     partitions.set_size(0);
00308     count.set_size(0);
00309     tmpvec.set_size(0);
00310     norm_mu.set_size(0);
00311     norm_sd.set_size(0);
00312 
00313     cleanup();
00314 
00315   }
00316 
00317   //
00318   // convenience functions
00319 
00320   void MOG_diag_kmeans(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in) {
00321     MOG_diag_kmeans_sup km;
00322     km.run(model_in, X_in, max_iter_in, trust_in, normalise_in, verbose_in);
00323   }
00324 
00325 
00326 }
00327 
SourceForge Logo

Generated on Sat Apr 19 10:41:15 2008 for IT++ by Doxygen 1.5.5