#include <analyzer.h>
Inheritance diagram for EMAN::KMeansAnalyzer:
Public Member Functions | |
KMeansAnalyzer () | |
virtual int | insert_image (EMData *image) |
Insert an image into the list of input images | |
virtual vector< EMData * > | analyze () |
main function for Analyzer, analyze input images and create output images | |
string | get_name () const |
Get the Analyzer's name. | |
string | get_desc () const |
Get the Analyzer's description. | |
void | set_params (const Dict &new_params) |
Set the Analyzer parameters using a key/value dictionary. | |
TypeDict | get_param_types () const |
Get Analyzer parameter information in a dictionary. | |
Static Public Member Functions | |
static Analyzer * | NEW () |
Static Public Attributes | |
static const string | NAME = "kmeans" |
Protected Member Functions | |
void | update_centers (int sigmas=0) |
void | reclassify () |
void | reseed () |
Protected Attributes | |
vector< EMData * > | centers |
int | ncls |
int | verbose |
int | minchange |
int | maxiter |
int | mininclass |
int | nchanged |
int | slowseed |
int | calcsigmamean |
verbose | Display progress if set, more detail with larger numbers (9 max) | |
ncls | number of desired classes | |
maxiter | maximum number of iterations | |
minchange | Terminate if fewer than minchange members move in an iteration | |
mininclass | Minimum number of particles to keep a class as good (not enforced at termination) | |
slowseed | Instead of seeding all classes at once, it will gradually increase the number of classes by adding new seeds in groups with large standard deviations | |
calcsigmamean | Computes standard deviation of the mean image for each class-average (center), and returns them at the end of the list of centers |
Definition at line 138 of file analyzer.h.
EMAN::KMeansAnalyzer::KMeansAnalyzer | ( | ) | [inline] |
vector< EMData * > KMeansAnalyzer::analyze | ( | ) | [virtual] |
main function for Analyzer, analyze input images and create output images
Implements EMAN::Analyzer.
Definition at line 90 of file analyzer.cpp.
References calcsigmamean, centers, copy(), EMAN::Util::get_irand(), get_xsize(), get_ysize(), get_zsize(), EMAN::Analyzer::images, maxiter, minchange, mininclass, nchanged, ncls, reclassify(), reseed(), set_attr(), slowseed, update_centers(), and verbose.
00091 { 00092 if (ncls<=1) return vector<EMData *>(); 00093 //srandom(time(0)); 00094 00095 // These are the class centers, start each with a random image 00096 int nptcl=images.size(); 00097 int nclstot=ncls; 00098 if (calcsigmamean) centers.resize(nclstot*2); 00099 else centers.resize(nclstot); 00100 if (mininclass<1) mininclass=1; 00101 00102 for (int i=0; i<nptcl; i++) images[i]->set_attr("is_ok_center",(int)5); // if an image becomes part of too small a set, it will (eventually) be marked as a bad center 00103 00104 if (slowseed) { 00105 if (ncls>25) slowseed=ncls/25+1; // this becomes the number to seed in each step 00106 // if (maxiter<ncls*3+20) maxiter=ncls*3+20; // We need to make sure we have enough iterations to seed all of the classes 00107 // ncls=2; 00108 } 00109 00110 for (int i=0; i<ncls; i++) { 00111 // Fixed by d.woolford, Util.get_irand is inclusive (added a -1) 00112 centers[i]=images[Util::get_irand(0,nptcl-1)]->copy(); 00113 00114 } 00115 00116 if (calcsigmamean) { 00117 for (int i=nclstot; i<nclstot*2; i++) centers[i]=new EMData(images[0]->get_xsize(),images[0]->get_ysize(),images[0]->get_zsize()); 00118 } 00119 00120 00121 for (int i=0; i<maxiter; i++) { 00122 nchanged=0; 00123 reclassify(); 00124 if (verbose) printf("iter %d> %d (%d)\n",i,nchanged,ncls); 00125 if (nchanged<minchange && ncls==nclstot) break; 00126 update_centers(); 00127 00128 if (slowseed && i%3==2 && ncls<nclstot) { 00129 for (int j=0; j<slowseed && ncls<nclstot; j++) { 00130 centers[ncls]=0; 00131 ncls++; 00132 } 00133 reseed(); 00134 } 00135 } 00136 update_centers(calcsigmamean); 00137 00138 return centers; 00139 }
string EMAN::KMeansAnalyzer::get_desc | ( | ) | const [inline, virtual] |
Get the Analyzer's description.
Implements EMAN::Analyzer.
Definition at line 155 of file analyzer.h.
string EMAN::KMeansAnalyzer::get_name | ( | ) | const [inline, virtual] |
Get the Analyzer's name.
Each Analyzer is identified by a unique name.
Implements EMAN::Analyzer.
Definition at line 150 of file analyzer.h.
References NAME.
00151 { 00152 return NAME; 00153 }
TypeDict EMAN::KMeansAnalyzer::get_param_types | ( | ) | const [inline, virtual] |
Get Analyzer parameter information in a dictionary.
Each parameter has one record in the dictionary. Each record contains its name, data-type, and description.
Implements EMAN::Analyzer.
Definition at line 167 of file analyzer.h.
References EMAN::EMObject::INT, and EMAN::TypeDict::put().
00168 { 00169 TypeDict d; 00170 d.put("verbose", EMObject::INT, "Display progress if set, more detail with larger numbers (9 max)"); 00171 d.put("ncls", EMObject::INT, "number of desired classes"); 00172 d.put("maxiter", EMObject::INT, "maximum number of iterations"); 00173 d.put("minchange", EMObject::INT, "Terminate if fewer than minchange members move in an iteration"); 00174 d.put("mininclass", EMObject::INT, "Minumum number of particles to keep a class as good (not enforced at termination"); 00175 d.put("slowseed",EMObject::INT, "Instead of seeding all classes at once, it will gradually increase the number of classes by adding new seeds in groups with large standard deviations"); 00176 d.put("calcsigmamean",EMObject::INT, "Computes standard deviation of the mean image for each class-average (center), and returns them at the end of the list of centers"); 00177 return d; 00178 }
virtual int EMAN::KMeansAnalyzer::insert_image | ( | EMData * | image | ) | [inline, virtual] |
Insert an image into the list of input images
image |
Implements EMAN::Analyzer.
Definition at line 143 of file analyzer.h.
References EMAN::Analyzer::images.
00143 { 00144 images.push_back(image); 00145 return 0; 00146 }
static Analyzer* EMAN::KMeansAnalyzer::NEW | ( | ) | [inline, static] |
Definition at line 160 of file analyzer.h.
References KMeansAnalyzer().
00161 { 00162 return new KMeansAnalyzer(); 00163 }
void KMeansAnalyzer::reclassify | ( | ) | [protected] |
Definition at line 227 of file analyzer.cpp.
References centers, EMAN::Cmp::cmp(), EMAN::Analyzer::images, nchanged, and ncls.
Referenced by analyze().
00227 { 00228 int nptcl=images.size(); 00229 00230 Cmp *c = Factory < Cmp >::get("sqeuclidean"); 00231 for (int i=0; i<nptcl; i++) { 00232 float best=1.0e38f; 00233 int bestn=0; 00234 for (int j=0; j<ncls; j++) { 00235 float d=c->cmp(images[i],centers[j]); 00236 //images[i]->cmp("sqeuclidean",centers[j]); 00237 if (d<best) { best=d; bestn=j; } 00238 } 00239 int oldn=images[i]->get_attr_default("class_id",0); 00240 if (oldn!=bestn) nchanged++; 00241 images[i]->set_attr("class_id",bestn); 00242 } 00243 delete c; 00244 }
void KMeansAnalyzer::reseed | ( | ) | [protected] |
Definition at line 198 of file analyzer.cpp.
References centers, get_attr(), EMAN::Util::get_irand(), EMAN::Analyzer::images, ncls, and UnexpectedBehaviorException.
Referenced by analyze(), and update_centers().
00198 { 00199 int nptcl=images.size(); 00200 int i,j; 00201 00202 // if no classes need reseeding just return 00203 for (i=0; i<ncls; i++) { 00204 if (!centers[i]) break; 00205 } 00206 if (i==ncls) return; 00207 00208 // make a list of all particles which could be centers 00209 vector<int> goodcen; 00210 for (int i=0; i<nptcl; i++) if ((int)images[i]->get_attr("is_ok_center")>0) goodcen.push_back(i); 00211 00212 if (goodcen.size()==0) throw UnexpectedBehaviorException("Kmeans ran out of valid center particles with the provided parameters"); 00213 00214 // pick a random particle for the new seed 00215 for (i=0; i<ncls; i++) { 00216 if (centers[i]) continue; // center doesn't need reseeding 00217 j=Util::get_irand(0,goodcen.size()-1); 00218 centers[i]=images[j]->copy(); 00219 centers[i]->set_attr("ptcl_repr",1); 00220 printf("reseed %d -> %d\n",i,j); 00221 } 00222 00223 00224 }
void KMeansAnalyzer::set_params | ( | const Dict & | new_params | ) | [virtual] |
Set the Analyzer parameters using a key/value dictionary.
new_params | A dictionary containing the new parameters. |
Reimplemented from EMAN::Analyzer.
Definition at line 77 of file analyzer.cpp.
References calcsigmamean, EMAN::Dict::has_key(), maxiter, minchange, mininclass, ncls, EMAN::Analyzer::params, slowseed, and verbose.
00078 { 00079 params = new_params; 00080 if (params.has_key("ncls")) ncls = params["ncls"]; 00081 if (params.has_key("maxiter"))maxiter = params["maxiter"]; 00082 if (params.has_key("minchange"))minchange = params["minchange"]; 00083 if (params.has_key("mininclass"))mininclass = params["mininclass"]; 00084 if (params.has_key("slowseed"))slowseed = params["slowseed"]; 00085 if (params.has_key("verbose"))verbose = params["verbose"]; 00086 if (params.has_key("calcsigmamean")) calcsigmamean=params["calcsigmamean"]; 00087 00088 }
void KMeansAnalyzer::update_centers | ( | int | sigmas = 0 |
) | [protected] |
Definition at line 141 of file analyzer.cpp.
References add(), addsquare(), centers, get_attr(), EMAN::Analyzer::images, mininclass, ncls, reseed(), set_attr(), sqrt(), to_zero(), and verbose.
Referenced by analyze().
00141 { 00142 int nptcl=images.size(); 00143 //int repr[ncls]; 00144 int * repr = new int[ncls]; 00145 00146 for (int i=0; i<ncls; i++) { 00147 centers[i]->to_zero(); 00148 if (sigmas) centers[i+ncls]->to_zero(); 00149 repr[i]=0; 00150 } 00151 00152 // compute new position for each center 00153 for (int i=0; i<nptcl; i++) { 00154 int cid=images[i]->get_attr("class_id"); 00155 if ((int)images[i]->get_attr("is_ok_center")>0) { 00156 centers[cid]->add(*images[i]); 00157 if (sigmas) centers[cid+ncls]->addsquare(*images[i]); 00158 repr[cid]++; 00159 } 00160 } 00161 00162 for (int i=0; i<ncls; i++) { 00163 // If this class is too small 00164 if (repr[i]<mininclass) { 00165 // find all of the particles in the class, and decrement their "is_ok_center" counter. 00166 // when it reaches zero the particle will no longer participate in determining the location of a center 00167 for (int j=0; j<nptcl; j++) { 00168 if ((int)images[j]->get_attr("class_id")==i) images[i]->set_attr("is_ok_center",(int)images[i]->get_attr("is_ok_center")-1); 00169 } 00170 // Mark the center for reseeding 00171 delete centers[i]; 00172 centers[i]=0; 00173 repr[i]=0; 00174 } 00175 // finishes off the statistics we started computing above 00176 else { 00177 centers[i]->mult((float)1.0/(float)(repr[i])); 00178 centers[i]->set_attr("ptcl_repr",repr[i]); 00179 if (sigmas) { 00180 centers[i+ncls]->mult((float)1.0/(float)(repr[i])); // sum of squares over n 00181 centers[i+ncls]->subsquare(*centers[i]); // subtract the mean value squared 00182 centers[i+ncls]->process("math.sqrt"); // square root 00183 centers[i+ncls]->mult((float)1.0/(float)sqrt((float)repr[i])); // divide by sqrt(N) to get std. dev. of mean 00184 } 00185 00186 } 00187 if (verbose>1) printf("%d(%d)\t",i,(int)repr[i]); 00188 } 00189 00190 if (verbose>1) printf("\n"); 00191 00192 reseed(); 00193 00194 delete [] repr; 00195 }
int EMAN::KMeansAnalyzer::calcsigmamean [protected] |
vector<EMData *> EMAN::KMeansAnalyzer::centers [protected] |
Definition at line 187 of file analyzer.h.
Referenced by analyze(), reclassify(), reseed(), and update_centers().
int EMAN::KMeansAnalyzer::maxiter [protected] |
int EMAN::KMeansAnalyzer::minchange [protected] |
int EMAN::KMeansAnalyzer::mininclass [protected] |
Definition at line 192 of file analyzer.h.
Referenced by analyze(), set_params(), and update_centers().
const string EMAN::KMeansAnalyzer::NAME = "kmeans" [static] |
int EMAN::KMeansAnalyzer::nchanged [protected] |
int EMAN::KMeansAnalyzer::ncls [protected] |
Definition at line 188 of file analyzer.h.
Referenced by analyze(), reclassify(), reseed(), set_params(), and update_centers().
int EMAN::KMeansAnalyzer::slowseed [protected] |
int EMAN::KMeansAnalyzer::verbose [protected] |
Definition at line 189 of file analyzer.h.
Referenced by analyze(), set_params(), and update_centers().