#include <analyzer.h>
Inheritance diagram for EMAN::KMeansAnalyzer:


Public Member Functions | |
| KMeansAnalyzer () | |
| virtual int | insert_image (EMData *image) |
| insert an image into the list of input images | |
| virtual vector< EMData * > | analyze () |
| main function for Analyzer, analyze input images and create output images | |
| string | get_name () const |
| Get the Analyzer's name. | |
| string | get_desc () const |
| Get the Analyzer's description. | |
| void | set_params (const Dict &new_params) |
| Set the Analyzer parameters using a key/value dictionary. | |
| TypeDict | get_param_types () const |
| Get Analyzer parameter information in a dictionary. | |
Static Public Member Functions | |
| Analyzer * | NEW () |
Static Public Attributes | |
| const string | NAME = "kmeans" |
Protected Member Functions | |
| void | update_centers (int sigmas=0) |
| void | reclassify () |
| void | reseed () |
Protected Attributes | |
| vector< EMData * > | centers |
| int | ncls |
| int | verbose |
| int | minchange |
| int | maxiter |
| int | mininclass |
| int | nchanged |
| int | slowseed |
| int | calcsigmamean |
| verbose | Display progress if set, more detail with larger numbers (9 max) | |
| ncls | number of desired classes | |
| maxiter | maximum number of iterations | |
| minchange | Terminate if fewer than minchange members move in an iteration | |
| mininclass | Minimum number of particles to keep a class as good (not enforced at termination) | |
| slowseed | Instead of seeding all classes at once, it will gradually increase the number of classes by adding new seeds in groups with large standard deviations | |
| calcsigmamean | Computes standard deviation of the mean image for each class-average (center), and returns them at the end of the list of centers |
Definition at line 138 of file analyzer.h.
|
|
Definition at line 141 of file analyzer.h.
|
|
|
main function for Analyzer, analyze input images and create output images
Implements EMAN::Analyzer. Definition at line 90 of file analyzer.cpp. References calcsigmamean, centers, get_xsize(), get_ysize(), get_zsize(), minchange, mininclass, nchanged, ncls, reclassify(), reseed(), slowseed, and update_centers(). 00091 {
00092 if (ncls<=1 || images.size()==0) return vector<EMData *>(); // nothing to do: fewer than two classes requested, or no input images to seed the centers from
00093 //srandom(time(0));
00094 // Overview: seed ncls centers with copies of random input images, then alternate reclassify()/update_centers() for at most maxiter iterations and return the centers.
00095 // These are the class centers, start each with a random image
00096 int nptcl=images.size();
00097 int nclstot=ncls;
00098 if (calcsigmamean) centers.resize(nclstot*2); // entries [nclstot, 2*nclstot) hold one sigma image per class
00099 else centers.resize(nclstot);
00100 if (mininclass<1) mininclass=1;
00101
00102 for (int i=0; i<nptcl; i++) images[i]->set_attr("is_ok_center",(int)5); // if an image becomes part of too small a set, it will (eventually) be marked as a bad center
00103 // NOTE(review): with "ncls=2" commented out below, ncls stays equal to nclstot in this function, so the incremental seeding branch in the main loop is never taken.
00104 if (slowseed) {
00105 if (ncls>25) slowseed=ncls/25+1; // this becomes the number to seed in each step
00106 // if (maxiter<ncls*3+20) maxiter=ncls*3+20; // We need to make sure we have enough iterations to seed all of the classes
00107 // ncls=2;
00108 }
00109
00110 for (int i=0; i<ncls; i++) {
00111 // Fixed by d.woolford, Util.get_irand is inclusive (added a -1)
00112 centers[i]=images[Util::get_irand(0,nptcl-1)]->copy();
00113
00114 }
00115
00116 if (calcsigmamean) {
00117 for (int i=nclstot; i<nclstot*2; i++) centers[i]=new EMData(images[0]->get_xsize(),images[0]->get_ysize(),images[0]->get_zsize());
00118 }
00119
00120 // Main loop: stop early once fewer than minchange images switch class and all classes have been seeded.
00121 for (int i=0; i<maxiter; i++) {
00122 nchanged=0;
00123 reclassify();
00124 if (verbose) printf("iter %d> %d (%d)\n",i,nchanged,ncls);
00125 if (nchanged<minchange && ncls==nclstot) break;
00126 update_centers();
00127
00128 if (slowseed && i%3==2 && ncls<nclstot) {
00129 for (int j=0; j<slowseed && ncls<nclstot; j++) {
00130 centers[ncls]=0;
00131 ncls++;
00132 }
00133 reseed();
00134 }
00135 }
00136 update_centers(calcsigmamean); // final pass; also fills the sigma images when calcsigmamean is set
00137
00138 return centers;
00139 }
|
|
|
Get the Analyzer's description.
Implements EMAN::Analyzer. Definition at line 155 of file analyzer.h. 00156 {
00157 return "k-means classification"; // fixed, human-readable description of this analyzer
00158 }
|
|
|
Get the Analyzer's name. Each Analyzer is identified by a unique name.
Implements EMAN::Analyzer. Definition at line 150 of file analyzer.h. 00151 {
00152 return NAME; // the class-level static NAME attribute, listed above as "kmeans"
00153 }
|
|
|
Get Analyzer parameter information in a dictionary. Each parameter has one record in the dictionary. Each record contains its name, data-type, and description.
Implements EMAN::Analyzer. Definition at line 167 of file analyzer.h. References EMAN::TypeDict::put(). 00168 {
00169 TypeDict d; // one record per supported parameter: name, data type, description
00170 d.put("verbose", EMObject::INT, "Display progress if set, more detail with larger numbers (9 max)");
00171 d.put("ncls", EMObject::INT, "number of desired classes");
00172 d.put("maxiter", EMObject::INT, "maximum number of iterations");
00173 d.put("minchange", EMObject::INT, "Terminate if fewer than minchange members move in an iteration");
00174 d.put("mininclass", EMObject::INT, "Minimum number of particles to keep a class as good (not enforced at termination)");
00175 d.put("slowseed",EMObject::INT, "Instead of seeding all classes at once, it will gradually increase the number of classes by adding new seeds in groups with large standard deviations");
00176 d.put("calcsigmamean",EMObject::INT, "Computes standard deviation of the mean image for each class-average (center), and returns them at the end of the list of centers");
00177 return d;
00178 }
|
|
|
insert an image into the list of input images
Implements EMAN::Analyzer. Definition at line 143 of file analyzer.h. References images. 00143 {
00144 images.push_back(image); // stores the pointer itself; no copy of the image is made here
00145 return 0; // always returns 0
00146 }
|
|
|
Definition at line 160 of file analyzer.h. 00161 {
00162 return new KMeansAnalyzer(); // heap-allocated instance; presumably the caller (factory) takes ownership - confirm
00163 }
|
|
|
Definition at line 227 of file analyzer.cpp. References centers, EMAN::Cmp::cmp(), nchanged, and EMAN::Dict::size(). Referenced by analyze(). 00227 {
00228 int nptcl=images.size();
00229 // Assigns every image to the center with the smallest "sqeuclidean" comparison value and counts reassignments in nchanged.
00230 Cmp *c = Factory < Cmp >::get("sqeuclidean");
00231 for (int i=0; i<nptcl; i++) {
00232 float best=1.0e38f; // initial value; any smaller comparison result replaces it
00233 int bestn=0;
00234 for (int j=0; j<ncls; j++) {
00235 float d=c->cmp(images[i],centers[j]);
00236 //images[i]->cmp("sqeuclidean",centers[j]);
00237 if (d<best) { best=d; bestn=j; }
00238 }
00239 int oldn=images[i]->get_attr_default("class_id",0); // an image with no class_id yet is treated as previously in class 0
00240 if (oldn!=bestn) nchanged++;
00241 images[i]->set_attr("class_id",bestn);
00242 }
00243 delete c; // release the comparator obtained from the factory above
00244 }
|
|
|
Definition at line 198 of file analyzer.cpp. References centers, EMAN::Dict::size(), and UnexpectedBehaviorException. Referenced by analyze(), and update_centers(). 00198 {
00199 int nptcl=images.size();
00200 int i,j;
00201 // Replaces every null entry in centers[0..ncls) with a copy of a randomly chosen image whose "is_ok_center" counter is still positive.
00202 // if no classes need reseeding just return
00203 for (i=0; i<ncls; i++) {
00204 if (!centers[i]) break;
00205 }
00206 if (i==ncls) return;
00207
00208 // make a list of all particles which could be centers
00209 vector<int> goodcen;
00210 for (int i=0; i<nptcl; i++) if ((int)images[i]->get_attr("is_ok_center")>0) goodcen.push_back(i);
00211
00212 if (goodcen.size()==0) throw UnexpectedBehaviorException("Kmeans ran out of valid center particles with the provided parameters");
00213
00214 // pick a random particle for the new seed
00215 for (i=0; i<ncls; i++) {
00216 if (centers[i]) continue; // center doesn't need reseeding
00217 j=goodcen[Util::get_irand(0,goodcen.size()-1)]; // the random draw is a position in goodcen; map it back to the image index stored there
00218 centers[i]=images[j]->copy();
00219 centers[i]->set_attr("ptcl_repr",1);
00220 printf("reseed %d -> %d\n",i,j);
00221 }
00222
00223
00224 }
|
|
|
Set the Analyzer parameters using a key/value dictionary.
Reimplemented from EMAN::Analyzer. Definition at line 77 of file analyzer.cpp. References calcsigmamean, EMAN::Dict::has_key(), maxiter, minchange, mininclass, ncls, slowseed, and verbose. 00078 {
00079 params = new_params; // keep the whole dictionary, then mirror the recognised keys into member fields
00080 if (params.has_key("ncls")) ncls = params["ncls"];
00081 if (params.has_key("maxiter"))maxiter = params["maxiter"];
00082 if (params.has_key("minchange"))minchange = params["minchange"];
00083 if (params.has_key("mininclass"))mininclass = params["mininclass"];
00084 if (params.has_key("slowseed"))slowseed = params["slowseed"];
00085 if (params.has_key("verbose"))verbose = params["verbose"];
00086 if (params.has_key("calcsigmamean")) calcsigmamean=params["calcsigmamean"];
00087 // a key that is absent from new_params leaves the corresponding member untouched
00088 }
|
|
|
Definition at line 141 of file analyzer.cpp. References centers, get_attr(), reseed(), EMAN::Dict::size(), sqrt(), and verbose. Referenced by analyze(). 00141 {
00142 int nptcl=images.size();
00143 //int repr[ncls];
00144 vector<int> repr(ncls,0); // per-class member count; a vector so the storage is released even if reseed() throws
00145 // Recomputes each center as the mean of its member images; when sigmas is nonzero, centers[i+ncls] receives the standard deviation of that mean.
00146 for (int i=0; i<ncls; i++) {
00147 centers[i]->to_zero();
00148 if (sigmas) centers[i+ncls]->to_zero();
00149 repr[i]=0;
00150 }
00151
00152 // compute new position for each center
00153 for (int i=0; i<nptcl; i++) {
00154 int cid=images[i]->get_attr("class_id");
00155 if ((int)images[i]->get_attr("is_ok_center")>0) {
00156 centers[cid]->add(*images[i]);
00157 if (sigmas) centers[cid+ncls]->addsquare(*images[i]);
00158 repr[cid]++;
00159 }
00160 }
00161
00162 for (int i=0; i<ncls; i++) {
00163 // If this class is too small
00164 if (repr[i]<mininclass) {
00165 // find all of the particles in the class, and decrement their "is_ok_center" counter.
00166 // when it reaches zero the particle will no longer participate in determining the location of a center
00167 for (int j=0; j<nptcl; j++) {
00168 if ((int)images[j]->get_attr("class_id")==i) images[j]->set_attr("is_ok_center",(int)images[j]->get_attr("is_ok_center")-1); // j is the particle index; i is the class index
00169 }
00170 // Mark the center for reseeding
00171 delete centers[i];
00172 centers[i]=0;
00173 repr[i]=0;
00174 }
00175 // finishes off the statistics we started computing above
00176 else {
00177 centers[i]->mult((float)1.0/(float)(repr[i]));
00178 centers[i]->set_attr("ptcl_repr",repr[i]);
00179 if (sigmas) {
00180 centers[i+ncls]->mult((float)1.0/(float)(repr[i])); // sum of squares over n
00181 centers[i+ncls]->subsquare(*centers[i]); // subtract the mean value squared
00182 centers[i+ncls]->process("math.sqrt"); // square root
00183 centers[i+ncls]->mult((float)1.0/(float)sqrt((float)repr[i])); // divide by sqrt(N) to get std. dev. of mean
00184 }
00185
00186 }
00187 if (verbose>1) printf("%d(%d)\t",i,(int)repr[i]);
00188 }
00189
00190 if (verbose>1) printf("\n");
00191 // refill any center that was deleted above for being too small
00192 reseed();
00193
00194 // repr is a vector, so no explicit delete is needed
00195 }
|
|
|
Definition at line 195 of file analyzer.h. Referenced by analyze(), and set_params(). |
|
|
Definition at line 187 of file analyzer.h. Referenced by analyze(), reclassify(), reseed(), and update_centers(). |
|
|
Definition at line 191 of file analyzer.h. Referenced by set_params(). |
|
|
Definition at line 190 of file analyzer.h. Referenced by analyze(), and set_params(). |
|
|
Definition at line 192 of file analyzer.h. Referenced by analyze(), and set_params(). |
|
|
Definition at line 54 of file analyzer.cpp. |
|
|
Definition at line 193 of file analyzer.h. Referenced by analyze(), and reclassify(). |
|
|
Definition at line 188 of file analyzer.h. Referenced by analyze(), and set_params(). |
|
|
Definition at line 194 of file analyzer.h. Referenced by analyze(), and set_params(). |
|
|
Definition at line 189 of file analyzer.h. Referenced by set_params(), and update_centers(). |
1.3.9.1