#include <analyzer.h>
Inheritance diagram for EMAN::KMeansAnalyzer:
Public Member Functions | |
KMeansAnalyzer () | |
virtual int | insert_image (EMData *image) |
Insert an image into the list of input images | |
virtual vector< EMData * > | analyze () |
Main function for the Analyzer: analyzes the input images and creates the output images | |
string | get_name () const |
Get the Analyzer's name. | |
string | get_desc () const |
Get the Analyzer's description. | |
void | set_params (const Dict &new_params) |
Set the Analyzer parameters using a key/value dictionary. | |
TypeDict | get_param_types () const |
Get Analyzer parameter information in a dictionary. | |
Static Public Member Functions | |
Analyzer * | NEW () |
Static Public Attributes | |
const string | NAME = "kmeans" |
Protected Member Functions | |
void | update_centers (int sigmas=0) |
void | reclassify () |
void | reseed () |
Protected Attributes | |
vector< EMData * > | centers |
int | ncls |
int | verbose |
int | minchange |
int | maxiter |
int | mininclass |
int | nchanged |
int | slowseed |
int | calcsigmamean |
verbose | Display progress if set, more detail with larger numbers (9 max) | |
ncls | number of desired classes | |
maxiter | maximum number of iterations | |
minchange | Terminate if fewer than minchange members move in an iteration | |
mininclass | Minimum number of particles to keep a class as good (not enforced at termination) | |
slowseed | Instead of seeding all classes at once, it will gradually increase the number of classes by adding new seeds in groups with large standard deviations | |
calcsigmamean | Computes standard deviation of the mean image for each class-average (center), and returns them at the end of the list of centers |
Definition at line 138 of file analyzer.h.
|
Definition at line 141 of file analyzer.h.
|
|
Main function for the Analyzer: analyzes the input images and creates the output images.
Implements EMAN::Analyzer. Definition at line 90 of file analyzer.cpp. References calcsigmamean, centers, get_xsize(), get_ysize(), get_zsize(), maxiter, minchange, mininclass, nchanged, ncls, reclassify(), reseed(), slowseed, and update_centers(). 00091 { 00092 if (ncls<=1) return vector<EMData *>(); 00093 //srandom(time(0)); 00094 00095 // These are the class centers, start each with a random image 00096 int nptcl=images.size(); 00097 int nclstot=ncls; 00098 if (calcsigmamean) centers.resize(nclstot*2); 00099 else centers.resize(nclstot); 00100 if (mininclass<1) mininclass=1; 00101 00102 if (slowseed) { 00103 if (maxiter<ncls*3+20) maxiter=ncls*3+20; // We need to make sure we have enough iterations to seed all of the classes 00104 ncls=2; 00105 } 00106 00107 for (int i=0; i<ncls; i++) { 00108 // Fixed by d.woolford, Util.get_irand is inclusive (added a -1) 00109 centers[i]=images[Util::get_irand(0,nptcl-1)]->copy(); 00110 00111 } 00112 00113 if (calcsigmamean) { 00114 for (int i=nclstot; i<nclstot*2; i++) centers[i]=new EMData(images[0]->get_xsize(),images[0]->get_ysize(),images[0]->get_zsize()); 00115 } 00116 00117 00118 for (int i=0; i<maxiter; i++) { 00119 nchanged=0; 00120 reclassify(); 00121 if (verbose) printf("iter %d> %d (%d)\n",i,nchanged,ncls); 00122 if (nchanged<minchange && ncls==nclstot) break; 00123 update_centers(); 00124 00125 if (slowseed && i%3==2 && ncls<nclstot) { 00126 centers[ncls]=0; 00127 ncls++; 00128 reseed(); 00129 } 00130 } 00131 update_centers(calcsigmamean); 00132 00133 return centers; 00134 }
|
|
Get the Analyzer's description.
Implements EMAN::Analyzer. Definition at line 155 of file analyzer.h. 00156 { 00157 return "k-means classification"; 00158 }
|
|
Get the Analyzer's name. Each Analyzer is identified by a unique name.
Implements EMAN::Analyzer. Definition at line 150 of file analyzer.h. 00151 {
00152 return NAME;
00153 }
|
|
Get Analyzer parameter information in a dictionary. Each parameter has one record in the dictionary. Each record contains its name, data-type, and description.
Implements EMAN::Analyzer. Definition at line 167 of file analyzer.h. References EMAN::TypeDict::put(). 00168 { 00169 TypeDict d; 00170 d.put("verbose", EMObject::INT, "Display progress if set, more detail with larger numbers (9 max)"); 00171 d.put("ncls", EMObject::INT, "number of desired classes"); 00172 d.put("maxiter", EMObject::INT, "maximum number of iterations"); 00173 d.put("minchange", EMObject::INT, "Terminate if fewer than minchange members move in an iteration"); 00174 d.put("mininclass", EMObject::INT, "Minumum number of particles to keep a class as good (not enforced at termination"); 00175 d.put("slowseed",EMObject::INT, "Instead of seeding all classes at once, it will gradually increase the number of classes by adding new seeds in groups with large standard deviations"); 00176 d.put("calcsigmamean",EMObject::INT, "Computes standard deviation of the mean image for each class-average (center), and returns them at the end of the list of centers"); 00177 return d; 00178 }
|
|
Insert an image into the list of input images.
Implements EMAN::Analyzer. Definition at line 143 of file analyzer.h. References images. 00143 { 00144 images.push_back(image); 00145 return 0; 00146 }
|
|
Definition at line 160 of file analyzer.h. 00161 { 00162 return new KMeansAnalyzer(); 00163 }
|
|
Definition at line 236 of file analyzer.cpp. References centers, EMAN::Cmp::cmp(), and nchanged. Referenced by analyze(). 00236 { 00237 int nptcl=images.size(); 00238 00239 Cmp *c = Factory < Cmp >::get("sqeuclidean"); 00240 for (int i=0; i<nptcl; i++) { 00241 float best=1.0e38f; 00242 int bestn=0; 00243 for (int j=0; j<ncls; j++) { 00244 float d=c->cmp(images[i],centers[j]); 00245 //images[i]->cmp("sqeuclidean",centers[j]); 00246 if (d<best) { best=d; bestn=j; } 00247 } 00248 int oldn=images[i]->get_attr_default("class_id",0); 00249 if (oldn!=bestn) nchanged++; 00250 images[i]->set_attr("class_id",bestn); 00251 } 00252 delete c; 00253 }
|
|
Definition at line 182 of file analyzer.cpp. References centers, EMAN::Cmp::cmp(), and EMAN::Dict::size(). Referenced by analyze(), and update_centers(). 00182 { 00183 // if no classes need reseeding just return 00184 int nptcl=images.size(); 00185 int i,j; 00186 for (i=0; i<ncls; i++) { 00187 if (!centers[i]) break; 00188 } 00189 if (i==ncls) return; 00190 00191 int * best = new int[ncls]; // particles in the average 00192 float *sigmas = new float[ncls]; // array of deviations 00193 00194 for (int i=0; i<ncls; i++) { sigmas[i]=0; best[i]=0; } 00195 00196 // compute the deviation of each class 00197 Cmp *c = Factory < Cmp >::get("sqeuclidean"); 00198 for (int i=0; i<nptcl; i++) { 00199 int cid=images[i]->get_attr("class_id"); 00200 if (!centers[cid]) continue; 00201 // sigmas[cid]+=(float)imc->get_attr("square_sum"); 00202 float d=c->cmp(images[i],centers[cid]); 00203 if (d>sigmas[cid]) { 00204 sigmas[cid]=d; // Instead of using sigma, use the largest distance in the class 00205 best[cid]=i; 00206 } 00207 } 00208 delete c; 00209 //for (i=0; i<ncls; i++) sigmas[i]/=repr[i]; //since we aren't doing a sigma now... 00210 00211 //we could sort the list, but for this use we just search 00212 for (i=0; i<ncls; i++) { 00213 if (centers[i]) continue; 00214 00215 float maxsig=0; 00216 int maxi=0; 00217 // find the class with the largest sigma 00218 for (j=0; j<ncls; j++) { 00219 if (sigmas[j]>maxsig) { maxsig=sigmas[j]; maxi=j; } 00220 } 00221 00222 // find an image in that class 00223 for (j=0; j<ncls; j++) if ((int)images[j]->get_attr("class_id")==maxi) break; 00224 if (Util::get_irand(0,1)==0) centers[i]=images[best[maxi]]->copy(); 00225 else centers[i]=images[j]->copy(); 00226 centers[i]->set_attr("ptcl_repr",1); 00227 sigmas[maxi]=0; // if we get another one to reseed, pick the next largest set (zero out the current one) 00228 printf("reseed %d -> %d (%d or %d)\n",i,maxi,best[maxi],j); 00229 } 00230 00231 delete [] sigmas; 00232 delete [] best; 00233 }
|
|
Set the Analyzer parameters using a key/value dictionary.
Reimplemented from EMAN::Analyzer. Definition at line 77 of file analyzer.cpp. References calcsigmamean, EMAN::Dict::has_key(), maxiter, minchange, mininclass, ncls, slowseed, and verbose. 00078 { 00079 params = new_params; 00080 if (params.has_key("ncls")) ncls = params["ncls"]; 00081 if (params.has_key("maxiter"))maxiter = params["maxiter"]; 00082 if (params.has_key("minchange"))minchange = params["minchange"]; 00083 if (params.has_key("mininclass"))mininclass = params["mininclass"]; 00084 if (params.has_key("slowseed"))slowseed = params["slowseed"]; 00085 if (params.has_key("verbose"))verbose = params["verbose"]; 00086 if (params.has_key("calcsigmamean")) calcsigmamean=params["calcsigmamean"]; 00087 00088 }
|
|
Definition at line 136 of file analyzer.cpp. References centers, reseed(), EMAN::Dict::size(), sqrt(), and verbose. Referenced by analyze(). 00136 { 00137 int nptcl=images.size(); 00138 //int repr[ncls]; 00139 int * repr = new int[ncls]; 00140 00141 for (int i=0; i<ncls; i++) { 00142 centers[i]->to_zero(); 00143 if (sigmas) centers[i+ncls]->to_zero(); 00144 repr[i]=0; 00145 } 00146 00147 for (int i=0; i<nptcl; i++) { 00148 int cid=images[i]->get_attr("class_id"); 00149 centers[cid]->add(*images[i]); 00150 if (sigmas) centers[cid+ncls]->addsquare(*images[i]); 00151 repr[cid]++; 00152 } 00153 00154 for (int i=0; i<ncls; i++) { 00155 if (repr[i]<mininclass) { 00156 delete centers[i]; 00157 centers[i]=0; 00158 repr[i]=0; 00159 } 00160 else { 00161 centers[i]->mult((float)1.0/(float)(repr[i])); 00162 centers[i]->set_attr("ptcl_repr",repr[i]); 00163 if (sigmas) { 00164 centers[i+ncls]->mult((float)1.0/(float)(repr[i])); // sum of squares over n 00165 centers[i+ncls]->subsquare(*centers[i]); // subtract the mean value squared 00166 centers[i+ncls]->process("math.sqrt"); // square root 00167 centers[i+ncls]->mult((float)1.0/(float)sqrt((float)repr[i])); // divide by sqrt(N) to get std. dev. of mean 00168 } 00169 00170 } 00171 if (verbose>1) printf("%d(%d)\t",i,(int)repr[i]); 00172 } 00173 00174 if (verbose>1) printf("\n"); 00175 00176 reseed(); 00177 00178 delete [] repr; 00179 }
|
|
Definition at line 195 of file analyzer.h. Referenced by analyze(), and set_params(). |
|
Definition at line 187 of file analyzer.h. Referenced by analyze(), reclassify(), reseed(), and update_centers(). |
|
Definition at line 191 of file analyzer.h. Referenced by analyze(), and set_params(). |
|
Definition at line 190 of file analyzer.h. Referenced by analyze(), and set_params(). |
|
Definition at line 192 of file analyzer.h. Referenced by analyze(), and set_params(). |
|
Definition at line 54 of file analyzer.cpp. |
|
Definition at line 193 of file analyzer.h. Referenced by analyze(), and reclassify(). |
|
Definition at line 188 of file analyzer.h. Referenced by analyze(), and set_params(). |
|
Definition at line 194 of file analyzer.h. Referenced by analyze(), and set_params(). |
|
Definition at line 189 of file analyzer.h. Referenced by set_params(), and update_centers(). |