EMAN2
analyzer.cpp
Go to the documentation of this file.
00001 
00006 /*
00007  * Author: Steven Ludtke, 04/10/2003 (sludtke@bcm.edu)
00008  * Copyright (c) 2000-2006 Baylor College of Medicine
00009  *
00010  * This software is issued under a joint BSD/GNU license. You may use the
00011  * source code in this file under either license. However, note that the
00012  * complete EMAN2 and SPARX software packages have some GPL dependencies,
00013  * so you are responsible for compliance with the licenses of these packages
00014  * if you opt to use BSD licensing. The warranty disclaimer below holds
00015  * in either instance.
00016  *
00017  * This complete copyright notice must be included in any revised version of the
00018  * source code. Additional authorship citations may be added, but existing
00019  * author citations must be preserved.
00020  *
00021  * This program is free software; you can redistribute it and/or modify
00022  * it under the terms of the GNU General Public License as published by
00023  * the Free Software Foundation; either version 2 of the License, or
00024  * (at your option) any later version.
00025  *
00026  * This program is distributed in the hope that it will be useful,
00027  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00028  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00029  * GNU General Public License for more details.
00030  *
00031  * You should have received a copy of the GNU General Public License
00032  * along with this program; if not, write to the Free Software
00033  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00034  *
00035  * */
00036 
00037 #include <ctime>
00038 #include <memory>
00039 #include "emdata.h"
00040 #include "analyzer.h"
00041 #include "sparx/analyzer_sparx.h"
00042 #include "util.h"
00043 #include "cmp.h"
00044 #include "sparx/lapackblas.h"
00045 #include "sparx/varimax.h"
00046 
00047 using namespace EMAN;
00048 
00049 namespace EMAN {
00050 
00051         const string PCAsmall::NAME = "pca";
00052         const string PCAlarge::NAME = "pca_large";
00053         const string varimax::NAME = "varimax";
00054         const string InertiaMatrixAnalyzer::NAME = "inertiamatrix";
00055         const string ShapeAnalyzer::NAME = "shape";
00056         const string KMeansAnalyzer::NAME = "kmeans";
00057         const string SVDAnalyzer::NAME = "svd_gsl";
00058 
00059         template <> Factory < Analyzer >::Factory()
00060         {
00061                 force_add<PCAsmall>();
00062                 force_add<PCAlarge>();
00063                 force_add<varimax>();
00064                 force_add<InertiaMatrixAnalyzer>();
00065                 force_add<ShapeAnalyzer>();
00066                 force_add<KMeansAnalyzer>();
00067                 force_add<SVDAnalyzer>();
00068         }
00069 
00070 }
00071 
00072 int Analyzer::insert_images_list(vector<EMData *> image_list)
00073 {
00074         vector<EMData *>::const_iterator iter;
00075                 for(iter=image_list.begin(); iter!=image_list.end(); ++iter) {
00076                         insert_image(*iter);
00077                 }
00078         return 0;
00079 }
00080 
00081 vector<EMData *> InertiaMatrixAnalyzer::analyze() {
00082         int verbose = params.set_default("verbose",0);
00083         EMData *mx = new EMData(3,3);   // result is a 3x3 matrix
00084         mx->to_zero();
00085         ret.push_back(mx);
00086 
00087         if (images.size()!=1) throw ImageDimensionException("Inertia matrix computation accepts only a single volume as input");
00088         int nx=images[0]->get_xsize();
00089         int ny=images[0]->get_ysize();
00090         int nz=images[0]->get_zsize();
00091         if (nz==1 || ny==1 || nz==1) throw ImageDimensionException("Map must be 3-D");
00092 
00093         if (verbose>0) printf("Inertia volume size: %d %d %d\n",nx,ny,nz);
00094 
00095         for (int z=0; z<nz; z++) {
00096                 for (int y=0; y<ny; y++) {
00097                         for (int x=0; x<nx; x++) {
00098                                 int xx=x-nx/2;
00099                                 int yy=y-ny/2;
00100                                 int zz=z-nz/2;
00101                                 float v=images[0]->get_value_at(x,y,z);
00102                                 mx->set_value_at(0,0,mx->get_value_at(0,0)+v*(yy*yy+zz*zz));
00103                                 mx->set_value_at(0,1,mx->get_value_at(0,1)+v*(-xx*yy));
00104                                 mx->set_value_at(0,2,mx->get_value_at(0,2)+v*(-xx*zz));
00105                                 mx->set_value_at(1,0,mx->get_value_at(1,0)+v*(-xx*yy));
00106                                 mx->set_value_at(1,1,mx->get_value_at(1,1)+v*(zz*zz+xx*xx));
00107                                 mx->set_value_at(1,2,mx->get_value_at(1,2)+v*(-yy*zz));
00108                                 mx->set_value_at(2,0,mx->get_value_at(2,0)+v*(-xx*zz));
00109                                 mx->set_value_at(2,1,mx->get_value_at(2,1)+v*(-yy*zz));
00110                                 mx->set_value_at(2,2,mx->get_value_at(2,2)+v*(xx*xx+yy*yy));
00111                         }
00112                 }
00113         }
00114         mx->mult(1.0f/(nx*ny*nz));
00115 
00116         if (verbose>0) {
00117                 printf("%1.3g\t%1.3g\t%1.3g\n",mx->get_value_at(0,0),mx->get_value_at(1,0),mx->get_value_at(2,0));
00118                 printf("%1.3g\t%1.3g\t%1.3g\n",mx->get_value_at(0,1),mx->get_value_at(1,1),mx->get_value_at(2,1));
00119                 printf("%1.3g\t%1.3g\t%1.3g\n",mx->get_value_at(0,2),mx->get_value_at(1,2),mx->get_value_at(2,2));
00120         }
00121 
00122         return ret;
00123 }
00124 
00125 vector<EMData *> ShapeAnalyzer::analyze() {
00126         int verbose = params.set_default("verbose",0);
00127         EMData *mx = new EMData(4,2,1); // result is 4 values
00128         mx->to_zero();
00129         ret.push_back(mx);
00130 
00131         if (images.size()!=1) throw ImageDimensionException("Shape computation accepts only a single volume as input");
00132         int nx=images[0]->get_xsize();
00133         int ny=images[0]->get_ysize();
00134         int nz=images[0]->get_zsize();
00135         if (nz==1 || ny==1 || nz==1) throw ImageDimensionException("Map must be 3-D");
00136 
00137         if (verbose>0) printf("Shape size: %d %d %d\n",nx,ny,nz);
00138 
00139         for (int z=0; z<nz; z++) {
00140                 for (int y=0; y<ny; y++) {
00141                         for (int x=0; x<nx; x++) {
00142                                 int xx=x-nx/2;
00143                                 int yy=y-ny/2;
00144                                 int zz=z-nz/2;
00145                                 float v=images[0]->get_value_at(x,y,z);
00146                                 mx->set_value_at(0,0,mx->get_value_at(0,0)+v*(xx*xx));
00147                                 mx->set_value_at(1,0,mx->get_value_at(1,0)+v*(yy*yy));
00148                                 mx->set_value_at(2,0,mx->get_value_at(2,0)+v*(zz*zz));
00149                                 mx->set_value_at(3,0,mx->get_value_at(3,0)+v*(xx*xx+yy*yy+zz*zz)); 
00150                                 // sum(m*r^2), in which r is the distance to the center. Used for minicircle classification
00151                                 mx->set_value_at(0,1,mx->get_value_at(0,0)+v*abs(xx));
00152                                 mx->set_value_at(1,1,mx->get_value_at(1,0)+v*abs(yy));
00153                                 mx->set_value_at(2,1,mx->get_value_at(2,0)+v*abs(zz));
00154                                 mx->set_value_at(3,1,mx->get_value_at(3,0)+v*(float)sqrt((float)(xx*xx+yy*yy+zz*zz)));
00155                         }
00156                 }
00157         }
00158         mx->mult(1.0f/(nx*ny*nz));
00159 
00160         return ret;
00161 }
00162 
00163 
00164 void KMeansAnalyzer::set_params(const Dict & new_params)
00165 {
00166         params = new_params;
00167         if (params.has_key("ncls")) ncls = params["ncls"];
00168         if (params.has_key("maxiter"))maxiter = params["maxiter"];
00169         if (params.has_key("minchange"))minchange = params["minchange"];
00170         if (params.has_key("mininclass"))mininclass = params["mininclass"];
00171         if (params.has_key("slowseed"))slowseed = params["slowseed"];
00172         if (params.has_key("verbose"))verbose = params["verbose"];
00173         if (params.has_key("calcsigmamean")) calcsigmamean=params["calcsigmamean"];
00174 
00175 }
00176 
00177 vector<EMData *> KMeansAnalyzer::analyze()
00178 {
00179 if (ncls<=1) return vector<EMData *>();
00180 //srandom(time(0));
00181 
00182 // These are the class centers, start each with a random image
00183 int nptcl=images.size();
00184 int nclstot=ncls;
00185 if (calcsigmamean) centers.resize(nclstot*2);
00186 else centers.resize(nclstot);
00187 if (mininclass<1) mininclass=1;
00188 
00189 for (int i=0; i<nptcl; i++) images[i]->set_attr("is_ok_center",(int)5);  // if an image becomes part of too small a set, it will (eventually) be marked as a bad center
00190 
00191 if (slowseed) {
00192         if (ncls>25) slowseed=ncls/25+1;        // this becomes the number to seed in each step
00193 //      if (maxiter<ncls*3+20) maxiter=ncls*3+20;       // We need to make sure we have enough iterations to seed all of the classes
00194 //      ncls=2;
00195 }
00196 
00197 for (int i=0; i<ncls; i++) {
00198         // Fixed by d.woolford, Util.get_irand is inclusive (added a -1)
00199         centers[i]=images[Util::get_irand(0,nptcl-1)]->copy();
00200 
00201 }
00202 
00203 if (calcsigmamean) {
00204         for (int i=nclstot; i<nclstot*2; i++) centers[i]=new EMData(images[0]->get_xsize(),images[0]->get_ysize(),images[0]->get_zsize());
00205 }
00206 
00207 
00208 for (int i=0; i<maxiter; i++) {
00209         nchanged=0;
00210         reclassify();
00211         if (verbose) printf("iter %d>  %d (%d)\n",i,nchanged,ncls);
00212         if (nchanged<minchange && ncls==nclstot) break;
00213         update_centers();
00214 
00215         if (slowseed && i%3==2 && ncls<nclstot) {
00216                 for (int j=0; j<slowseed && ncls<nclstot; j++) {
00217                         centers[ncls]=0;
00218                         ncls++;
00219                 }
00220                 reseed();
00221         }
00222 }
00223 update_centers(calcsigmamean);
00224 
00225 return centers;
00226 }
00227 
00228 void KMeansAnalyzer::update_centers(int sigmas) {
00229 int nptcl=images.size();
00230 //int repr[ncls];
00231 int * repr = new int[ncls];
00232 
00233 for (int i=0; i<ncls; i++) {
00234         centers[i]->to_zero();
00235         if (sigmas) centers[i+ncls]->to_zero();
00236         repr[i]=0;
00237 }
00238 
00239 // compute new position for each center
00240 for (int i=0; i<nptcl; i++) {
00241         int cid=images[i]->get_attr("class_id");
00242         if ((int)images[i]->get_attr("is_ok_center")>0) {
00243                 centers[cid]->add(*images[i]);
00244                 if (sigmas) centers[cid+ncls]->addsquare(*images[i]);
00245                 repr[cid]++;
00246         }
00247 }
00248 
00249 for (int i=0; i<ncls; i++) {
00250         // If this class is too small
00251         if (repr[i]<mininclass) {
00252                 // find all of the particles in the class, and decrement their "is_ok_center" counter.
00253                 // when it reaches zero the particle will no longer participate in determining the location of a center
00254                 for (int j=0; j<nptcl; j++) {
00255                         if ((int)images[j]->get_attr("class_id")==i) images[i]->set_attr("is_ok_center",(int)images[i]->get_attr("is_ok_center")-1);
00256                 }
00257                 // Mark the center for reseeding
00258                 delete centers[i];
00259                 centers[i]=0;
00260                 repr[i]=0;
00261         }
00262         // finishes off the statistics we started computing above
00263         else {
00264                 centers[i]->mult((float)1.0/(float)(repr[i]));
00265                 centers[i]->set_attr("ptcl_repr",repr[i]);
00266                 if (sigmas) {
00267                         centers[i+ncls]->mult((float)1.0/(float)(repr[i]));             // sum of squares over n
00268                         centers[i+ncls]->subsquare(*centers[i]);                                        // subtract the mean value squared
00269                         centers[i+ncls]->process("math.sqrt");                                  // square root
00270                         centers[i+ncls]->mult((float)1.0/(float)sqrt((float)repr[i]));          // divide by sqrt(N) to get std. dev. of mean
00271                 }
00272 
00273         }
00274         if (verbose>1) printf("%d(%d)\t",i,(int)repr[i]);
00275 }
00276 
00277 if (verbose>1) printf("\n");
00278 
00279 reseed();
00280 
00281 delete [] repr;
00282 }
00283 
00284 // This will look for any unassigned points and reseed each inside the class with the broadest distribution widely distributed
00285 void KMeansAnalyzer::reseed() {
00286 int nptcl=images.size();
00287 int i,j;
00288 
00289 // if no classes need reseeding just return
00290 for (i=0; i<ncls; i++) {
00291         if (!centers[i]) break;
00292 }
00293 if (i==ncls) return;
00294 
00295 // make a list of all particles which could be centers
00296 vector<int> goodcen;
00297 for (int i=0; i<nptcl; i++) if ((int)images[i]->get_attr("is_ok_center")>0) goodcen.push_back(i);
00298 
00299 if (goodcen.size()==0) throw UnexpectedBehaviorException("Kmeans ran out of valid center particles with the provided parameters");
00300 
00301 // pick a random particle for the new seed
00302 for (i=0; i<ncls; i++) {
00303         if (centers[i]) continue;               // center doesn't need reseeding
00304         j=Util::get_irand(0,goodcen.size()-1);
00305         centers[i]=images[j]->copy();
00306         centers[i]->set_attr("ptcl_repr",1);
00307         printf("reseed %d -> %d\n",i,j);
00308 }
00309 
00310 
00311 }
00312 
00313 
00314 void KMeansAnalyzer::reclassify() {
00315 int nptcl=images.size();
00316 
00317 Cmp *c = Factory < Cmp >::get("sqeuclidean");
00318 for (int i=0; i<nptcl; i++) {
00319         float best=1.0e38f;
00320         int bestn=0;
00321         for (int j=0; j<ncls; j++) {
00322                 float d=c->cmp(images[i],centers[j]);
00323 //images[i]->cmp("sqeuclidean",centers[j]);
00324                 if (d<best) { best=d; bestn=j; }
00325         }
00326         int oldn=images[i]->get_attr_default("class_id",0);
00327         if (oldn!=bestn) nchanged++;
00328         images[i]->set_attr("class_id",bestn);
00329 }
00330 delete c;
00331 }
00332 
00333 #define covmat(i,j) covmat[ ((j)-1)*nx + (i)-1 ]
00334 #define imgdata(i)  imgdata[ (i)-1 ]
00335 int PCAsmall::insert_image(EMData * image)
00336 {
00337         if(mask==0)
00338                 throw NullPointerException("Null mask image pointer, set_params() first");
00339 
00340    EMData *maskedimage = Util::compress_image_mask(image,mask);
00341 
00342    int nx = maskedimage->get_xsize();
00343    float *imgdata = maskedimage->get_data();
00344    if (nx != ncov) {
00345       fprintf(stderr,"insert_image: something is wrong...\n");
00346       exit(1);
00347    }
00348 
00349    // there is a faster version of the following rank-1 update
00350    nimages++;
00351    for (int j = 1; j <= nx; j++)
00352        for (int i = 1; i<=nx; i++) {
00353            covmat(i,j) += imgdata(i)*imgdata(j);
00354    }
00355 
00356    EMDeletePtr(maskedimage);
00357    return 0;
00358 }
00359 #undef covmat
00360 
00361 #define eigvec(i,j) eigvec[(j)*ncov + (i)]
00362 vector<EMData*> PCAsmall::analyze()
00363 {
00364         float *eigvec;
00365         int status = 0;
00366 //              printf("start analyzing..., ncov = %d\n", ncov);
00367         eigval = (float*)calloc(ncov,sizeof(float));
00368         eigvec = (float*)calloc(ncov*ncov,sizeof(float));
00369         status = Util::coveig(ncov, covmat, eigval, eigvec);
00370 //       for (int i=1; i<=nvec; i++) printf("eigval = %11.4e\n",
00371 //            eigval[ncov-i]);
00372 
00373         // pack eigenvectors into the return imagelist
00374         EMData *eigenimage = new EMData();
00375         eigenimage->set_size(ncov,1,1);
00376         float *rdata = eigenimage->get_data();
00377         for (int j = 1; j<= nvec; j++) {
00378             for (int i = 0; i < ncov; i++) rdata[i] = eigvec(i,ncov-j);
00379 
00380                 EMData* recons_eigvec = Util::reconstitute_image_mask(eigenimage,mask);
00381                 recons_eigvec->set_attr( "eigval", eigval[j-1] );
00382             images.push_back(recons_eigvec);
00383         }
00384 
00385         free(eigvec);
00386         EMDeletePtr(eigenimage);
00387 
00388         return images;
00389 }
00390 #undef eigvec
00391 
00392 void PCAsmall::set_params(const Dict & new_params)
00393 {
00394         params = new_params;
00395         mask = params["mask"];
00396         nvec = params["nvec"];
00397 
00398         // count the number of pixels under the mask
00399         // (this is really ugly!!!)
00400         EMData *dummy = new EMData();
00401 
00402         int nx = mask->get_xsize();
00403         int ny = mask->get_ysize();
00404         int nz = mask->get_zsize();
00405 
00406         dummy->set_size(nx,ny,nz);
00407 
00408         EMData *dummy1d = Util::compress_image_mask(dummy,mask);
00409         ncov = dummy1d->get_xsize();
00410         EMDeletePtr(dummy);
00411         EMDeletePtr(dummy1d);
00412 
00413         // allocate and set up the covriance matrix
00414         nimages = 0;
00415         covmat = (float*)calloc(ncov*ncov,sizeof(float));
00416 }
00417 
00418 //------------------------------------------------------------------
00419 // for large-scale PCA incore
00420 
00421 int PCAlarge::insert_image(EMData * image)
00422 {
00423         if(mask==0)
00424                 throw NullPointerException("Null mask image pointer, set_params() first");
00425 
00426    EMData *maskedimage = Util::compress_image_mask(image,mask);
00427 
00428    FILE *fp;
00429    string scratchfile = string("maskedimages.scratch");
00430 
00431    fp = fopen(scratchfile.c_str(),"ab");
00432 
00433    int nx = maskedimage->get_xsize();
00434    float *imgdata = maskedimage->get_data();
00435    fwrite(imgdata, sizeof(float), nx, fp);
00436    nimages++;
00437 
00438    fclose(fp);
00439 
00440    EMDeletePtr(maskedimage);
00441 
00442    return 0;
00443 }
00444 
00445 void PCAlarge::set_params(const Dict & new_params)
00446 {
00447         params = new_params;
00448         mask = params["mask"];
00449         nvec = params["nvec"];
00450 
00451         // count the number of pixels under the mask
00452         // (this is really ugly!!!)
00453         EMData *dummy = new EMData();
00454 
00455         int nx = mask->get_xsize();
00456         int ny = mask->get_ysize();
00457         int nz = mask->get_zsize();
00458 
00459         dummy->set_size(nx,ny,nz);
00460 
00461         EMData *dummy1d = Util::compress_image_mask(dummy,mask);
00462 
00463         ncov = dummy1d->get_xsize();
00464 
00465         EMDeletePtr(dummy);
00466         EMDeletePtr(dummy1d);
00467         // no need to allocate the covariance matrix
00468         nimages = 0;
00469 }
00470 
00471 #define qmat(i,j)   qmat[((j)-1)*kstep + (i) -1]
00472 #define diag(i)     diag[(i)-1]
00473 #define rdata(i)    rdata[(i)-1]
00474 #define eigvec(i,j) eigvec[((j)-1)*ncov + (i)-1]
00475 #define eigval(i)   eigval[(i)-1]
00476 
00477 vector<EMData*> PCAlarge::analyze()
00478 {
00479         int status = 0;
00480         int ione = 1;
00481         float one = 1.0, zero = 0.0;
00482         char trans;
00483         float *eigvec;
00484         string scratchfile = string("maskedimages.scratch");
00485         char command[100];
00486 
00487         printf("start analyzing..., ncov = %d\n", ncov);
00488 
00489         float resnrm = 0.0;
00490 
00491         if ( nvec > nimages || nvec ==0 ) nvec = nimages;
00492         int nx = ncov;
00493 
00494         // the definition of kstep is purely a heuristic for right now
00495         int kstep = nvec + 20;
00496         if (kstep > nimages) kstep = nimages;
00497 
00498         float *diag    = new float[kstep];
00499         float *subdiag = new float[kstep-1];
00500         float *vmat    = new float[nx*kstep];
00501 
00502         // run kstep-step Lanczos factorization
00503         status = Lanczos(scratchfile, &kstep, diag, subdiag,
00504                          vmat, &resnrm);
00505 
00506         // remove scratch file
00507 #ifdef _WIN32
00508         if (_unlink(scratchfile.c_str()) == -1) {
00509                 fprintf(stderr,"PCAlarge: cannot remove scratchfile\n");
00510         }
00511 #else
00512         sprintf(command,"rm -f %s\n", scratchfile.c_str());
00513         status = system(command);
00514         if (status != 0) {
00515                 fprintf(stderr,"PCAlarge: cannot remove scratchfile\n");
00516         }
00517 #endif  //_WIN32
00518 
00519         char jobz[2] = "V";
00520         float *qmat  = new float[kstep*kstep];
00521         // workspace size will be optimized later
00522         int   lwork  = 100 + 4*kstep + kstep*kstep;
00523         int   liwork = 3+5*kstep;
00524 
00525         float *work  = new float[lwork];
00526         int   *iwork = new int[liwork];
00527         int   info = 0;
00528 
00529         // call LAPACK tridiagonal eigensolver
00530         sstevd_(jobz, &kstep, diag, subdiag, qmat, &kstep, work, &lwork,
00531                 iwork, &liwork, &info);
00532 
00533         // store eigenvalues
00534         eigval = (float*)calloc(ncov,sizeof(float));
00535         eigvec = (float*)calloc(ncov*nvec,sizeof(float));
00536 
00537         for (int j = 0; j < nvec; j++) {
00538             eigval[j] = diag(kstep-j);
00539         }
00540 
00541 //         for (int i=0; i<nvec; i++) printf("eigval = %11.4e\n",
00542 //             eigval[i]);
00543 
00544         // compute eigenvectors
00545         for (int j=1; j<=nvec; j++) {
00546             trans = 'N';
00547             sgemv_(&trans, &nx,  &kstep, &one, vmat, &nx, &qmat(1,kstep-j+1),
00548                    &ione, &zero, &eigvec(1,j), &ione);
00549         }
00550 
00551         // pack eigenvectors into the return imagelist
00552         EMData *eigenimage = new EMData();
00553         eigenimage->set_size(ncov,1,1);
00554         float *rdata = eigenimage->get_data();
00555         for (int j = 1; j<= nvec; j++) {
00556             for (int i = 1; i <= ncov; i++)
00557                 rdata(i) = eigvec(i,j);
00558 
00559             EMData* recons_eigvec = Util::reconstitute_image_mask(eigenimage,mask);
00560 
00561             recons_eigvec->set_attr( "eigval", eigval[j-1] );
00562 
00563             images.push_back( recons_eigvec );
00564         }
00565 
00566         free(eigvec);
00567         EMDeletePtr(eigenimage);
00568 
00569         return images;
00570 }
00571 #undef qmat
00572 #undef diag
00573 #undef rdata
00574 #undef eigvec
00575 #undef eigval
00576 
00577 #define TOL 1e-7
00578 #define V(i,j)      V[((j)-1)*imgsize + (i) - 1]
00579 #define v0(i)       v0[(i)-1]
00580 #define Av(i)       Av[(i)-1]
00581 #define subdiag(i)  subdiag[(i)-1]
00582 #define diag(i)     diag[(i)-1]
00583 #define hvec(i)     hvec[(i)-1]
00584 
00585 int PCAlarge::Lanczos(const string &maskedimages, int *kstep,
00586                       float  *diag, float *subdiag, float *V,
00587                       float  *beta)
00588 {
00589     /*
00590         Purpose: Compute a kstep-step Lanczos factorization
00591                  on the covariant matrix X*trans(X), where
00592                  X (imgstack) contains a set of images;
00593 
00594         Input:
00595            imgstack (vector <EMData*>) a set of images on which PCA is
00596                                        to be performed;
00597 
00598            kstep (int*) The maximum number of Lanczos iterations allowed.
00599                           If Lanczos terminates before kstep steps
00600                           is reached (an invariant subspace is found),
00601                           kstep returns the number of steps taken;
00602 
00603         Output:
00604            diag (float *) The projection of the covariant matrix into a
00605                           Krylov subspace of dimension at most kstep.
00606                           The projection is a tridiagonal matrix. The
00607                           diagonal elements of this matrix is stored in
00608                           the diag array.
00609 
00610            subdiag (float*) The subdiagonal elements of the projection
00611                             is stored here.
00612 
00613            V (float *)    an imgsize by kstep array that contains a
00614                           set of orthonormal Lanczos basis vectors;
00615 
00616            beta (float *) the residual norm of the factorization;
00617     */
00618     int i, iter;
00619 
00620     float alpha;
00621     int   ione = 1;
00622     float zero = 0.0, one = 1.0, mone = -1.0;
00623     int   status = 0;
00624 
00625     char trans;
00626     int  imgsize = 0;
00627     float *v0, *Av, *hvec, *htmp, *imgdata;
00628     FILE  *fp=NULL;
00629 
00630     if (nimages <= 0) {
00631         status = 2; // no image in the stack
00632         goto EXIT;
00633     }
00634 
00635     imgsize = ncov;
00636     if (nimages <= 0) {
00637         status = 3; // no image in the stack
00638         goto EXIT;
00639     }
00640 
00641     v0   = new float[imgsize];
00642     Av   = new float[imgsize];
00643     hvec = new float[*kstep];
00644     htmp = new float[*kstep];
00645     imgdata = new float[imgsize];
00646 
00647     if (v0 == NULL || Av == NULL || hvec == NULL ||
00648         htmp == NULL || imgdata == NULL) {
00649         fprintf(stderr, "Lanczos: failed to allocate v0,Av,hvec,htmp\n");
00650         status = -1;
00651         goto EXIT;
00652     }
00653 
00654     // may choose a random starting guess here
00655     for ( i = 1; i <= imgsize; i++)
00656     {
00657         v0(i) = 1.0;
00658         Av(i) = 0.0;
00659     }
00660 
00661     // normalize the starting vector
00662     *beta  = snrm2_(&imgsize, v0, &ione);
00663     for (i = 1; i<=imgsize; i++)
00664         V(i,1) = v0(i) / (*beta);
00665 
00666     // do Av <-- A*v0, where A is a cov matrix
00667     fp = fopen(maskedimages.c_str(),"rb");
00668     if (fp==NULL) {
00669         fprintf(stderr,"Lanczos: cannot open %s\n", maskedimages.c_str());
00670     }
00671     for (i = 0; i < nimages; i++) {
00672        fread(imgdata, sizeof(float), imgsize, fp);
00673        alpha = sdot_(&imgsize, imgdata, &ione, V, &ione);
00674        saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
00675     }
00676     fclose(fp);
00677 
00678 
00679     // Av <--- Av - V(:,1)*V(:,1)'*Av
00680     diag(1) = sdot_(&imgsize, V, &ione, Av, &ione);
00681     alpha   = -diag(1);
00682     saxpy_(&imgsize, &alpha, V, &ione, Av, &ione);
00683 
00684     // main loop
00685     for ( iter = 2 ; iter <= *kstep ; iter++ ) {
00686         *beta = snrm2_(&imgsize, Av, &ione);
00687 
00688         if (*beta < TOL) {
00689             // found an invariant subspace, exit
00690             *kstep = iter;
00691             break;
00692         }
00693 
00694         subdiag(iter-1) = *beta;
00695         for ( i = 1 ; i <= imgsize ; i++ ) {
00696             V(i,iter) = Av(i) / (*beta);
00697         }
00698 
00699         // do Av <-- A*V(:,iter), where A is a cov matrix
00700         for (i = 0; i < imgsize; i++) Av[i] = 0;
00701         fp = fopen(maskedimages.c_str(),"rb");
00702         for (i = 0; i < nimages; i++) {
00703            fread(imgdata, sizeof(float), imgsize, fp);
00704            alpha = sdot_(&imgsize, imgdata, &ione, &V(1,iter), &ione);
00705            saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
00706         }
00707         fclose(fp);
00708 
00709         // f <--- Av - V(:,1:iter)*V(:,1:iter)'*Av
00710         trans = 'T';
00711         status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
00712                         &zero , hvec    , &ione);
00713         trans = 'N';
00714         status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, hvec,
00715                         &ione , &one    , Av, &ione);
00716 
00717         // one step of reorthogonalization
00718         trans = 'T';
00719         status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
00720                         &zero , htmp    , &ione);
00721         saxpy_(&iter, &one, htmp, &ione, hvec, &ione);
00722         trans = 'N';
00723         status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, htmp,
00724                         &ione , &one    , Av, &ione);
00725         diag(iter) = hvec(iter);
00726     }
00727 
00728     EMDeleteArray(v0);
00729     EMDeleteArray(Av);
00730     EMDeleteArray(hvec);
00731     EMDeleteArray(htmp);
00732     EMDeleteArray(imgdata);
00733 
00734 EXIT:
00735     return status;
00736 
00737 }
00738 #undef v0
00739 #undef Av
00740 #undef V
00741 #undef hvec
00742 #undef diag
00743 #undef subdiag
00744 #undef TOL
00745 
00746 void varimax::set_params(const Dict & new_params)
00747 {
00748         params = new_params;
00749         m_mask = params["mask"];
00750 
00751         // count the number of pixels under the mask
00752         // (this is really ugly!!!)
00753         EMData *dummy = new EMData();
00754 
00755         int nx = m_mask->get_xsize();
00756         int ny = m_mask->get_ysize();
00757         int nz = m_mask->get_zsize();
00758 
00759         dummy->set_size(nx,ny,nz);
00760 
00761         EMData *dummy1d = Util::compress_image_mask(dummy,m_mask);
00762 
00763         m_nlen = dummy1d->get_xsize();
00764         m_nfac = 0;
00765 
00766         EMDeletePtr(dummy);
00767         EMDeletePtr(dummy1d);
00768 }
00769 
00770 int varimax::insert_image(EMData* image)
00771 {
00772         if(m_mask==0)
00773                 throw NullPointerException("Null mask image pointer, set_params() first");
00774 
00775     EMData* img1d = Util::compress_image_mask(image,m_mask);
00776 
00777     m_data.insert( m_data.end(), img1d->get_data(), img1d->get_data() + m_nlen );
00778 
00779     m_nfac++;
00780 
00781     Assert( (int)m_data.size() == m_nfac*m_nlen);
00782 
00783     return 0;
00784 }
00785 
00786 vector<EMData*> varimax::analyze()
00787 {
00788     int itmax = 10000;
00789     float eps = 1e-4f;
00790     int verbose = 1;
00791     float params[4];
00792     params[0] = 1.0;
00793     varmx( &m_data[0], m_nlen, m_nfac, IVARIMAX, params, NULL, itmax, eps, verbose);
00794 
00795     vector<EMData*> images;
00796 
00797     EMData* img1d = new EMData();
00798     img1d->set_size(m_nlen, 1, 1);
00799     for( int i=0; i < m_nfac; ++i )
00800     {
00801         float* imgdata = img1d->get_data();
00802 
00803         int offset = i * m_nlen;
00804         for( int i=0; i < m_nlen; ++i )
00805         {
00806             imgdata[i] = m_data[offset+i];
00807         }
00808 
00809         EMData* img = Util::reconstitute_image_mask(img1d,m_mask);
00810         images.push_back(img);
00811     }
00812 
00813     EMDeletePtr(img1d);
00814 
00815     return images;
00816 }
00817 
00818 int SVDAnalyzer::insert_image(EMData * image)
00819 {
00820         if (mask==0)
00821                 throw NullPointerException("Null mask image pointer, set_params() first");
00822 
00823         // count pixels under mask
00824         size_t totpix=mask->get_xsize()*mask->get_ysize()*mask->get_zsize();
00825         float  *d=image->get_data();
00826         float *md=mask ->get_data();
00827         for (size_t i=0,j=0; i<totpix; ++i) {
00828                 if (md[i]) {
00829                         gsl_matrix_set(A,j,nsofar,d[i]);
00830                         j++;
00831                 }
00832         }
00833         nsofar++;
00834 
00835    return 0;
00836 }
00837 #undef covmat
00838 
00839 #define eigvec(i,j) eigvec[(j)*ncov + (i)]
00840 vector<EMData*> SVDAnalyzer::analyze()
00841 {
00842 // Allocate the working space
00843 gsl_vector *work=gsl_vector_alloc(nimg);
00844 gsl_vector *S=gsl_vector_alloc(nimg);
00845 gsl_matrix *V=gsl_matrix_alloc(nimg,nimg);
00846 gsl_matrix *X=gsl_matrix_alloc(nimg,nimg);
00847 
00848 
00849 // Do the decomposition. All the real work is here
00850 gsl_linalg_SV_decomp_mod (A,X, V, S, work);
00851 //else gsl_linalg_SV_decomp_jacobi(A,V,S);
00852 
00853 vector<EMData*> ret;
00854 //unpack the results and write the output file
00855 float *md=mask->get_data();
00856 size_t totpix=mask->get_xsize()*mask->get_ysize()*mask->get_zsize();
00857 for (int k=0; k<nvec; k++) {
00858         EMData *img = new EMData;
00859         img->set_size(mask->get_xsize(),mask->get_ysize(),mask->get_zsize());
00860 
00861         float  *d=img->get_data();
00862         for (size_t i=0,j=0; i<totpix; ++i) {
00863                 if (md[i]) {
00864                         d[i]=(float)gsl_matrix_get(A,j,k);
00865                         j++;
00866                 }
00867         }
00868         img->set_attr( "eigval", gsl_vector_get(S,k));
00869         ret.push_back(img);
00870 }
00871 
00872 gsl_vector_free(work);
00873 gsl_vector_free(S);
00874 gsl_matrix_free(V);
00875 gsl_matrix_free(X);
00876 
00877 gsl_matrix_free(A);
00878 A=NULL;
00879 mask=NULL;
00880 
00881 return ret;
00882 }
00883 
00884 void SVDAnalyzer::set_params(const Dict & new_params)
00885 {
00886         params = new_params;
00887         mask = params["mask"];
00888         nvec = params["nvec"];
00889         nimg = params["nimg"];
00890 
00891         // count pixels under mask
00892         pixels=0;
00893         size_t totpix=mask->get_xsize()*mask->get_ysize()*mask->get_zsize();
00894         float *d=mask->get_data();
00895         for (size_t i=0; i<totpix; ++i) if (d[i]) ++pixels;
00896 
00897         printf("%d,%d\n",pixels,nimg);
00898         A=gsl_matrix_alloc(pixels,nimg);
00899         nsofar=0;
00900 }
00901 
00902 
00903 void EMAN::dump_analyzers()
00904 {
00905         dump_factory < Analyzer > ();
00906 }
00907 
00908 map<string, vector<string> > EMAN::dump_analyzers_list()
00909 {
00910         return dump_factory_list < Analyzer > ();
00911 }
00912 
00913 
00914 
00915 
00916 
00917 
00918