EMAN::PCAlarge Class Reference

#include <analyzer_sparx.h>

Inheritance diagram for EMAN::PCAlarge:

Inheritance graph
[legend]
Collaboration diagram for EMAN::PCAlarge:

Collaboration graph
[legend]
List of all members.

Public Member Functions

 PCAlarge ()
virtual int insert_image (EMData *image)
 insert a image to the list of input images
virtual vector< EMData * > analyze ()
 main function for Analyzer, analyze input images and create output images
string get_name () const
 Get the Analyzer's name.
string get_desc () const
 Get the Analyzer's description.
void set_params (const Dict &new_params)
 Set the Analyzer parameters using a key/value dictionary.
int Lanczos (const string &maskedimages, int *kstep, float *diag, float *subdiag, float *V, float *beta)
TypeDict get_param_types () const
 Get Analyzer parameter information in a dictionary.

Static Public Member Functions

static AnalyzerNEW ()

Static Public Attributes

static const string NAME = "pca_large"

Protected Attributes

EMDatamask
int nvec

Private Attributes

int ncov
int nimages
float * eigval

Detailed Description

Definition at line 106 of file analyzer_sparx.h.


Constructor & Destructor Documentation

EMAN::PCAlarge::PCAlarge (  )  [inline]

Definition at line 109 of file analyzer_sparx.h.

Referenced by NEW().

00109 : mask(0), nvec(0) {}


Member Function Documentation

vector< EMData * > PCAlarge::analyze (  )  [virtual]

main function for Analyzer, analyze input images and create output images

Returns:
vector<EMData *> result os images analysis

Implements EMAN::Analyzer.

Definition at line 399 of file analyzer.cpp.

References diag, eigval, eigvec, EMDeletePtr(), EMAN::EMData::get_data(), EMAN::Analyzer::images, Lanczos(), mask, ncov, nimages, nvec, nx, qmat, rdata, reconstitute_image_mask(), EMAN::EMData::set_attr(), EMAN::EMData::set_size(), sgemv_(), sstevd_(), status, and subdiag.

00400 {
00401         int status = 0;
00402         int ione = 1;
00403         float one = 1.0, zero = 0.0;
00404         char trans;
00405         float *eigvec;
00406         string scratchfile = string("maskedimages.scratch");
00407         char command[100];
00408 
00409         printf("start analyzing..., ncov = %d\n", ncov);
00410 
00411         float resnrm = 0.0;
00412 
00413         if ( nvec > nimages || nvec ==0 ) nvec = nimages;
00414         int nx = ncov;
00415 
00416         // the definition of kstep is purely a heuristic for right now
00417         int kstep = nvec + 20;
00418         if (kstep > nimages) kstep = nimages;
00419 
00420         float *diag    = new float[kstep];
00421         float *subdiag = new float[kstep-1];
00422         float *vmat    = new float[nx*kstep];
00423 
00424         // run kstep-step Lanczos factorization
00425         status = Lanczos(scratchfile, &kstep, diag, subdiag,
00426                          vmat, &resnrm);
00427 
00428         // remove scratch file
00429         sprintf(command,"rm -f %s\n", scratchfile.c_str());
00430         status = system(command);
00431         if (status != 0) {
00432             fprintf(stderr,"PCAlarge: cannot remove scratchfile\n");
00433         }
00434 
00435         char jobz[2] = "V";
00436         float *qmat  = new float[kstep*kstep];
00437         // workspace size will be optimized later
00438         int   lwork  = 100 + 4*kstep + kstep*kstep;
00439         int   liwork = 3+5*kstep;
00440 
00441         float *work  = new float[lwork];
00442         int   *iwork = new int[liwork];
00443         int   info = 0;
00444 
00445         // call LAPACK tridiagonal eigensolver
00446         sstevd_(jobz, &kstep, diag, subdiag, qmat, &kstep, work, &lwork,
00447                 iwork, &liwork, &info);
00448 
00449         // store eigenvalues
00450         eigval = (float*)calloc(ncov,sizeof(float));
00451         eigvec = (float*)calloc(ncov*nvec,sizeof(float));
00452 
00453         for (int j = 0; j < nvec; j++) {
00454             eigval[j] = diag(kstep-j);
00455         }
00456 
00457 //         for (int i=0; i<nvec; i++) printf("eigval = %11.4e\n",
00458 //             eigval[i]);
00459 
00460         // compute eigenvectors
00461         for (int j=1; j<=nvec; j++) {
00462             trans = 'N';
00463             sgemv_(&trans, &nx,  &kstep, &one, vmat, &nx, &qmat(1,kstep-j+1),
00464                    &ione, &zero, &eigvec(1,j), &ione);
00465         }
00466 
00467         // pack eigenvectors into the return imagelist
00468         EMData *eigenimage = new EMData();
00469         eigenimage->set_size(ncov,1,1);
00470         float *rdata = eigenimage->get_data();
00471         for (int j = 1; j<= nvec; j++) {
00472             for (int i = 1; i <= ncov; i++)
00473                 rdata(i) = eigvec(i,j);
00474 
00475             EMData* recons_eigvec = Util::reconstitute_image_mask(eigenimage,mask);
00476 
00477             recons_eigvec->set_attr( "eigval", eigval[j-1] );
00478 
00479             images.push_back( recons_eigvec );
00480         }
00481 
00482         free(eigvec);
00483         EMDeletePtr(eigenimage);
00484 
00485         return images;
00486 }

string EMAN::PCAlarge::get_desc (  )  const [inline, virtual]

Get the Analyzer's description.

Returns:
The Analyzer's description.

Implements EMAN::Analyzer.

Definition at line 120 of file analyzer_sparx.h.

00121                 {
00122                         return "Principal component analysis";
00123                 }

string EMAN::PCAlarge::get_name (  )  const [inline, virtual]

Get the Analyzer's name.

Each Analyzer is identified by a unique name.

Returns:
The Analyzer's name.

Implements EMAN::Analyzer.

Definition at line 115 of file analyzer_sparx.h.

References NAME.

00116                 {
00117                         return NAME;
00118                 }               

TypeDict EMAN::PCAlarge::get_param_types (  )  const [inline, virtual]

Get Analyzer parameter information in a dictionary.

Each parameter has one record in the dictionary. Each record contains its name, data-type, and description.

Returns:
A dictionary containing the parameter info.

Implements EMAN::Analyzer.

Definition at line 137 of file analyzer_sparx.h.

References EMAN::EMObject::EMDATA, EMAN::EMObject::INT, and EMAN::TypeDict::put().

00138                 {
00139                         TypeDict d;
00140                         d.put("mask", EMObject::EMDATA, "mask image");
00141                         d.put("nvec", EMObject::INT, "number of desired principal components");
00142                         return d;
00143                 }

int PCAlarge::insert_image ( EMData image  )  [virtual]

insert a image to the list of input images

Parameters:
image 
Returns:
int 0 for success, <0 for fail

Implements EMAN::Analyzer.

Definition at line 343 of file analyzer.cpp.

References compress_image_mask(), EMDeletePtr(), EMAN::EMData::get_data(), EMAN::EMData::get_xsize(), imgdata, mask, nimages, NullPointerException, and nx.

00344 {
00345         if(mask==0)
00346                 throw NullPointerException("Null mask image pointer, set_params() first");
00347 
00348    EMData *maskedimage = Util::compress_image_mask(image,mask);
00349 
00350    FILE *fp;
00351    string scratchfile = string("maskedimages.scratch");
00352 
00353    fp = fopen(scratchfile.c_str(),"a");
00354 
00355    int nx = maskedimage->get_xsize();
00356    float *imgdata = maskedimage->get_data();
00357    fwrite(imgdata, sizeof(float), nx, fp);
00358    nimages++;
00359 
00360    fclose(fp);
00361 
00362    EMDeletePtr(maskedimage);
00363 
00364    return 0;
00365 }

int PCAlarge::Lanczos ( const string &  maskedimages,
int *  kstep,
float *  diag,
float *  subdiag,
float *  V,
float *  beta 
)

Definition at line 501 of file analyzer.cpp.

References Av, diag, EMDeleteArray(), hvec, imgdata, ncov, nimages, saxpy_(), sdot_(), sgemv_(), snrm2_(), status, subdiag, TOL, V, and v0.

Referenced by analyze().

00504 {
00505     /*
00506         Purpose: Compute a kstep-step Lanczos factorization
00507                  on the covariant matrix X*trans(X), where
00508                  X (imgstack) contains a set of images;
00509 
00510         Input:
00511            imgstack (vector <EMData*>) a set of images on which PCA is
00512                                        to be performed;
00513 
00514            kstep (int*) The maximum number of Lanczos iterations allowed.
00515                           If Lanczos terminates before kstep steps
00516                           is reached (an invariant subspace is found),
00517                           kstep returns the number of steps taken;
00518 
00519         Output:
00520            diag (float *) The projection of the covariant matrix into a
00521                           Krylov subspace of dimension at most kstep.
00522                           The projection is a tridiagonal matrix. The
00523                           diagonal elements of this matrix is stored in
00524                           the diag array.
00525 
00526            subdiag (float*) The subdiagonal elements of the projection
00527                             is stored here.
00528 
00529            V (float *)    an imgsize by kstep array that contains a
00530                           set of orthonormal Lanczos basis vectors;
00531 
00532            beta (float *) the residual norm of the factorization;
00533     */
00534     int i, iter;
00535 
00536     float alpha;
00537     int   ione = 1;
00538     float zero = 0.0, one = 1.0, mone = -1.0;
00539     int   status = 0;
00540 
00541     char trans;
00542     int  imgsize = 0;
00543     float *v0, *Av, *hvec, *htmp, *imgdata;
00544     FILE  *fp=NULL;
00545 
00546     if (nimages <= 0) {
00547         status = 2; // no image in the stack
00548         goto EXIT;
00549     }
00550 
00551     imgsize = ncov;
00552     if (nimages <= 0) {
00553         status = 3; // no image in the stack
00554         goto EXIT;
00555     }
00556 
00557     v0   = new float[imgsize];
00558     Av   = new float[imgsize];
00559     hvec = new float[*kstep];
00560     htmp = new float[*kstep];
00561     imgdata = new float[imgsize];
00562 
00563     if (v0 == NULL || Av == NULL || hvec == NULL ||
00564         htmp == NULL || imgdata == NULL) {
00565         fprintf(stderr, "Lanczos: failed to allocate v0,Av,hvec,htmp\n");
00566         status = -1;
00567         goto EXIT;
00568     }
00569 
00570     // may choose a random starting guess here
00571     for ( i = 1; i <= imgsize; i++)
00572     {
00573         v0(i) = 1.0;
00574         Av(i) = 0.0;
00575     }
00576 
00577     // normalize the starting vector
00578     *beta  = snrm2_(&imgsize, v0, &ione);
00579     for (i = 1; i<=imgsize; i++)
00580         V(i,1) = v0(i) / (*beta);
00581 
00582     // do Av <-- A*v0, where A is a cov matrix
00583     fp = fopen(maskedimages.c_str(),"r");
00584     if (fp==NULL) {
00585         fprintf(stderr,"Lanczos: cannot open %s\n", maskedimages.c_str());
00586     }
00587     for (i = 0; i < nimages; i++) {
00588        fread(imgdata, sizeof(float), imgsize, fp);
00589        alpha = sdot_(&imgsize, imgdata, &ione, V, &ione);
00590        saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
00591     }
00592     fclose(fp);
00593 
00594 
00595     // Av <--- Av - V(:,1)*V(:,1)'*Av
00596     diag(1) = sdot_(&imgsize, V, &ione, Av, &ione);
00597     alpha   = -diag(1);
00598     saxpy_(&imgsize, &alpha, V, &ione, Av, &ione);
00599 
00600     // main loop
00601     for ( iter = 2 ; iter <= *kstep ; iter++ ) {
00602         *beta = snrm2_(&imgsize, Av, &ione);
00603 
00604         if (*beta < TOL) {
00605             // found an invariant subspace, exit
00606             *kstep = iter;
00607             break;
00608         }
00609 
00610         subdiag(iter-1) = *beta;
00611         for ( i = 1 ; i <= imgsize ; i++ ) {
00612             V(i,iter) = Av(i) / (*beta);
00613         }
00614 
00615         // do Av <-- A*V(:,iter), where A is a cov matrix
00616         for (i = 0; i < imgsize; i++) Av[i] = 0;
00617         fp = fopen(maskedimages.c_str(),"r");
00618         for (i = 0; i < nimages; i++) {
00619            fread(imgdata, sizeof(float), imgsize, fp);
00620            alpha = sdot_(&imgsize, imgdata, &ione, &V(1,iter), &ione);
00621            saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
00622         }
00623         fclose(fp);
00624 
00625         // f <--- Av - V(:,1:iter)*V(:,1:iter)'*Av
00626         trans = 'T';
00627         status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
00628                         &zero , hvec    , &ione);
00629         trans = 'N';
00630         status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, hvec,
00631                         &ione , &one    , Av, &ione);
00632 
00633         // one step of reorthogonalization
00634         trans = 'T';
00635         status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
00636                         &zero , htmp    , &ione);
00637         saxpy_(&iter, &one, htmp, &ione, hvec, &ione);
00638         trans = 'N';
00639         status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, htmp,
00640                         &ione , &one    , Av, &ione);
00641         diag(iter) = hvec(iter);
00642     }
00643 
00644     EMDeleteArray(v0);
00645     EMDeleteArray(Av);
00646     EMDeleteArray(hvec);
00647     EMDeleteArray(htmp);
00648     EMDeleteArray(imgdata);
00649 
00650 EXIT:
00651     return status;
00652 
00653 }

static Analyzer* EMAN::PCAlarge::NEW (  )  [inline, static]

Definition at line 125 of file analyzer_sparx.h.

References PCAlarge().

00126                 {
00127                         return new PCAlarge();
00128                 }

void PCAlarge::set_params ( const Dict new_params  )  [virtual]

Set the Analyzer parameters using a key/value dictionary.

Parameters:
new_params A dictionary containing the new parameters.

Reimplemented from EMAN::Analyzer.

Definition at line 367 of file analyzer.cpp.

References compress_image_mask(), EMDeletePtr(), EMAN::EMData::get_xsize(), EMAN::EMData::get_ysize(), EMAN::EMData::get_zsize(), mask, ncov, nimages, nvec, nx, ny, EMAN::Analyzer::params, and EMAN::EMData::set_size().

00368 {
00369         params = new_params;
00370         mask = params["mask"];
00371         nvec = params["nvec"];
00372 
00373         // count the number of pixels under the mask
00374         // (this is really ugly!!!)
00375         EMData *dummy = new EMData();
00376 
00377         int nx = mask->get_xsize();
00378         int ny = mask->get_ysize();
00379         int nz = mask->get_zsize();
00380 
00381         dummy->set_size(nx,ny,nz);
00382 
00383         EMData *dummy1d = Util::compress_image_mask(dummy,mask);
00384 
00385         ncov = dummy1d->get_xsize();
00386 
00387         EMDeletePtr(dummy);
00388         EMDeletePtr(dummy1d);
00389         // no need to allocate the covariance matrix
00390         nimages = 0;
00391 }


Member Data Documentation

float* EMAN::PCAlarge::eigval [private]

Definition at line 154 of file analyzer_sparx.h.

Referenced by analyze().

EMData* EMAN::PCAlarge::mask [protected]

Definition at line 148 of file analyzer_sparx.h.

Referenced by analyze(), insert_image(), and set_params().

const string EMAN::PCAlarge::NAME = "pca_large" [static]

Definition at line 145 of file analyzer_sparx.h.

Referenced by get_name().

int EMAN::PCAlarge::ncov [private]

Definition at line 152 of file analyzer_sparx.h.

Referenced by analyze(), Lanczos(), and set_params().

int EMAN::PCAlarge::nimages [private]

Definition at line 153 of file analyzer_sparx.h.

Referenced by analyze(), insert_image(), Lanczos(), and set_params().

int EMAN::PCAlarge::nvec [protected]

Definition at line 149 of file analyzer_sparx.h.

Referenced by analyze(), and set_params().


The documentation for this class was generated from the following files:
Generated on Tue May 25 17:17:59 2010 for EMAN2 by  doxygen 1.4.7