EMAN::PCAlarge Class Reference

#include <analyzer_sparx.h>

Inheritance diagram for EMAN::PCAlarge:

Inheritance graph
[legend]
Collaboration diagram for EMAN::PCAlarge:

Collaboration graph
[legend]
List of all members.

Public Member Functions

 PCAlarge ()
virtual int insert_image (EMData *image)
 insert a image to the list of input images
virtual vector< EMData * > analyze ()
 main function for Analyzer, analyze input images and create output images
string get_name () const
 Get the Analyzer's name.
string get_desc () const
 Get the Analyzer's description.
void set_params (const Dict &new_params)
 Set the Analyzer parameters using a key/value dictionary.
int Lanczos (const string &maskedimages, int *kstep, float *diag, float *subdiag, float *V, float *beta)
TypeDict get_param_types () const
 Get Analyzer parameter information in a dictionary.

Static Public Member Functions

static AnalyzerNEW ()

Static Public Attributes

static const string NAME = "pca_large"

Protected Attributes

EMDatamask
int nvec

Private Attributes

int ncov
int nimages
float * eigval

Detailed Description

Definition at line 106 of file analyzer_sparx.h.


Constructor & Destructor Documentation

EMAN::PCAlarge::PCAlarge (  )  [inline]

Definition at line 109 of file analyzer_sparx.h.

Referenced by NEW().

00109 : mask(0), nvec(0) {}


Member Function Documentation

vector< EMData * > PCAlarge::analyze (  )  [virtual]

main function for Analyzer, analyze input images and create output images

Returns:
vector<EMData *> result os images analysis

Implements EMAN::Analyzer.

Definition at line 390 of file analyzer.cpp.

References diag, eigval, eigvec, EMDeletePtr(), EMAN::EMData::get_data(), EMAN::Analyzer::images, Lanczos(), mask, ncov, nimages, nvec, nx, qmat, rdata, reconstitute_image_mask(), EMAN::EMData::set_attr(), EMAN::EMData::set_size(), sgemv_(), sstevd_(), status, and subdiag.

00391 {
00392         int status = 0;
00393         int ione = 1;
00394         float one = 1.0, zero = 0.0;
00395         char trans;
00396         float *eigvec;
00397         string scratchfile = string("maskedimages.scratch");
00398         char command[100];
00399 
00400         printf("start analyzing..., ncov = %d\n", ncov);
00401 
00402         float resnrm = 0.0;
00403 
00404         if ( nvec > nimages || nvec ==0 ) nvec = nimages;
00405         int nx = ncov;
00406 
00407         // the definition of kstep is purely a heuristic for right now
00408         int kstep = nvec + 20;
00409         if (kstep > nimages) kstep = nimages;
00410 
00411         float *diag    = new float[kstep];
00412         float *subdiag = new float[kstep-1];
00413         float *vmat    = new float[nx*kstep];
00414 
00415         // run kstep-step Lanczos factorization
00416         status = Lanczos(scratchfile, &kstep, diag, subdiag,
00417                          vmat, &resnrm);
00418 
00419         // remove scratch file
00420 #ifdef _WIN32
00421         if (_unlink(scratchfile.c_str()) == -1) {
00422                 fprintf(stderr,"PCAlarge: cannot remove scratchfile\n");
00423         }
00424 #else
00425         sprintf(command,"rm -f %s\n", scratchfile.c_str());
00426         status = system(command);
00427         if (status != 0) {
00428                 fprintf(stderr,"PCAlarge: cannot remove scratchfile\n");
00429         }
00430 #endif  //_WIN32
00431 
00432         char jobz[2] = "V";
00433         float *qmat  = new float[kstep*kstep];
00434         // workspace size will be optimized later
00435         int   lwork  = 100 + 4*kstep + kstep*kstep;
00436         int   liwork = 3+5*kstep;
00437 
00438         float *work  = new float[lwork];
00439         int   *iwork = new int[liwork];
00440         int   info = 0;
00441 
00442         // call LAPACK tridiagonal eigensolver
00443         sstevd_(jobz, &kstep, diag, subdiag, qmat, &kstep, work, &lwork,
00444                 iwork, &liwork, &info);
00445 
00446         // store eigenvalues
00447         eigval = (float*)calloc(ncov,sizeof(float));
00448         eigvec = (float*)calloc(ncov*nvec,sizeof(float));
00449 
00450         for (int j = 0; j < nvec; j++) {
00451             eigval[j] = diag(kstep-j);
00452         }
00453 
00454 //         for (int i=0; i<nvec; i++) printf("eigval = %11.4e\n",
00455 //             eigval[i]);
00456 
00457         // compute eigenvectors
00458         for (int j=1; j<=nvec; j++) {
00459             trans = 'N';
00460             sgemv_(&trans, &nx,  &kstep, &one, vmat, &nx, &qmat(1,kstep-j+1),
00461                    &ione, &zero, &eigvec(1,j), &ione);
00462         }
00463 
00464         // pack eigenvectors into the return imagelist
00465         EMData *eigenimage = new EMData();
00466         eigenimage->set_size(ncov,1,1);
00467         float *rdata = eigenimage->get_data();
00468         for (int j = 1; j<= nvec; j++) {
00469             for (int i = 1; i <= ncov; i++)
00470                 rdata(i) = eigvec(i,j);
00471 
00472             EMData* recons_eigvec = Util::reconstitute_image_mask(eigenimage,mask);
00473 
00474             recons_eigvec->set_attr( "eigval", eigval[j-1] );
00475 
00476             images.push_back( recons_eigvec );
00477         }
00478 
00479         free(eigvec);
00480         EMDeletePtr(eigenimage);
00481 
00482         return images;
00483 }

string EMAN::PCAlarge::get_desc (  )  const [inline, virtual]

Get the Analyzer's description.

Returns:
The Analyzer's description.

Implements EMAN::Analyzer.

Definition at line 120 of file analyzer_sparx.h.

00121                 {
00122                         return "Principal component analysis";
00123                 }

string EMAN::PCAlarge::get_name (  )  const [inline, virtual]

Get the Analyzer's name.

Each Analyzer is identified by a unique name.

Returns:
The Analyzer's name.

Implements EMAN::Analyzer.

Definition at line 115 of file analyzer_sparx.h.

References NAME.

00116                 {
00117                         return NAME;
00118                 }               

TypeDict EMAN::PCAlarge::get_param_types (  )  const [inline, virtual]

Get Analyzer parameter information in a dictionary.

Each parameter has one record in the dictionary. Each record contains its name, data-type, and description.

Returns:
A dictionary containing the parameter info.

Implements EMAN::Analyzer.

Definition at line 137 of file analyzer_sparx.h.

References EMAN::EMObject::EMDATA, EMAN::EMObject::INT, and EMAN::TypeDict::put().

00138                 {
00139                         TypeDict d;
00140                         d.put("mask", EMObject::EMDATA, "mask image");
00141                         d.put("nvec", EMObject::INT, "number of desired principal components");
00142                         return d;
00143                 }

int PCAlarge::insert_image ( EMData image  )  [virtual]

insert a image to the list of input images

Parameters:
image 
Returns:
int 0 for success, <0 for fail

Implements EMAN::Analyzer.

Definition at line 334 of file analyzer.cpp.

References compress_image_mask(), EMDeletePtr(), EMAN::EMData::get_data(), EMAN::EMData::get_xsize(), imgdata, mask, nimages, NullPointerException, and nx.

00335 {
00336         if(mask==0)
00337                 throw NullPointerException("Null mask image pointer, set_params() first");
00338 
00339    EMData *maskedimage = Util::compress_image_mask(image,mask);
00340 
00341    FILE *fp;
00342    string scratchfile = string("maskedimages.scratch");
00343 
00344    fp = fopen(scratchfile.c_str(),"ab");
00345 
00346    int nx = maskedimage->get_xsize();
00347    float *imgdata = maskedimage->get_data();
00348    fwrite(imgdata, sizeof(float), nx, fp);
00349    nimages++;
00350 
00351    fclose(fp);
00352 
00353    EMDeletePtr(maskedimage);
00354 
00355    return 0;
00356 }

int PCAlarge::Lanczos ( const string &  maskedimages,
int *  kstep,
float *  diag,
float *  subdiag,
float *  V,
float *  beta 
)

Definition at line 498 of file analyzer.cpp.

References Av, diag, EMDeleteArray(), hvec, imgdata, ncov, nimages, saxpy_(), sdot_(), sgemv_(), snrm2_(), status, subdiag, TOL, V, and v0.

Referenced by analyze().

00501 {
00502     /*
00503         Purpose: Compute a kstep-step Lanczos factorization
00504                  on the covariant matrix X*trans(X), where
00505                  X (imgstack) contains a set of images;
00506 
00507         Input:
00508            imgstack (vector <EMData*>) a set of images on which PCA is
00509                                        to be performed;
00510 
00511            kstep (int*) The maximum number of Lanczos iterations allowed.
00512                           If Lanczos terminates before kstep steps
00513                           is reached (an invariant subspace is found),
00514                           kstep returns the number of steps taken;
00515 
00516         Output:
00517            diag (float *) The projection of the covariant matrix into a
00518                           Krylov subspace of dimension at most kstep.
00519                           The projection is a tridiagonal matrix. The
00520                           diagonal elements of this matrix is stored in
00521                           the diag array.
00522 
00523            subdiag (float*) The subdiagonal elements of the projection
00524                             is stored here.
00525 
00526            V (float *)    an imgsize by kstep array that contains a
00527                           set of orthonormal Lanczos basis vectors;
00528 
00529            beta (float *) the residual norm of the factorization;
00530     */
00531     int i, iter;
00532 
00533     float alpha;
00534     int   ione = 1;
00535     float zero = 0.0, one = 1.0, mone = -1.0;
00536     int   status = 0;
00537 
00538     char trans;
00539     int  imgsize = 0;
00540     float *v0, *Av, *hvec, *htmp, *imgdata;
00541     FILE  *fp=NULL;
00542 
00543     if (nimages <= 0) {
00544         status = 2; // no image in the stack
00545         goto EXIT;
00546     }
00547 
00548     imgsize = ncov;
00549     if (nimages <= 0) {
00550         status = 3; // no image in the stack
00551         goto EXIT;
00552     }
00553 
00554     v0   = new float[imgsize];
00555     Av   = new float[imgsize];
00556     hvec = new float[*kstep];
00557     htmp = new float[*kstep];
00558     imgdata = new float[imgsize];
00559 
00560     if (v0 == NULL || Av == NULL || hvec == NULL ||
00561         htmp == NULL || imgdata == NULL) {
00562         fprintf(stderr, "Lanczos: failed to allocate v0,Av,hvec,htmp\n");
00563         status = -1;
00564         goto EXIT;
00565     }
00566 
00567     // may choose a random starting guess here
00568     for ( i = 1; i <= imgsize; i++)
00569     {
00570         v0(i) = 1.0;
00571         Av(i) = 0.0;
00572     }
00573 
00574     // normalize the starting vector
00575     *beta  = snrm2_(&imgsize, v0, &ione);
00576     for (i = 1; i<=imgsize; i++)
00577         V(i,1) = v0(i) / (*beta);
00578 
00579     // do Av <-- A*v0, where A is a cov matrix
00580     fp = fopen(maskedimages.c_str(),"rb");
00581     if (fp==NULL) {
00582         fprintf(stderr,"Lanczos: cannot open %s\n", maskedimages.c_str());
00583     }
00584     for (i = 0; i < nimages; i++) {
00585        fread(imgdata, sizeof(float), imgsize, fp);
00586        alpha = sdot_(&imgsize, imgdata, &ione, V, &ione);
00587        saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
00588     }
00589     fclose(fp);
00590 
00591 
00592     // Av <--- Av - V(:,1)*V(:,1)'*Av
00593     diag(1) = sdot_(&imgsize, V, &ione, Av, &ione);
00594     alpha   = -diag(1);
00595     saxpy_(&imgsize, &alpha, V, &ione, Av, &ione);
00596 
00597     // main loop
00598     for ( iter = 2 ; iter <= *kstep ; iter++ ) {
00599         *beta = snrm2_(&imgsize, Av, &ione);
00600 
00601         if (*beta < TOL) {
00602             // found an invariant subspace, exit
00603             *kstep = iter;
00604             break;
00605         }
00606 
00607         subdiag(iter-1) = *beta;
00608         for ( i = 1 ; i <= imgsize ; i++ ) {
00609             V(i,iter) = Av(i) / (*beta);
00610         }
00611 
00612         // do Av <-- A*V(:,iter), where A is a cov matrix
00613         for (i = 0; i < imgsize; i++) Av[i] = 0;
00614         fp = fopen(maskedimages.c_str(),"rb");
00615         for (i = 0; i < nimages; i++) {
00616            fread(imgdata, sizeof(float), imgsize, fp);
00617            alpha = sdot_(&imgsize, imgdata, &ione, &V(1,iter), &ione);
00618            saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
00619         }
00620         fclose(fp);
00621 
00622         // f <--- Av - V(:,1:iter)*V(:,1:iter)'*Av
00623         trans = 'T';
00624         status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
00625                         &zero , hvec    , &ione);
00626         trans = 'N';
00627         status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, hvec,
00628                         &ione , &one    , Av, &ione);
00629 
00630         // one step of reorthogonalization
00631         trans = 'T';
00632         status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
00633                         &zero , htmp    , &ione);
00634         saxpy_(&iter, &one, htmp, &ione, hvec, &ione);
00635         trans = 'N';
00636         status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, htmp,
00637                         &ione , &one    , Av, &ione);
00638         diag(iter) = hvec(iter);
00639     }
00640 
00641     EMDeleteArray(v0);
00642     EMDeleteArray(Av);
00643     EMDeleteArray(hvec);
00644     EMDeleteArray(htmp);
00645     EMDeleteArray(imgdata);
00646 
00647 EXIT:
00648     return status;
00649 
00650 }

static Analyzer* EMAN::PCAlarge::NEW (  )  [inline, static]

Definition at line 125 of file analyzer_sparx.h.

References PCAlarge().

00126                 {
00127                         return new PCAlarge();
00128                 }

void PCAlarge::set_params ( const Dict new_params  )  [virtual]

Set the Analyzer parameters using a key/value dictionary.

Parameters:
new_params A dictionary containing the new parameters.

Reimplemented from EMAN::Analyzer.

Definition at line 358 of file analyzer.cpp.

References compress_image_mask(), EMDeletePtr(), EMAN::EMData::get_xsize(), EMAN::EMData::get_ysize(), EMAN::EMData::get_zsize(), mask, ncov, nimages, nvec, nx, ny, EMAN::Analyzer::params, and EMAN::EMData::set_size().

00359 {
00360         params = new_params;
00361         mask = params["mask"];
00362         nvec = params["nvec"];
00363 
00364         // count the number of pixels under the mask
00365         // (this is really ugly!!!)
00366         EMData *dummy = new EMData();
00367 
00368         int nx = mask->get_xsize();
00369         int ny = mask->get_ysize();
00370         int nz = mask->get_zsize();
00371 
00372         dummy->set_size(nx,ny,nz);
00373 
00374         EMData *dummy1d = Util::compress_image_mask(dummy,mask);
00375 
00376         ncov = dummy1d->get_xsize();
00377 
00378         EMDeletePtr(dummy);
00379         EMDeletePtr(dummy1d);
00380         // no need to allocate the covariance matrix
00381         nimages = 0;
00382 }


Member Data Documentation

float* EMAN::PCAlarge::eigval [private]

Definition at line 154 of file analyzer_sparx.h.

Referenced by analyze().

EMData* EMAN::PCAlarge::mask [protected]

Definition at line 148 of file analyzer_sparx.h.

Referenced by analyze(), insert_image(), and set_params().

const string EMAN::PCAlarge::NAME = "pca_large" [static]

Definition at line 145 of file analyzer_sparx.h.

Referenced by get_name().

int EMAN::PCAlarge::ncov [private]

Definition at line 152 of file analyzer_sparx.h.

Referenced by analyze(), Lanczos(), and set_params().

int EMAN::PCAlarge::nimages [private]

Definition at line 153 of file analyzer_sparx.h.

Referenced by analyze(), insert_image(), Lanczos(), and set_params().

int EMAN::PCAlarge::nvec [protected]

Definition at line 149 of file analyzer_sparx.h.

Referenced by analyze(), and set_params().


The documentation for this class was generated from the following files:
Generated on Tue Jun 11 12:45:10 2013 for EMAN2 by  doxygen 1.4.7