#include <pca.h>
Public Member Functions | |
| int | dopca (vector< EMData * > imgstack, EMData *mask) |
| int | dopca (vector< EMData * > imgstack, EMData *mask, int nvec) |
| int | dopca_lan (vector< EMData * > imgstack, EMData *mask, int nvec) |
| int | dopca_ooc (const string &filename_in, const string &filename_out, const string &lanscratch, EMData *mask, int nvec) |
| int | Lanczos (vector< EMData * > imgstack, int *maxiter, float *diag, float *subdiag, float *V, float *beta) |
| int | Lanczos_ooc (string const &filename_in, int *kstep, float *diag, float *subdiag, string const &lanscratch, float *beta) |
| vector< float > | get_vals () |
| vector< EMData * > | get_vecs () |
| void | clear () |
Public Attributes | |
| vector< float > | singular_vals |
| vector< EMData * > | eigenimages |
|
|
Definition at line 638 of file pca.cpp. References eigenimages, and singular_vals. 00639 {
00640 // flush singular values and vectors
00641 singular_vals.clear();
00642 eigenimages.clear();
00643 }
|
|
||||||||||||||||
|
Definition at line 74 of file pca.cpp. References eigenimages, status, and EMAN::Util::svdcmp(). 00075 {
00076 // performs PCA on a list of images (each under a mask)
00077 // returns a list of eigenimages
00078
00079 vector<EMData*> img1dlst;
00080 vector<EMData*> eigvecs;
00081 int status = 0;
00082
00083 int nimgs = imgstack.size();
00084
00085 for (int i=0; i<nimgs; i++) {
00086 img1dlst.push_back(Util::compress_image_mask(imgstack[i],mask));
00087 }
00088
00089 if ( nimgs < nvec || nvec == 0) nvec = nimgs;
00090 eigvecs = Util::svdcmp(img1dlst, nvec);
00091 img1dlst.clear();
00092
00093 for (int i=0; i<nvec; i++) {
00094 eigenimages.push_back(Util::reconstitute_image_mask(eigvecs[i],mask));
00095 }
00096
00097 eigvecs.clear();
00098
00099 return status;
00100 }
|
|
||||||||||||
|
Definition at line 42 of file pca.cpp. References eigenimages, status, and EMAN::Util::svdcmp(). 00043 {
00044 // performs PCA on a list of images (each under a mask)
00045 // returns a list of eigenimages
00046
00047 int i, status = 0;
00048
00049 vector<EMData*> img1dlst;
00050 vector<EMData*> eigvecs;
00051
00052 int nimgs = imgstack.size();
00053
00054 for (i=0; i<nimgs; i++) {
00055 img1dlst.push_back(Util::compress_image_mask(imgstack[i],mask));
00056 }
00057
00058 // for right now, compute a full SVD
00059 eigvecs = Util::svdcmp(img1dlst, 0);
00060
00061 img1dlst.clear();
00062
00063 for (i=0; i<nimgs; i++) {
00064 eigenimages.push_back(Util::reconstitute_image_mask(eigvecs[i],mask));
00065 }
00066
00067 eigvecs.clear();
00068
00069 return status;
00070 }
|
|
||||||||||||||||
|
Definition at line 107 of file pca.cpp. References diag, eigenimages, eigvec, EMDeleteArray(), EMDeletePtr(), EMAN::EMData::get_data(), Lanczos(), nx, qmat, EMAN::EMData::set_size(), sgemv_(), singular_vals, sqrt(), sstevd_(), status, and subdiag. 00108 {
00109 // performs PCA on a list of images (each under a mask)
00110 // returns a list of eigenimages
00111
00112 int ione = 1;
00113 float one = 1.0, zero = 0.0;
00114 char trans;
00115
00116 vector<EMData*> img1dlst;
00117 vector<EMData*> eigvecs;
00118
00119 int status = 0;
00120 int nimgs = imgstack.size();
00121 for (int i=0; i<nimgs; i++) {
00122 img1dlst.push_back(Util::compress_image_mask(imgstack[i],mask));
00123 }
00124
00125 float resnrm = 0.0;
00126
00127 if ( nvec > nimgs || nvec ==0 ) nvec = nimgs;
00128
00129 int nx = img1dlst[0]->get_xsize();
00130 // the definition of kstep is purely a heuristic for right now
00131 int kstep = nvec + 20;
00132 if (kstep > nimgs) kstep = nimgs;
00133
00134 float *diag = new float[kstep];
00135 float *subdiag = new float[kstep-1];
00136 float *vmat = new float[nx*kstep];
00137
00138 // run kstep-step Lanczos factorization
00139 status = Lanczos(img1dlst, &kstep, diag, subdiag, vmat, &resnrm);
00140
00141 char jobz[2] = "V";
00142 float *qmat = new float[kstep*kstep];
00143 // workspace size will be optimized later
00144 int lwork = 100 + 4*kstep + kstep*kstep;
00145 int liwork = 3+5*kstep;
00146
00147 float *work = new float[lwork];
00148 int *iwork = new int[liwork];
00149 int info = 0;
00150
00151 // call LAPACK tridiagonal eigensolver
00152 sstevd_(jobz, &kstep, diag, subdiag, qmat, &kstep, work, &lwork,
00153 iwork, &liwork, &info);
00154
00155 // store singular values
00156 // eigenvalues of the cov matrix have been sorted in ascending order
00157 for (int j = kstep; j > kstep - nvec; j--) {
00158 singular_vals.push_back((float)sqrt(diag(j)));
00159 }
00160
00161 img1dlst.clear();
00162
00163 EMData *eigvec = new EMData();
00164
00165 eigvec->set_size(nx, 1, 1);
00166 float *ritzvec = eigvec->get_data();
00167
00168 // compute Ritz vectors (approximate eigenvectors) one at a time
00169 for (int j=0; j<nvec; j++) {
00170 trans = 'N';
00171 sgemv_(&trans, &nx, &kstep, &one, vmat, &nx, &qmat(1,kstep-j), &ione,
00172 &zero , ritzvec, &ione);
00173 eigenimages.push_back(Util::reconstitute_image_mask(eigvec,mask));
00174 }
00175
00176 eigvecs.clear();
00177
00178 EMDeleteArray(diag);
00179 EMDeleteArray(subdiag);
00180 EMDeleteArray(vmat);
00181 EMDeleteArray(qmat);
00182 EMDeleteArray(work);
00183 EMDeleteArray(iwork);
00184 EMDeletePtr(eigvec);
00185 return status;
00186 }
|
|
||||||||||||||||||||||||
|
Definition at line 190 of file pca.cpp. References compress_image_mask(), diag, eigvec, EMDeleteArray(), EMDeletePtr(), EMAN::EMData::get_data(), EMAN::EMUtil::get_image_count(), EMAN::EMData::get_ndim(), EMAN::EMData::get_xsize(), Lanczos_ooc(), nx, qmat, EMAN::EMData::read_image(), reconstitute_image_mask(), saxpy_(), EMAN::EMData::set_size(), singular_vals, sqrt(), sstevd_(), status, subdiag, and EMAN::EMData::write_image(). 00192 {
00193 int status = 0, ione = 1;
00194 EMData *image_raw = new EMData();
00195 EMData *image_masked = NULL;
00196
00197 int nimgs = EMUtil::get_image_count(filename_in);
00198
00199 if (nimgs <= 0) {
00200 status = 2;
00201 fprintf(stderr,"dopca_ooc: no image in %s\n", filename_in.c_str());
00202 }
00203 for (int i=0; i<nimgs; i++) {
00204 image_raw->read_image(filename_in, i);
00205 image_masked=Util::compress_image_mask(image_raw,mask);
00206 image_masked->write_image("temp_masked_images.img",i);
00207 }
00208
00209 int ndim = image_masked->get_ndim();
00210 if (ndim != 1) {
00211 fprintf(stderr,"dopca_ooc: masked image should be 1-D\n");
00212 status = 3; // images should all be 1-d
00213 }
00214
00215 int nx = image_masked->get_xsize();
00216
00217 float resnrm = 0.0;
00218
00219 if ( nvec > nimgs || nvec ==0 ) nvec = nimgs;
00220
00221 // the definition of kstep is purely a heuristic for right now
00222 int kstep = nvec + 20;
00223 if (kstep > nimgs) kstep = nimgs;
00224
00225 float *diag = new float[kstep];
00226 float *subdiag = new float[kstep-1];
00227
00228 status = Lanczos_ooc("temp_masked_images.img", &kstep, diag, subdiag,
00229 lanscratch , &resnrm);
00230
00231 char jobz[2] = "V";
00232 float *qmat = new float[kstep*kstep];
00233 // workspace size will be optimized later
00234 int lwork = 100 + 4*kstep + kstep*kstep;
00235 int liwork = 3+5*kstep;
00236
00237 float *work = new float[lwork];
00238 int *iwork = new int[liwork];
00239 int info = 0;
00240
00241 // call LAPACK tridiagonal eigensolver
00242 sstevd_(jobz, &kstep, diag, subdiag, qmat, &kstep, work, &lwork,
00243 iwork, &liwork, &info);
00244
00245 // store singular values
00246 // eigenvalues of the cov matrix have been sorted in ascending order
00247 for (int j = kstep; j > kstep - nvec; j--) {
00248 singular_vals.push_back((float)sqrt(diag(j)));
00249 }
00250
00251 EMData *eigvec = new EMData();
00252 eigvec->set_size(nx, 1, 1);
00253 float *ritzvec = eigvec->get_data();
00254 EMData *newimage = 0;
00255
00256 // compute Ritz vectors (approximate eigenvectors) one at a time
00257 FILE *fp;
00258 float *vlan = new float[nx];
00259 fp = fopen(lanscratch.c_str(),"rb");
00260 for (int j=0; j<nvec; j++) {
00261 for (int i = 0; i<nx; i++) ritzvec[i]=0.0;
00262 for (int jlan = 1; jlan <= kstep; jlan++) {
00263 fread(vlan, sizeof(float), nx, fp);
00264 saxpy_(&nx, &qmat(jlan,kstep-j), vlan, &ione, ritzvec, &ione);
00265 }
00266 rewind(fp);
00267 newimage = Util::reconstitute_image_mask(eigvec,mask);
00268 newimage->write_image(filename_out,j);
00269 }
00270 fclose(fp);
00271
00272 EMDeleteArray(diag);
00273 EMDeleteArray(subdiag);
00274 EMDeleteArray(qmat);
00275 EMDeleteArray(work);
00276 EMDeleteArray(iwork);
00277 EMDeleteArray(vlan);
00278 EMDeletePtr(eigvec);
00279 EMDeletePtr(newimage);
00280
00281 return status;
00282 }
|
|
|
Definition at line 626 of file pca.cpp. 00627 {
00628 // method for retrieving singular values
00629 return singular_vals;
00630 }
|
|
|
Definition at line 632 of file pca.cpp. 00633 {
00634 // method for retrieving right singular vectors (eigenimages)
00635 return eigenimages;
00636 }
|
|
||||||||||||||||||||||||||||
|
Definition at line 295 of file pca.cpp. References Av, diag, EMDeleteArray(), EMAN::EMData::get_data(), EMAN::EMData::get_ndim(), EMAN::EMData::get_xsize(), hvec, imgdata, saxpy_(), sdot_(), sgemv_(), snrm2_(), status, subdiag, V, and v0. Referenced by dopca_lan(). 00298 {
00299 /*
00300 Purpose: Compute a kstep-step Lanczos factorization
00301 on the covariant matrix X*trans(X), where
00302 X (imgstack) contains a set of images;
00303
00304 Input:
00305 imgstack (vector <EMData*>) a set of images on which PCA is
00306 to be performed;
00307
00308 kstep (int*) The maximum number of Lanczos iterations allowed.
00309 If Lanczos terminates before kstep steps
00310 is reached (an invariant subspace is found),
00311 kstep returns the number of steps taken;
00312
00313 Output:
00314 diag (float *) The projection of the covariant matrix into a
00315 Krylov subspace of dimension at most kstep.
00316 The projection is a tridiagonal matrix. The
00317 diagonal elements of this matrix is stored in
00318 the diag array.
00319
00320 subdiag (float*) The subdiagonal elements of the projection
00321 is stored here.
00322
00323 V (float *) an imgsize by kstep array that contains a
00324 set of orthonormal Lanczos basis vectors;
00325
00326 beta (float *) the residual norm of the factorization;
00327 */
00328 int i, iter;
00329
00330 float alpha;
00331 int ione = 1;
00332 float zero = 0.0, one = 1.0, mone = -1.0;
00333 int status = 0;
00334
00335 char trans;
00336 int nimgs = 0, imgsize = 0, ndim = 0;
00337 float *v0, *Av, *hvec, *htmp, *imgdata;
00338
00339 nimgs = imgstack.size();
00340 if (nimgs <= 0) {
00341 status = 2; // no image in the stack
00342 goto EXIT;
00343 }
00344
00345 ndim = imgstack[0]->get_ndim();
00346 if (ndim != 1) {
00347 status = 3; // images should all be 1-d
00348 goto EXIT;
00349 }
00350
00351 imgsize = imgstack[0]->get_xsize();
00352
00353 v0 = new float[imgsize];
00354 Av = new float[imgsize];
00355 hvec = new float[*kstep];
00356 htmp = new float[*kstep];
00357
00358 if (v0 == NULL || Av == NULL || hvec == NULL || htmp == NULL ) {
00359 fprintf(stderr, "Lanczos: failed to allocate v0,Av,hvec,htmp\n");
00360 status = -1;
00361 goto EXIT;
00362 }
00363
00364 // may choose a random starting guess here
00365 for ( i = 1; i <= imgsize; i++) v0(i) = 1.0;
00366
00367 // normalize the starting vector
00368 *beta = snrm2_(&imgsize, v0, &ione);
00369 for (i = 1; i<=imgsize; i++)
00370 V(i,1) = v0(i) / (*beta);
00371
00372 // do Av <-- A*v0, where A is a cov matrix
00373 for (i = 0; i < nimgs; i++) {
00374 imgdata = imgstack[i]->get_data();
00375 alpha = sdot_(&imgsize, imgdata, &ione, V, &ione);
00376 saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
00377 }
00378
00379 // Av <--- Av - V(:,1)*V(:,1)'*Av
00380 diag(1) = sdot_(&imgsize, V, &ione, Av, &ione);
00381 alpha = -diag(1);
00382 saxpy_(&imgsize, &alpha, V, &ione, Av, &ione);
00383
00384 // main loop
00385 for ( iter = 2 ; iter <= *kstep ; iter++ ) {
00386 *beta = snrm2_(&imgsize, Av, &ione);
00387
00388 if (*beta < TOL) {
00389 // found an invariant subspace, exit
00390 *kstep = iter;
00391 break;
00392 }
00393
00394 subdiag(iter-1) = *beta;
00395 for ( i = 1 ; i <= imgsize ; i++ ) {
00396 V(i,iter) = Av(i) / (*beta);
00397 }
00398
00399 // do Av <-- A*V(:,iter), where A is a cov matrix
00400 for (i = 0; i < imgsize; i++) Av[i] = 0;
00401 for (i = 0; i < nimgs; i++) {
00402 imgdata = imgstack[i]->get_data();
00403 alpha = sdot_(&imgsize, imgdata, &ione, &V(1,iter), &ione);
00404 saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
00405 }
00406
00407 // f <--- Av - V(:,1:iter)*V(:,1:iter)'*Av
00408 trans = 'T';
00409 status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
00410 &zero , hvec , &ione);
00411 trans = 'N';
00412 status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, hvec,
00413 &ione , &one , Av, &ione);
00414
00415 // one step of reorthogonalization
00416 trans = 'T';
00417 status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
00418 &zero , htmp , &ione);
00419 saxpy_(&iter, &one, htmp, &ione, hvec, &ione);
00420 trans = 'N';
00421 status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, htmp,
00422 &ione , &one , Av, &ione);
00423 diag(iter) = hvec(iter);
00424 }
00425
00426 EMDeleteArray(v0);
00427 EMDeleteArray(Av);
00428 EMDeleteArray(hvec);
00429 EMDeleteArray(htmp);
00430
00431 EXIT:
00432 return status;
00433 }
|
|
||||||||||||||||||||||||||||
|
Definition at line 455 of file pca.cpp. References Av, diag, EMDeleteArray(), EMDeletePtr(), EMAN::EMData::get_data(), EMAN::EMUtil::get_image_count(), EMAN::EMData::get_ndim(), EMAN::EMData::get_xsize(), imgdata, EMAN::EMData::read_image(), resid, saxpy_(), scopy_(), sdot_(), snrm2_(), sscal_(), status, subdiag, and v. Referenced by dopca_ooc(). 00458 {
00459 /*
00460 Purpose: Compute a kstep-step Lanczos factorization
00461 on the covariant matrix X*trans(X), where
00462 X (imgstack) contains a set of images;
00463
00464 Input:
00465 filename_in The name of the input file that contains
00466 (string) masked images;
00467
00468 kstep (int*) The maximum number of Lanczos iterations allowed.
00469 If Lanczos terminates before kstep steps
00470 is reached (an invariant subspace is found),
00471 kstep returns the number of steps taken;
00472
00473 lanscratch The name of the file that will be used to
00474 (string) store a set of orthonormal Lanczos basis vectors;
00475
00476 Output:
00477 diag (float*) The projection of the covariant matrix into a
00478 Krylov subspace of dimension at most kstep.
00479 The projection is a tridiagonal matrix. The
00480 diagonal elements of this matrix is stored in
00481 the diag array;
00482
00483 subdiag (float*) The subdiagonal elements of the projection
00484 is stored here;
00485
00486 beta (float *) The residual norm of the factorization;
00487 */
00488 int i, iter;
00489 EMData *maskedimage;
00490
00491 float alpha;
00492 int ione = 1;
00493 int status = 0;
00494
00495 int imgsize = 0, ndim = 0;
00496 float *v, *Av, *resid, *imgdata;
00497 float h = 0.0, htmp=0.0;
00498
00499 int nimgs = EMUtil::get_image_count(filename_in);
00500 if (nimgs <= 0) {
00501 status = 2; // no image in the stack
00502 goto EXIT;
00503 }
00504
00505 maskedimage = new EMData();
00506 maskedimage->read_image(filename_in,0);
00507 // what happens when filename_in does not exist, or when read fails?
00508
00509 ndim = maskedimage->get_ndim();
00510 if (ndim != 1) {
00511 status = 3; // images should all be 1-d
00512 goto EXIT;
00513 }
00514
00515 imgsize = maskedimage->get_xsize();
00516
00517 v = new float[imgsize];
00518 Av = new float[imgsize];
00519 resid = new float[imgsize];
00520
00521 if (v == NULL || Av == NULL ) {
00522 fprintf(stderr, "Lanczos: failed to allocate v,Av\n");
00523 status = -1;
00524 goto EXIT;
00525 }
00526
00527 // may choose a random starting guess here
00528 for ( i = 1; i <= imgsize; i++) v(i) = 1.0;
00529
00530 // normalize the starting vector
00531 *beta = snrm2_(&imgsize, v, &ione);
00532 alpha = 1/(*beta);
00533 sscal_(&imgsize, &alpha, v, &ione);
00534 // write v out to a scratch file
00535 FILE *fp;
00536 fp = fopen(lanscratch.c_str(), "wb");
00537 fwrite(v, sizeof(float), imgsize, fp);
00538 fclose(fp);
00539
00540 // do Av <-- A*v, where A is a cov matrix
00541 for (i = 0; i < nimgs; i++) {
00542 maskedimage->read_image(filename_in,i);
00543 imgdata = maskedimage->get_data();
00544 alpha = sdot_(&imgsize, imgdata, &ione, v, &ione);
00545 saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
00546 }
00547
00548 // Av <--- Av - V(:,1)*V(:,1)'*Av
00549 diag(1) = sdot_(&imgsize, v, &ione, Av, &ione);
00550 alpha = -diag(1);
00551 scopy_(&imgsize, Av, &ione, resid, &ione);
00552 saxpy_(&imgsize, &alpha, v, &ione, resid, &ione);
00553
00554 // main loop
00555 for ( iter = 2 ; iter <= *kstep ; iter++ ) {
00556 *beta = snrm2_(&imgsize, resid, &ione);
00557
00558 if (*beta < TOL) {
00559 // found an invariant subspace, exit
00560 *kstep = iter;
00561 break;
00562 }
00563
00564 subdiag(iter-1) = *beta;
00565 for ( i = 1 ; i <= imgsize ; i++ ) {
00566 v(i) = resid(i) / (*beta);
00567 }
00568
00569 // write v out to a scratch file at appropriate position
00570 fp = fopen(lanscratch.c_str(),"ab");
00571 fwrite(v, sizeof(float), imgsize, fp);
00572 fclose(fp);
00573
00574 // do Av <-- A*V(:,iter), where A is a cov matrix
00575 for (i = 0; i < imgsize; i++) Av[i] = 0;
00576 for (i = 0; i < nimgs; i++) {
00577 maskedimage->read_image(filename_in,i);
00578 imgdata = maskedimage->get_data();
00579 alpha = sdot_(&imgsize, imgdata, &ione, v, &ione);
00580 saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
00581 }
00582
00583 // f <--- Av - V(:,1:iter)*V(:,1:iter)'*Av
00584 // the out-of-core version reads one Lanczos vector at a time
00585 scopy_(&imgsize, Av, &ione, resid, &ione);
00586 fp = fopen(lanscratch.c_str(),"rb");
00587 for (int jlan = 1; jlan <= iter; jlan++) {
00588 fread(v, sizeof(float), imgsize, fp);
00589 h = sdot_(&imgsize, v, &ione, Av, &ione);
00590 alpha = -h;
00591 saxpy_(&imgsize, &alpha, v, &ione, resid, &ione);
00592 }
00593 fclose(fp);
00594
00595 // one step of reorthogonalization
00596 scopy_(&imgsize, resid, &ione, Av, &ione);
00597 fp = fopen(lanscratch.c_str(),"rb");
00598 for (int jlan = 1; jlan <= iter; jlan++) {
00599 fread(v, sizeof(float), imgsize, fp);
00600 htmp = sdot_(&imgsize, v, &ione, Av, &ione);
00601 alpha = -htmp;
00602 saxpy_(&imgsize, &alpha, v, &ione, resid, &ione);
00603 h += htmp;
00604 }
00605 fclose(fp);
00606 diag(iter) = h;
00607 }
00608
00609 EMDeleteArray(v);
00610 EMDeleteArray(Av);
00611 EMDeleteArray(resid);
00612 EMDeletePtr(maskedimage);
00613
00614 EXIT:
00615 return status;
00616 }
|
|
|
Definition at line 41 of file pca.h. Referenced by clear(), dopca(), and dopca_lan(). |
|
|
Definition at line 40 of file pca.h. Referenced by clear(), dopca_lan(), and dopca_ooc(). |
1.3.9.1