#include <pca.h>
Public Member Functions | |
int | dopca (vector< EMData * > imgstack, EMData *mask) |
int | dopca (vector< EMData * > imgstack, EMData *mask, int nvec) |
int | dopca_lan (vector< EMData * > imgstack, EMData *mask, int nvec) |
int | dopca_ooc (const string &filename_in, const string &filename_out, const string &lanscratch, EMData *mask, int nvec) |
int | Lanczos (vector< EMData * > imgstack, int *maxiter, float *diag, float *subdiag, float *V, float *beta) |
int | Lanczos_ooc (string const &filename_in, int *kstep, float *diag, float *subdiag, string const &lanscratch, float *beta) |
vector< float > | get_vals () |
vector< EMData * > | get_vecs () |
void | clear () |
Public Attributes | |
vector< float > | singular_vals |
vector< EMData * > | eigenimages |
|
Definition at line 638 of file pca.cpp. References eigenimages, and singular_vals. 00639 { 00640 // flush singular values and vectors 00641 singular_vals.clear(); 00642 eigenimages.clear(); 00643 }
|
|
Definition at line 74 of file pca.cpp. References eigenimages, status, and EMAN::Util::svdcmp(). 00075 { 00076 // performs PCA on a list of images (each under a mask) 00077 // returns a list of eigenimages 00078 00079 vector<EMData*> img1dlst; 00080 vector<EMData*> eigvecs; 00081 int status = 0; 00082 00083 int nimgs = imgstack.size(); 00084 00085 for (int i=0; i<nimgs; i++) { 00086 img1dlst.push_back(Util::compress_image_mask(imgstack[i],mask)); 00087 } 00088 00089 if ( nimgs < nvec || nvec == 0) nvec = nimgs; 00090 eigvecs = Util::svdcmp(img1dlst, nvec); 00091 img1dlst.clear(); 00092 00093 for (int i=0; i<nvec; i++) { 00094 eigenimages.push_back(Util::reconstitute_image_mask(eigvecs[i],mask)); 00095 } 00096 00097 eigvecs.clear(); 00098 00099 return status; 00100 }
|
|
Definition at line 42 of file pca.cpp. References eigenimages, status, and EMAN::Util::svdcmp(). 00043 { 00044 // performs PCA on a list of images (each under a mask) 00045 // returns a list of eigenimages 00046 00047 int i, status = 0; 00048 00049 vector<EMData*> img1dlst; 00050 vector<EMData*> eigvecs; 00051 00052 int nimgs = imgstack.size(); 00053 00054 for (i=0; i<nimgs; i++) { 00055 img1dlst.push_back(Util::compress_image_mask(imgstack[i],mask)); 00056 } 00057 00058 // for right now, compute a full SVD 00059 eigvecs = Util::svdcmp(img1dlst, 0); 00060 00061 img1dlst.clear(); 00062 00063 for (i=0; i<nimgs; i++) { 00064 eigenimages.push_back(Util::reconstitute_image_mask(eigvecs[i],mask)); 00065 } 00066 00067 eigvecs.clear(); 00068 00069 return status; 00070 }
|
|
Definition at line 107 of file pca.cpp. References diag, eigenimages, eigvec, EMDeleteArray(), EMDeletePtr(), EMAN::EMData::get_data(), Lanczos(), nx, qmat, EMAN::EMData::set_size(), sgemv_(), singular_vals, sqrt(), sstevd_(), status, and subdiag. 00108 { 00109 // performs PCA on a list of images (each under a mask) 00110 // returns a list of eigenimages 00111 00112 int ione = 1; 00113 float one = 1.0, zero = 0.0; 00114 char trans; 00115 00116 vector<EMData*> img1dlst; 00117 vector<EMData*> eigvecs; 00118 00119 int status = 0; 00120 int nimgs = imgstack.size(); 00121 for (int i=0; i<nimgs; i++) { 00122 img1dlst.push_back(Util::compress_image_mask(imgstack[i],mask)); 00123 } 00124 00125 float resnrm = 0.0; 00126 00127 if ( nvec > nimgs || nvec ==0 ) nvec = nimgs; 00128 00129 int nx = img1dlst[0]->get_xsize(); 00130 // the definition of kstep is purely a heuristic for right now 00131 int kstep = nvec + 20; 00132 if (kstep > nimgs) kstep = nimgs; 00133 00134 float *diag = new float[kstep]; 00135 float *subdiag = new float[kstep-1]; 00136 float *vmat = new float[nx*kstep]; 00137 00138 // run kstep-step Lanczos factorization 00139 status = Lanczos(img1dlst, &kstep, diag, subdiag, vmat, &resnrm); 00140 00141 char jobz[2] = "V"; 00142 float *qmat = new float[kstep*kstep]; 00143 // workspace size will be optimized later 00144 int lwork = 100 + 4*kstep + kstep*kstep; 00145 int liwork = 3+5*kstep; 00146 00147 float *work = new float[lwork]; 00148 int *iwork = new int[liwork]; 00149 int info = 0; 00150 00151 // call LAPACK tridiagonal eigensolver 00152 sstevd_(jobz, &kstep, diag, subdiag, qmat, &kstep, work, &lwork, 00153 iwork, &liwork, &info); 00154 00155 // store singular values 00156 // eigenvalues of the cov matrix have been sorted in ascending order 00157 for (int j = kstep; j > kstep - nvec; j--) { 00158 singular_vals.push_back((float)sqrt(diag(j))); 00159 } 00160 00161 img1dlst.clear(); 00162 00163 EMData *eigvec = new EMData(); 00164 00165 eigvec->set_size(nx, 1, 1); 00166 float *ritzvec = eigvec->get_data(); 00167 00168 // compute Ritz vectors (approximate eigenvectors) one at a time 00169 for (int j=0; j<nvec; j++) { 00170 trans = 'N'; 00171 sgemv_(&trans, &nx, &kstep, &one, vmat, &nx, &qmat(1,kstep-j), &ione, 00172 &zero , ritzvec, &ione); 00173 eigenimages.push_back(Util::reconstitute_image_mask(eigvec,mask)); 00174 } 00175 00176 eigvecs.clear(); 00177 00178 EMDeleteArray(diag); 00179 EMDeleteArray(subdiag); 00180 EMDeleteArray(vmat); 00181 EMDeleteArray(qmat); 00182 EMDeleteArray(work); 00183 EMDeleteArray(iwork); 00184 EMDeletePtr(eigvec); 00185 return status; 00186 }
|
|
Definition at line 190 of file pca.cpp. References compress_image_mask(), diag, eigvec, EMDeleteArray(), EMDeletePtr(), EMAN::EMData::get_data(), EMAN::EMUtil::get_image_count(), EMAN::EMData::get_ndim(), EMAN::EMData::get_xsize(), Lanczos_ooc(), nx, qmat, EMAN::EMData::read_image(), reconstitute_image_mask(), saxpy_(), EMAN::EMData::set_size(), singular_vals, sqrt(), sstevd_(), status, subdiag, and EMAN::EMData::write_image(). 00192 { 00193 int status = 0, ione = 1; 00194 EMData *image_raw = new EMData(); 00195 EMData *image_masked = NULL; 00196 00197 int nimgs = EMUtil::get_image_count(filename_in); 00198 00199 if (nimgs <= 0) { 00200 status = 2; 00201 fprintf(stderr,"dopca_ooc: no image in %s\n", filename_in.c_str()); 00202 } 00203 for (int i=0; i<nimgs; i++) { 00204 image_raw->read_image(filename_in, i); 00205 image_masked=Util::compress_image_mask(image_raw,mask); 00206 image_masked->write_image("temp_masked_images.img",i); 00207 } 00208 00209 int ndim = image_masked->get_ndim(); 00210 if (ndim != 1) { 00211 fprintf(stderr,"dopca_ooc: masked image should be 1-D\n"); 00212 status = 3; // images should all be 1-d 00213 } 00214 00215 int nx = image_masked->get_xsize(); 00216 00217 float resnrm = 0.0; 00218 00219 if ( nvec > nimgs || nvec ==0 ) nvec = nimgs; 00220 00221 // the definition of kstep is purely a heuristic for right now 00222 int kstep = nvec + 20; 00223 if (kstep > nimgs) kstep = nimgs; 00224 00225 float *diag = new float[kstep]; 00226 float *subdiag = new float[kstep-1]; 00227 00228 status = Lanczos_ooc("temp_masked_images.img", &kstep, diag, subdiag, 00229 lanscratch , &resnrm); 00230 00231 char jobz[2] = "V"; 00232 float *qmat = new float[kstep*kstep]; 00233 // workspace size will be optimized later 00234 int lwork = 100 + 4*kstep + kstep*kstep; 00235 int liwork = 3+5*kstep; 00236 00237 float *work = new float[lwork]; 00238 int *iwork = new int[liwork]; 00239 int info = 0; 00240 00241 // call LAPACK tridiagonal eigensolver 00242 sstevd_(jobz, &kstep, diag, subdiag, qmat, &kstep, work, &lwork, 00243 iwork, &liwork, &info); 00244 00245 // store singular values 00246 // eigenvalues of the cov matrix have been sorted in ascending order 00247 for (int j = kstep; j > kstep - nvec; j--) { 00248 singular_vals.push_back((float)sqrt(diag(j))); 00249 } 00250 00251 EMData *eigvec = new EMData(); 00252 eigvec->set_size(nx, 1, 1); 00253 float *ritzvec = eigvec->get_data(); 00254 EMData *newimage = 0; 00255 00256 // compute Ritz vectors (approximate eigenvectors) one at a time 00257 FILE *fp; 00258 float *vlan = new float[nx]; 00259 fp = fopen(lanscratch.c_str(),"rb"); 00260 for (int j=0; j<nvec; j++) { 00261 for (int i = 0; i<nx; i++) ritzvec[i]=0.0; 00262 for (int jlan = 1; jlan <= kstep; jlan++) { 00263 fread(vlan, sizeof(float), nx, fp); 00264 saxpy_(&nx, &qmat(jlan,kstep-j), vlan, &ione, ritzvec, &ione); 00265 } 00266 rewind(fp); 00267 newimage = Util::reconstitute_image_mask(eigvec,mask); 00268 newimage->write_image(filename_out,j); 00269 } 00270 fclose(fp); 00271 00272 EMDeleteArray(diag); 00273 EMDeleteArray(subdiag); 00274 EMDeleteArray(qmat); 00275 EMDeleteArray(work); 00276 EMDeleteArray(iwork); 00277 EMDeleteArray(vlan); 00278 EMDeletePtr(eigvec); 00279 EMDeletePtr(newimage); 00280 00281 return status; 00282 }
|
|
Definition at line 626 of file pca.cpp. 00627 { 00628 // method for retrieving singular values 00629 return singular_vals; 00630 }
|
|
Definition at line 632 of file pca.cpp. 00633 { 00634 // method for retrieving right singular vectors (eigenimages) 00635 return eigenimages; 00636 }
|
|
Definition at line 295 of file pca.cpp. References Av, diag, EMDeleteArray(), EMAN::EMData::get_data(), EMAN::EMData::get_ndim(), EMAN::EMData::get_xsize(), hvec, imgdata, saxpy_(), sdot_(), sgemv_(), snrm2_(), status, subdiag, V, and v0. Referenced by dopca_lan(). 00298 { 00299 /* 00300 Purpose: Compute a kstep-step Lanczos factorization 00301 on the covariant matrix X*trans(X), where 00302 X (imgstack) contains a set of images; 00303 00304 Input: 00305 imgstack (vector <EMData*>) a set of images on which PCA is 00306 to be performed; 00307 00308 kstep (int*) The maximum number of Lanczos iterations allowed. 00309 If Lanczos terminates before kstep steps 00310 is reached (an invariant subspace is found), 00311 kstep returns the number of steps taken; 00312 00313 Output: 00314 diag (float *) The projection of the covariant matrix into a 00315 Krylov subspace of dimension at most kstep. 00316 The projection is a tridiagonal matrix. The 00317 diagonal elements of this matrix is stored in 00318 the diag array. 00319 00320 subdiag (float*) The subdiagonal elements of the projection 00321 is stored here. 00322 00323 V (float *) an imgsize by kstep array that contains a 00324 set of orthonormal Lanczos basis vectors; 00325 00326 beta (float *) the residual norm of the factorization; 00327 */ 00328 int i, iter; 00329 00330 float alpha; 00331 int ione = 1; 00332 float zero = 0.0, one = 1.0, mone = -1.0; 00333 int status = 0; 00334 00335 char trans; 00336 int nimgs = 0, imgsize = 0, ndim = 0; 00337 float *v0, *Av, *hvec, *htmp, *imgdata; 00338 00339 nimgs = imgstack.size(); 00340 if (nimgs <= 0) { 00341 status = 2; // no image in the stack 00342 goto EXIT; 00343 } 00344 00345 ndim = imgstack[0]->get_ndim(); 00346 if (ndim != 1) { 00347 status = 3; // images should all be 1-d 00348 goto EXIT; 00349 } 00350 00351 imgsize = imgstack[0]->get_xsize(); 00352 00353 v0 = new float[imgsize]; 00354 Av = new float[imgsize]; 00355 hvec = new float[*kstep]; 00356 htmp = new float[*kstep]; 00357 00358 if (v0 == NULL || Av == NULL || hvec == NULL || htmp == NULL ) { 00359 fprintf(stderr, "Lanczos: failed to allocate v0,Av,hvec,htmp\n"); 00360 status = -1; 00361 goto EXIT; 00362 } 00363 00364 // may choose a random starting guess here 00365 for ( i = 1; i <= imgsize; i++) v0(i) = 1.0; 00366 00367 // normalize the starting vector 00368 *beta = snrm2_(&imgsize, v0, &ione); 00369 for (i = 1; i<=imgsize; i++) 00370 V(i,1) = v0(i) / (*beta); 00371 00372 // do Av <-- A*v0, where A is a cov matrix 00373 for (i = 0; i < nimgs; i++) { 00374 imgdata = imgstack[i]->get_data(); 00375 alpha = sdot_(&imgsize, imgdata, &ione, V, &ione); 00376 saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione); 00377 } 00378 00379 // Av <--- Av - V(:,1)*V(:,1)'*Av 00380 diag(1) = sdot_(&imgsize, V, &ione, Av, &ione); 00381 alpha = -diag(1); 00382 saxpy_(&imgsize, &alpha, V, &ione, Av, &ione); 00383 00384 // main loop 00385 for ( iter = 2 ; iter <= *kstep ; iter++ ) { 00386 *beta = snrm2_(&imgsize, Av, &ione); 00387 00388 if (*beta < TOL) { 00389 // found an invariant subspace, exit 00390 *kstep = iter; 00391 break; 00392 } 00393 00394 subdiag(iter-1) = *beta; 00395 for ( i = 1 ; i <= imgsize ; i++ ) { 00396 V(i,iter) = Av(i) / (*beta); 00397 } 00398 00399 // do Av <-- A*V(:,iter), where A is a cov matrix 00400 for (i = 0; i < imgsize; i++) Av[i] = 0; 00401 for (i = 0; i < nimgs; i++) { 00402 imgdata = imgstack[i]->get_data(); 00403 alpha = sdot_(&imgsize, imgdata, &ione, &V(1,iter), &ione); 00404 saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione); 00405 } 00406 00407 // f <--- Av - V(:,1:iter)*V(:,1:iter)'*Av 00408 trans = 'T'; 00409 status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione, 00410 &zero , hvec , &ione); 00411 trans = 'N'; 00412 status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, hvec, 00413 &ione , &one , Av, &ione); 00414 00415 // one step of reorthogonalization 00416 trans = 'T'; 00417 status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione, 00418 &zero , htmp , &ione); 00419 saxpy_(&iter, &one, htmp, &ione, hvec, &ione); 00420 trans = 'N'; 00421 status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, htmp, 00422 &ione , &one , Av, &ione); 00423 diag(iter) = hvec(iter); 00424 } 00425 00426 EMDeleteArray(v0); 00427 EMDeleteArray(Av); 00428 EMDeleteArray(hvec); 00429 EMDeleteArray(htmp); 00430 00431 EXIT: 00432 return status; 00433 }
|
|
Definition at line 455 of file pca.cpp. References Av, diag, EMDeleteArray(), EMDeletePtr(), EMAN::EMData::get_data(), EMAN::EMUtil::get_image_count(), EMAN::EMData::get_ndim(), EMAN::EMData::get_xsize(), imgdata, EMAN::EMData::read_image(), resid, saxpy_(), scopy_(), sdot_(), snrm2_(), sscal_(), status, subdiag, and v. Referenced by dopca_ooc(). 00458 { 00459 /* 00460 Purpose: Compute a kstep-step Lanczos factorization 00461 on the covariant matrix X*trans(X), where 00462 X (imgstack) contains a set of images; 00463 00464 Input: 00465 filename_in The name of the input file that contains 00466 (string) masked images; 00467 00468 kstep (int*) The maximum number of Lanczos iterations allowed. 00469 If Lanczos terminates before kstep steps 00470 is reached (an invariant subspace is found), 00471 kstep returns the number of steps taken; 00472 00473 lanscratch The name of the file that will be used to 00474 (string) store a set of orthonormal Lanczos basis vectors; 00475 00476 Output: 00477 diag (float*) The projection of the covariant matrix into a 00478 Krylov subspace of dimension at most kstep. 00479 The projection is a tridiagonal matrix. The 00480 diagonal elements of this matrix is stored in 00481 the diag array; 00482 00483 subdiag (float*) The subdiagonal elements of the projection 00484 is stored here; 00485 00486 beta (float *) The residual norm of the factorization; 00487 */ 00488 int i, iter; 00489 EMData *maskedimage; 00490 00491 float alpha; 00492 int ione = 1; 00493 int status = 0; 00494 00495 int imgsize = 0, ndim = 0; 00496 float *v, *Av, *resid, *imgdata; 00497 float h = 0.0, htmp=0.0; 00498 00499 int nimgs = EMUtil::get_image_count(filename_in); 00500 if (nimgs <= 0) { 00501 status = 2; // no image in the stack 00502 goto EXIT; 00503 } 00504 00505 maskedimage = new EMData(); 00506 maskedimage->read_image(filename_in,0); 00507 // what happens when filename_in does not exist, or when read fails? 00508 00509 ndim = maskedimage->get_ndim(); 00510 if (ndim != 1) { 00511 status = 3; // images should all be 1-d 00512 goto EXIT; 00513 } 00514 00515 imgsize = maskedimage->get_xsize(); 00516 00517 v = new float[imgsize]; 00518 Av = new float[imgsize]; 00519 resid = new float[imgsize]; 00520 00521 if (v == NULL || Av == NULL ) { 00522 fprintf(stderr, "Lanczos: failed to allocate v,Av\n"); 00523 status = -1; 00524 goto EXIT; 00525 } 00526 00527 // may choose a random starting guess here 00528 for ( i = 1; i <= imgsize; i++) v(i) = 1.0; 00529 00530 // normalize the starting vector 00531 *beta = snrm2_(&imgsize, v, &ione); 00532 alpha = 1/(*beta); 00533 sscal_(&imgsize, &alpha, v, &ione); 00534 // write v out to a scratch file 00535 FILE *fp; 00536 fp = fopen(lanscratch.c_str(), "wb"); 00537 fwrite(v, sizeof(float), imgsize, fp); 00538 fclose(fp); 00539 00540 // do Av <-- A*v, where A is a cov matrix 00541 for (i = 0; i < nimgs; i++) { 00542 maskedimage->read_image(filename_in,i); 00543 imgdata = maskedimage->get_data(); 00544 alpha = sdot_(&imgsize, imgdata, &ione, v, &ione); 00545 saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione); 00546 } 00547 00548 // Av <--- Av - V(:,1)*V(:,1)'*Av 00549 diag(1) = sdot_(&imgsize, v, &ione, Av, &ione); 00550 alpha = -diag(1); 00551 scopy_(&imgsize, Av, &ione, resid, &ione); 00552 saxpy_(&imgsize, &alpha, v, &ione, resid, &ione); 00553 00554 // main loop 00555 for ( iter = 2 ; iter <= *kstep ; iter++ ) { 00556 *beta = snrm2_(&imgsize, resid, &ione); 00557 00558 if (*beta < TOL) { 00559 // found an invariant subspace, exit 00560 *kstep = iter; 00561 break; 00562 } 00563 00564 subdiag(iter-1) = *beta; 00565 for ( i = 1 ; i <= imgsize ; i++ ) { 00566 v(i) = resid(i) / (*beta); 00567 } 00568 00569 // write v out to a scratch file at appropriate position 00570 fp = fopen(lanscratch.c_str(),"ab"); 00571 fwrite(v, sizeof(float), imgsize, fp); 00572 fclose(fp); 00573 00574 // do Av <-- A*V(:,iter), where A is a cov matrix 00575 for (i = 0; i < imgsize; i++) Av[i] = 0; 00576 for (i = 0; i < nimgs; i++) { 00577 maskedimage->read_image(filename_in,i); 00578 imgdata = maskedimage->get_data(); 00579 alpha = sdot_(&imgsize, imgdata, &ione, v, &ione); 00580 saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione); 00581 } 00582 00583 // f <--- Av - V(:,1:iter)*V(:,1:iter)'*Av 00584 // the out-of-core version reads one Lanczos vector at a time 00585 scopy_(&imgsize, Av, &ione, resid, &ione); 00586 fp = fopen(lanscratch.c_str(),"rb"); 00587 for (int jlan = 1; jlan <= iter; jlan++) { 00588 fread(v, sizeof(float), imgsize, fp); 00589 h = sdot_(&imgsize, v, &ione, Av, &ione); 00590 alpha = -h; 00591 saxpy_(&imgsize, &alpha, v, &ione, resid, &ione); 00592 } 00593 fclose(fp); 00594 00595 // one step of reorthogonalization 00596 scopy_(&imgsize, resid, &ione, Av, &ione); 00597 fp = fopen(lanscratch.c_str(),"rb"); 00598 for (int jlan = 1; jlan <= iter; jlan++) { 00599 fread(v, sizeof(float), imgsize, fp); 00600 htmp = sdot_(&imgsize, v, &ione, Av, &ione); 00601 alpha = -htmp; 00602 saxpy_(&imgsize, &alpha, v, &ione, resid, &ione); 00603 h += htmp; 00604 } 00605 fclose(fp); 00606 diag(iter) = h; 00607 } 00608 00609 EMDeleteArray(v); 00610 EMDeleteArray(Av); 00611 EMDeleteArray(resid); 00612 EMDeletePtr(maskedimage); 00613 00614 EXIT: 00615 return status; 00616 }
|
|
Definition at line 41 of file pca.h. Referenced by clear(), dopca(), and dopca_lan(). |
|
Definition at line 40 of file pca.h. Referenced by clear(), dopca_lan(), and dopca_ooc(). |