00001 #include "mpi.h"
00002 #include "emdata.h"
00003
00004 using namespace EMAN;
00005
00006 #include <stdlib.h>
00007 #include <stdio.h>
00008 #include <ctype.h>
00009 #include <math.h>
00010 #include <string.h>
00011 #include <float.h>
00012
00013 #include "HyBR_Cart.h"
00014 #include "hybr.h"
00015 #include "utilcomm_Cart.h"
00016 #include "project3d_Cart.h"
00017 #include "project3d.h"
00018
00019 int recons3d_HyBR_mpi_Cart(MPI_Comm comm_2d, MPI_Comm comm_row , MPI_Comm comm_col,
00020 EMData ** images, float * angleshift , EMData *& xvol ,
00021 int nangloc , int radius , int maxiter ,
00022 std::string symmetry, int insolve)
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054 {
00055 MPI_Status mpistatus;
00056 int ROW = 0, COL = 1;
00057 int my2dpid, ierr;
00058 int mycoords[2], dims[2], periods[2];
00059 int srpid, srcoords[2];
00060
00061
00062 MPI_Cart_get(comm_2d, 2, dims, periods, mycoords);
00063 MPI_Comm_rank(comm_2d, &my2dpid);
00064
00065 int * psize;
00066 int * nbase;
00067
00068 int nangles;
00069 psize = new int[dims[ROW]];
00070 nbase = new int[dims[ROW]];
00071 MPI_Allreduce(&nangloc, &nangles, 1, MPI_INT, MPI_SUM, comm_col);
00072
00073 int nsym = 0;
00074
00075 int nx = images[0]->get_xsize();
00076 if ( radius == -1 ) radius = nx/2 - 1;
00077
00078 Vec3i volsize, origin;
00079 volsize[0] = nx;
00080 volsize[1] = nx;
00081 volsize[2] = nx;
00082 origin[0] = nx/2+1;
00083 origin[1] = nx/2+1;
00084 origin[2] = nx/2+1;
00085
00086
00087 std::vector<float> symangles(3,0.0);
00088
00089
00090
00091 int nrays, nnz;
00092 ierr = getnnz(volsize, radius, origin, &nrays, &nnz);
00093
00094 int nnzloc, nraysloc;
00095 int * ptrs = new int[nrays+1];
00096 int * cord = new int[3*nrays];
00097 int *nnzpart = new int[dims[COL]];
00098 int *nnzbase = new int[dims[COL]+1];
00099 int *ptrstart = new int[dims[COL]+1];
00100
00101 ierr = getcb2sph(volsize, radius, origin, nnz, ptrs, cord);
00102 nnzloc = setpart_gr1(comm_2d, nnz, nnzpart, nnzbase);
00103 nraysloc = sphpart(comm_2d, nrays, ptrs, nnzbase, ptrstart);
00104
00105 int myptrstart = ptrstart[mycoords[COL]];
00106 int nnzall[dims[COL]];
00107 for (int i = 0; i<dims[COL]; i++)
00108 nnzall[i] = ptrs[ptrstart[i+1]] - ptrs[ptrstart[i]];
00109
00110 nnzloc = nnzall[mycoords[COL]];
00111
00112 int m = nx*nx*nangles;
00113 int n = nnz;
00114
00115 int iterations_save, warning;
00116 float w, omega,curmin, g, alpha, beta_loc, beta;
00117
00118 float * Ukloc = myvector(nx*nx*nangloc);
00119 float ** Vloc = matrix(nnzloc, maxiter+1);
00120 float ** BB = matrix(maxiter+2, maxiter+1);
00121
00122 float ** Ub = matrix(maxiter+2, maxiter+2);
00123 float ** Vb = matrix(maxiter+1, maxiter+1);
00124 float * Sb = myvector(maxiter+1);
00125
00126 float * vec = myvector(maxiter+2);
00127 float * f = myvector(maxiter+2);
00128 float * Omegas = myvector(maxiter+1);
00129 float * GCV = myvector(maxiter+1);
00130
00131 float * x_save = myvector(nnzloc);
00132 float * x = myvector(nnzloc);
00133
00134 float * UbTvector;
00135 float ** BB2;
00136 float ** Ub2;
00137 float * Sb2, *f2, *Utb;
00138 float ** Vb2;
00139 float * vec2;
00140 float * row1Ub2 = myvector(maxiter+2);
00141
00142 for (int i=0; i< nnzloc; i++){
00143 x[i] = 0.0;
00144 x_save[i] = 0.0;
00145 for(int j=0; j<maxiter+1; j++)
00146 Vloc[i][j] = 0.0;
00147 }
00148
00149 EMData * current_image;
00150 float phi, theta, psi;
00151 Transform3D RA;
00152 Transform3D Tf;
00153 nsym = Tf.get_nsym(symmetry);
00154 Transform3D::EulerType EULER_SPIDER = Transform3D::SPIDER;
00155 Dict angdict;
00156
00157 float * image_data;
00158 float dm[8];
00159
00160
00161 beta_loc = 0.0;
00162 for (int i=0; i<nangloc; i++){
00163 current_image = images[i];
00164 image_data = current_image->get_data();
00165
00166 for(int j=0; j<nx*nx; j++)
00167 beta_loc += image_data[j] * image_data[j];
00168 }
00169 ierr = MPI_Allreduce (&beta_loc, &beta, 1, MPI_FLOAT, MPI_SUM, comm_col);
00170 beta = sqrt(beta);
00171
00172 for (int i=0; i<nangloc; i++){
00173 current_image = images[i];
00174 image_data = current_image->get_data();
00175
00176 for(int j=0; j<nx*nx; j++)
00177 Ukloc[nx*nx*i+j] = image_data[j] / beta;
00178 }
00179
00180 vec[0] = beta;
00181 warning = 0;
00182
00183 double t0;
00184 t0 = MPI_Wtime();
00185 for (int i = 0; i< maxiter+1; i++){
00186 LBD_Cart(comm_2d, comm_row, comm_col, angleshift, volsize, nraysloc, nnzloc, origin, radius, ptrs, myptrstart, cord,nangloc, nx, nx, Ukloc, BB, Vloc, m, n, i, maxiter, symmetry);
00187
00188 if (i+1 >= 2){
00189 BB2 = submatrix(BB, maxiter+2, maxiter+1, i+2, i+1);
00190 Ub2 = submatrix(Ub, maxiter+2, maxiter+2, i+2, i+2);
00191 Vb2 = submatrix(Vb, maxiter+1, maxiter+1, i+1,i+1);
00192 Sb2 = subvector(Sb, maxiter+1, i+1);
00193 f2 = subvector(f, maxiter+2 ,i+1);
00194 vec2 = subvector(vec, maxiter+2, i+2);
00195
00196 svd(BB2, i+2, i+1, Ub2, Sb2, Vb2);
00197
00198
00199 UbTvector = myvector(i+2);
00200 matvec_multT(Ub2,vec2, i+2, i+2, UbTvector);
00201 omega = findomega(UbTvector, Sb2, i+2, i+1, insolve);
00202 Omegas[i-1] = min((float)1.0, omega);
00203
00204 w = sum_vector(Omegas, i)/((float)(i+1) -(float)1.0);
00205
00206 alpha = -(float)1.0;
00207
00208 if(insolve == 0)
00209 tsvd(Ub2, Sb2, Vb2, vec2, i+2, i+1, w, f2, &alpha);
00210 else
00211 tikhonov(Ub2, Sb2, Vb2, vec2, i+2, i+1, w, f2, &alpha);
00212
00213
00214 get_row(Ub2, i+2, 0, row1Ub2);
00215 g = gcvstopfun(alpha, row1Ub2, Sb2, beta, i+1, m, n, insolve);
00216
00217 GCV[i-1] = g;
00218
00219
00220 if (i+1 > 2){
00221
00222 if (fabs((GCV[i-1]-GCV[i-2]))/GCV[0] < (pow((float)10.0, - (float)6.0))){
00223 matvec_mult(Vloc, f2, nnzloc, i+1, x);
00224
00225 if (mycoords[ROW] == 0 && mycoords[COL] != 0 ){
00226 srcoords[ROW] = 0;
00227 srcoords[COL] = 0;
00228 MPI_Cart_rank(comm_2d, srcoords, &srpid);
00229
00230 MPI_Send(x, nnzloc, MPI_FLOAT, srpid, my2dpid, comm_2d);
00231 }
00232
00233 if (mycoords[ROW] == 0 && mycoords[COL] == 0 ){
00234 printf("Exit Code 1: Time = %11.3e\n", MPI_Wtime()-t0);
00235 xvol->set_size(nx, nx, nx);
00236 xvol->to_zero();
00237 float * voldata = xvol->get_data();
00238 float * xvol_sph = new float[nnz];
00239
00240 for(int i=0; i< dims[COL]; i++){
00241 if (i==0){
00242 for(int i=0; i<nnzloc; i++)
00243 xvol_sph[i] = x[i];
00244 }else{
00245 srcoords[ROW] = 0;
00246 srcoords[COL] = i;
00247 MPI_Cart_rank(comm_2d, srcoords, &srpid);
00248
00249 MPI_Recv(&xvol_sph[ptrs[ptrstart[i]]-1], nnzall[i], MPI_FLOAT, srpid, srpid, comm_2d, &mpistatus);
00250 }
00251 }
00252
00253
00254 ierr = sph2cb(xvol_sph, volsize, nrays, radius, nnz, ptrs, cord, voldata);
00255 EMDeleteArray(xvol_sph);
00256 }
00257 return 0;
00258 }
00259
00260 else if (warning && i > iterations_save + 3){
00261 curmin = GCV[iterations_save];
00262 for(int j = 1; j< 3; j++){
00263 if (GCV[iterations_save + j]< curmin){
00264 curmin = GCV[iterations_save+ j];
00265 }
00266 }
00267 if (GCV[iterations_save] <= curmin){
00268
00269 copy_vector(x_save, x, nnzloc);
00270
00271
00272 if (mycoords[ROW] == 0 && mycoords[COL] != 0 ){
00273 srcoords[ROW] = 0;
00274 srcoords[COL] = 0;
00275 MPI_Cart_rank(comm_2d, srcoords, &srpid);
00276
00277 MPI_Send(x, nnzloc, MPI_FLOAT, srpid, my2dpid, comm_2d);
00278 }
00279
00280 if (mycoords[ROW] == 0 && mycoords[COL] == 0 ){
00281 printf("Exit Code 2: Time = %11.3e\n", MPI_Wtime()-t0);
00282 xvol->set_size(nx, nx, nx);
00283 xvol->to_zero();
00284 float * voldata = xvol->get_data();
00285 float * xvol_sph = new float[nnz];
00286
00287 for(int i=0; i< dims[COL]; i++){
00288 if (i==0){
00289 for(int i=0; i<nnzloc; i++)
00290 xvol_sph[i] = x[i];
00291 }else{
00292 srcoords[ROW] = 0;
00293 srcoords[COL] = i;
00294 MPI_Cart_rank(comm_2d, srcoords, &srpid);
00295
00296 MPI_Recv(&xvol_sph[ptrs[ptrstart[i]]-1], nnzall[i], MPI_FLOAT, srpid, srpid, comm_2d, &mpistatus);
00297 }
00298 }
00299
00300 ierr = sph2cb(xvol_sph, volsize, nrays, radius, nnz, ptrs, cord, voldata);
00301 EMDeleteArray(xvol_sph);
00302 }
00303 return 0;
00304 }
00305 else {
00306
00307 warning = 0;
00308 iterations_save = maxiter;
00309 }
00310 }
00311
00312 else if ( ! warning ){
00313 if (GCV[i-2] < GCV[i-1]){
00314 warning = 1;
00315 matvec_mult(Vloc, f2,nnzloc, i+1, x_save);
00316 iterations_save = i;
00317 }
00318 }
00319
00320 free_matrix(BB2);
00321 free_matrix(Ub2);
00322 free_matrix(Vb2);
00323 free_vector(Sb2);
00324 free_vector(vec2);
00325 }
00326 matvec_mult(Vloc, f2, nnzloc, i+1, x);
00327 }
00328 if (my2dpid ==0) printf("Iteration %d, alpha = %f, omega is %f\n", i+1, alpha, w);
00329 }
00330
00331
00332 if (mycoords[ROW] == 0 && mycoords[COL] != 0 ){
00333 srcoords[ROW] = 0;
00334 srcoords[COL] = 0;
00335 MPI_Cart_rank(comm_2d, srcoords, &srpid);
00336
00337 MPI_Send(x, nnzloc, MPI_FLOAT, srpid, my2dpid, comm_2d);
00338 }
00339
00340 if (mycoords[ROW] == 0 && mycoords[COL] == 0 ){
00341 printf("Exit Code 3: Time = %11.3e\n", MPI_Wtime()-t0);
00342
00343 xvol->set_size(nx, nx, nx);
00344 xvol->to_zero();
00345 float * voldata = xvol->get_data();
00346 float * xvol_sph = new float[nnz];
00347
00348 for(int i=0; i< dims[COL]; i++){
00349 if (i==0){
00350 for(int i=0; i<nnzloc; i++)
00351 xvol_sph[i] = x[i];
00352 }else{
00353 srcoords[ROW] = 0;
00354 srcoords[COL] = i;
00355 MPI_Cart_rank(comm_2d, srcoords, &srpid);
00356
00357 MPI_Recv(&xvol_sph[ptrs[ptrstart[i]]-1], nnzall[i], MPI_FLOAT, srpid, srpid, comm_2d, &mpistatus);
00358 }
00359 }
00360
00361 ierr = sph2cb(xvol_sph, volsize, nrays, radius, nnz, ptrs, cord, voldata);
00362 EMDeleteArray(xvol_sph);
00363 }
00364
00365 EMDeleteArray(ptrs);
00366 EMDeleteArray(cord);
00367 EMDeleteArray(psize);
00368 EMDeleteArray(nbase);
00369 EMDeleteArray(nnzpart);
00370 EMDeleteArray(nnzbase);
00371 EMDeleteArray(ptrstart);
00372
00373 return 0;
00374 }
00375
00376
00377
00378
00379 void LBD_Cart(MPI_Comm comm_2d, MPI_Comm comm_row, MPI_Comm comm_col, float *angleshift, Vec3i volsize, int nraysloc, int nnzloc, Vec3i origin, int radius, int *ptrs, int myptrstart, int *cord, int nangloc, int nx, int ny, float *Ukloc, float **B, float **Vloc, int m, int n, int k, int maxiter,std::string symmetry)
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390 {
00391 float * vc = myvector(nnzloc);
00392 float * vc_loc = myvector(nnzloc);
00393 float * uc = myvector(nangloc*nx*ny);
00394 float * uc_loc = myvector(nangloc*nx*ny);
00395 float alpha, alpha_loc, beta, beta_loc;
00396 float * Vk = myvector(nnzloc);
00397 int ierr;
00398 float phi, theta, psi;
00399 float dm[8];
00400
00401 MPI_Status mpistatus;
00402 int ROW = 0, COL = 1;
00403 int my2dpid;
00404 int mycoords[2], dims[2], periods[2];
00405 int srpid, srcoords[2];
00406
00407
00408 MPI_Cart_get(comm_2d, 2, dims, periods, mycoords);
00409 MPI_Comm_rank(comm_2d, &my2dpid);
00410
00411 int nsym = 0;
00412 Transform3D RA;
00413 Transform3D Tf;
00414 nsym = Tf.get_nsym(symmetry);
00415 Transform3D::EulerType EULER_SPIDER = Transform3D::SPIDER;
00416 Dict angdict;
00417
00418 get_col(Vloc, nnzloc, k-1, Vk);
00419 if (k == 0){
00420 for ( int i = 0 ; i < nangloc ; ++i ) {
00421
00422 RA = Transform3D(EULER_SPIDER, angleshift[5*i + 0], angleshift[5*i + 1], angleshift[5*i + 2]);
00423 dm[6] = angleshift[5*i + 3] * -1.0;
00424 dm[7] = angleshift[5*i + 4] * -1.0;
00425 for ( int ns = 1 ; ns < nsym + 1 ; ++ns ) {
00426
00427 Tf = Tf.get_sym(symmetry, ns) * RA;
00428 angdict = Tf.get_rotation(EULER_SPIDER);
00429
00430 phi = (float) angdict["phi"] * PI/180.0;
00431 theta = (float) angdict["theta"] * PI/180.0;
00432 psi = (float) angdict["psi"] * PI/180.0;
00433 make_proj_mat(phi, theta, psi, dm);
00434
00435 ierr = bckpj3_Cart(volsize, nraysloc, nnzloc, dm, origin, radius, ptrs, cord, myptrstart, &Ukloc[nx*nx*i], vc_loc);
00436 }
00437 }
00438 ierr = MPI_Allreduce(vc_loc, vc, nnzloc, MPI_FLOAT, MPI_SUM, comm_col);
00439
00440 }
00441 else {
00442 for ( int i = 0 ; i < nangloc ; ++i ) {
00443
00444 RA = Transform3D(EULER_SPIDER, angleshift[5*i + 0], angleshift[5*i + 1], angleshift[5*i + 2]);
00445 dm[6] = angleshift[5*i + 3] * -1.0;
00446 dm[7] = angleshift[5*i + 4] * -1.0;
00447 for ( int ns = 1 ; ns < nsym + 1 ; ++ns ) {
00448
00449 Tf = Tf.get_sym(symmetry, ns) * RA;
00450 angdict = Tf.get_rotation(EULER_SPIDER);
00451
00452 phi = (float) angdict["phi"] * PI/180.0;
00453 theta = (float) angdict["theta"] * PI/180.0;
00454 psi = (float) angdict["psi"] * PI/180.0;
00455 make_proj_mat(phi, theta, psi, dm);
00456
00457 ierr = bckpj3_Cart(volsize, nraysloc, nnzloc, dm, origin, radius, ptrs, cord, myptrstart, &Ukloc[nx*nx*i], vc_loc);
00458 }
00459 }
00460 ierr = MPI_Allreduce(vc_loc, vc, nnzloc, MPI_FLOAT, MPI_SUM, comm_col);
00461
00462 for(int i=0; i<nnzloc; i++)
00463 vc[i] = vc[i] - B[k][k-1]*Vk[i];
00464 }
00465
00466
00467 alpha_loc = 0.0;
00468 for(int i=0; i<nnzloc; i++)
00469 alpha_loc += vc[i]* vc[i];
00470
00471 MPI_Allreduce (&alpha_loc, &alpha, 1, MPI_FLOAT, MPI_SUM, comm_row);
00472 alpha = sqrt(alpha);
00473
00474 for (int i=0; i<nnzloc; i++)
00475 vc[i] = vc[i] / alpha;
00476
00477
00478 for ( int i = 0 ; i < nangloc ; ++i ) {
00479
00480 RA = Transform3D(EULER_SPIDER, angleshift[5*i + 0], angleshift[5*i + 1], angleshift[5*i + 2]);
00481 dm[6] = angleshift[5*i + 3] * -1.0;
00482 dm[7] = angleshift[5*i + 4] * -1.0;
00483 for ( int ns = 1 ; ns < nsym + 1 ; ++ns ) {
00484
00485 Tf = Tf.get_sym(symmetry, ns) * RA;
00486 angdict = Tf.get_rotation(EULER_SPIDER);
00487
00488 phi = (float) angdict["phi"] * PI/180.0;
00489 theta = (float) angdict["theta"] * PI/180.0;
00490 psi = (float) angdict["psi"] * PI/180.0;
00491 make_proj_mat(phi, theta, psi, dm);
00492
00493 ierr = fwdpj3_Cart(volsize, nraysloc, nnzloc, dm, origin, radius, ptrs, cord, myptrstart, vc, &uc_loc[nx*nx*i]);
00494 }
00495 }
00496
00497 ierr = MPI_Allreduce(uc_loc, uc, nangloc*nx*nx, MPI_FLOAT, MPI_SUM, comm_row);
00498
00499 for (int i=0; i<nangloc*nx*nx; i++)
00500 uc[i] = uc[i] - alpha*Ukloc[i];
00501
00502
00503 beta_loc = 0.0;
00504 for(int i=0; i<nangloc*nx*nx; i++)
00505 beta_loc += uc[i]* uc[i];
00506
00507 MPI_Allreduce (&beta_loc, &beta, 1, MPI_FLOAT, MPI_SUM, comm_col);
00508 beta = sqrt(beta);
00509
00510 for (int i=0; i<nangloc*nx*nx; i++)
00511 uc[i] = uc[i] / beta;
00512
00513 copy_vector(uc, Ukloc,nangloc*nx*nx);
00514 put_col(Vloc, nnzloc, vc, k);
00515
00516 B[k][k] = alpha;
00517 B[k+1][k] = beta;
00518
00519 free_vector(vc);
00520 free_vector(vc_loc);
00521 free_vector(uc);
00522 free_vector(uc_loc);
00523 free_vector(Vk);
00524 }
00525
00526
00527
00528 float findomega(float * bhat, float *s, int m, int n, int insolve)
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545 {
00546 int i;
00547 float alpha, t0, t1, t3, t4, t5, v2, omega;
00548 float * s2 = myvector(n);
00549 float * tt = myvector(n);
00550 float * t2 = myvector(n);
00551 float * v1 = myvector(n);
00552 float * hold = myvector(m-n);
00553 float * hold2 = myvector(n);
00554 float * hold3 = myvector(n);
00555 float * hold4 = myvector(n);
00556 float * hold5 = myvector(n);
00557 float * hold6 = myvector(n);
00558
00559 if(insolve ==0){
00560 int kopt = n;
00561 omega = (m*bhat[kopt-1]*bhat[kopt-1])/(kopt*bhat[kopt-1]*bhat[kopt-1] +2*bhat[kopt]*bhat[kopt]);
00562 } else {
00563
00564
00565
00566 alpha = s[n-1];
00567
00568
00569
00570
00571 for (i = 0; i < m-n; i++)
00572 hold[i] = fabs(bhat[n+i])*fabs(bhat[n+i]);
00573
00574 t0 = sum_vector(hold, m-n);
00575
00576 for (i=0; i<n ; i++){
00577 s2[i] = fabs(s[i]) * fabs(s[i]);
00578 tt[i] = (float)1.0 / (s2[i] + alpha*alpha);
00579 hold2[i] = s2[i]* tt[i];
00580 }
00581
00582 t1 = sum_vector(hold2,n);
00583 for (i=0; i<n; i++){
00584 t2[i] = fabs(bhat[i]*alpha*s[i]) * fabs(bhat[i]*alpha*s[i]);
00585 hold3[i] = t2[i] * fabs(tt[i]*tt[i]*tt[i]);
00586 hold4[i] = (s[i]*tt[i])*(s[i]*tt[i]);
00587 hold5[i] = fabs(alpha*alpha*bhat[i]*tt[i])*(fabs(alpha*alpha*bhat[i]*tt[i]));
00588 v1[i] = fabs(bhat[i]*s[i])*fabs(bhat[i]*s[i]);
00589 hold6[i]= v1[i] * fabs(tt[i]*tt[i]*tt[i]);
00590 }
00591 t3 = sum_vector(hold3,n);
00592 t4 = sum_vector(hold4,n);
00593 t5 = sum_vector(hold5,n);
00594
00595 v2 = sum_vector(hold6,n);
00596
00597
00598
00599 omega = ((float)m*alpha*alpha*v2) / (t1*t3 + t4*(t5 + t0));
00600 }
00601 free_vector(s2);
00602 free_vector(tt);
00603 free_vector(t2);
00604 free_vector(v1);
00605 free_vector(hold);
00606 free_vector(hold2);
00607 free_vector(hold3);
00608 free_vector(hold4);
00609 free_vector(hold5);
00610 free_vector(hold6);
00611
00612 return omega;
00613 }
00614
00615
00616
00617 float gcvstopfun(float alpha, float * u, float *s, float beta, int k, int m, int n, int insolve)
00618
00619
00620
00621
00622
00623
00624
00625
00626
00627
00628
00629
00630 {
00631 float beta2, alpha2, num, den, G;
00632 float *s2 = myvector(k);
00633 float *t1 = myvector(k);
00634 float *t2 = myvector(k+1);
00635 float *t3 = myvector(k);
00636 int i;
00637
00638 beta2 = beta*beta;
00639 alpha2 = alpha*alpha;
00640
00641 if(insolve == 0){
00642 for(i=(int) alpha; i<k+1; i++){
00643 t2[i] = fabs(u[i])*fabs(u[i]);
00644 }
00645 G = (n*beta2*sum_vector(t2,k+1))/((m-alpha)*(m-alpha));
00646
00647 }else{
00648 for(i=0; i<k; i++)
00649 s2[i] = fabs(s[i]) * fabs(s[i]) ;
00650
00651 for (i=0; i<k; i++){
00652 t1[i] = (float)1.0 / (s2[i] + alpha2);
00653 t2[i] = fabs(alpha2*u[i] * t1[i]) * fabs(alpha2*u[i] * t1[i]);
00654 t3[i] = s2[i] * t1[i];
00655 }
00656
00657 num = n*beta2*(sum_vector(t2,k) + fabs(u[k])*fabs(u[k]));
00658 den = (((float)m - sum_vector(t3,k)))*(((float)m - sum_vector(t3, k)));
00659
00660 G = num / den;
00661 }
00662 free_vector(s2);
00663 free_vector(t1);
00664 free_vector(t2);
00665 free_vector(t3);
00666
00667 return G;
00668 }
00669
00670
00671
00672 void recons3d_CGLS_mpi_Cart(MPI_Comm comm_2d, MPI_Comm comm_row , MPI_Comm comm_col,
00673 EMData ** images, float * angleshift, EMData *& xvol ,
00674 int nangloc , int radius , int maxiter ,
00675 std::string symmetry)
00676
00677
00678
00679
00680
00681
00682
00683
00684
00685
00686
00687
00688
00689
00690
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700 {
00701 MPI_Status mpistatus;
00702 int ROW = 0, COL = 1;
00703 int my2dpid, ierr;
00704 int mycoords[2], dims[2], periods[2];
00705 int srpid, srcoords[2];
00706
00707
00708 MPI_Cart_get(comm_2d, 2, dims, periods, mycoords);
00709 MPI_Comm_rank(comm_2d, &my2dpid);
00710
00711 int * psize;
00712 int * nbase;
00713
00714 int nangles;
00715 psize = new int[dims[ROW]];
00716 nbase = new int[dims[ROW]];
00717 MPI_Allreduce(&nangloc, &nangles, 1, MPI_INT, MPI_SUM, comm_col);
00718
00719 int nsym = 0;
00720
00721 int nx = images[0]->get_xsize();
00722
00723
00724 if ( radius == -1 ) radius = nx/2 - 1;
00725
00726 Vec3i volsize, origin;
00727 volsize[0] = nx;
00728 volsize[1] = nx;
00729 volsize[2] = nx;
00730 origin[0] = nx/2+1;
00731 origin[1] = nx/2+1;
00732 origin[2] = nx/2+1;
00733
00734
00735 std::vector<float> symangles(3,0.0);
00736
00737
00738
00739 int nrays, nnz;
00740 ierr = getnnz(volsize, radius, origin, &nrays, &nnz);
00741
00742 int nnzloc, nraysloc;
00743 int * ptrs = new int[nrays+1];
00744 int * cord = new int[3*nrays];
00745 int *nnzpart = new int[dims[COL]];
00746 int *nnzbase = new int[dims[COL]+1];
00747 int *ptrstart = new int[dims[COL]+1];
00748
00749 ierr = getcb2sph(volsize, radius, origin, nnz, ptrs, cord);
00750 nnzloc = setpart_gr1(comm_2d, nnz, nnzpart, nnzbase);
00751 nraysloc = sphpart(comm_2d, nrays, ptrs, nnzbase, ptrstart);
00752
00753 int myptrstart = ptrstart[mycoords[COL]];
00754 int nnzall[dims[COL]];
00755 for (int i = 0; i<dims[COL]; i++)
00756 nnzall[i] = ptrs[ptrstart[i+1]] - ptrs[ptrstart[i]];
00757
00758 nnzloc = nnzall[mycoords[COL]];
00759
00760 int m = nx*nx*nangles;
00761 int n = nnz;
00762
00763 int k;
00764 float * trAb_loc = myvector(nnzloc);
00765 float * trAb = myvector(nnzloc);
00766 float * q_loc = myvector(nx*nx*nangloc);
00767 float * q = myvector(nx*nx*nangloc);
00768 float * s = myvector(nx*nx*nangloc);
00769 float * x = myvector(nnzloc);
00770 float * r_loc = myvector(nnzloc);
00771 float * r = myvector(nnzloc);
00772 float * p = myvector(nnzloc);
00773 float nrm_trAb_loc, nrm_trAb, tol, gamma_loc, gamma, beta, oldgamma, nq_loc, nq, alpha;
00774 float phi, theta, psi;
00775 float dm[8];
00776 float * Rnrm = myvector(maxiter+1);
00777
00778 for (int i=0; i< nnzloc; i++){
00779 trAb_loc [i] = 0.0;
00780 trAb [i] = 0.0;
00781 x[i] = 0.0;
00782 r_loc[i] = 0.0;
00783 r[i] = 0.0;
00784 p[i] = 0.0;
00785 }
00786 for (int i=0; i< nx*nx*nangloc; i++){
00787 q_loc[i] = 0.0;
00788 q[i] = 0.0;
00789 s[i] = 0.0;
00790 }
00791
00792 EMData * current_image;
00793 Transform3D RA;
00794 Transform3D Tf;
00795 nsym = Tf.get_nsym(symmetry);
00796 Transform3D::EulerType EULER_SPIDER = Transform3D::SPIDER;
00797 Dict angdict;
00798
00799 float * image_data;
00800
00801
00802 for (int i=0; i<nangloc; i++){
00803 current_image = images[i];
00804 image_data = current_image->get_data();
00805
00806
00807 phi = angleshift[5*i + 0];
00808 theta = angleshift[5*i + 1];
00809 psi = angleshift[5*i + 2];
00810 dm[6] = angleshift[5*i + 3] * -1.0;
00811 dm[7] = angleshift[5*i + 4] * -1.0;
00812
00813 RA = Transform3D(EULER_SPIDER, phi, theta, psi);
00814 for ( int ns = 1 ; ns < nsym + 1 ; ++ns ) {
00815
00816
00817 Tf = Tf.get_sym(symmetry, ns) * RA;
00818 angdict = Tf.get_rotation(EULER_SPIDER);
00819 phi = (float) angdict["phi"] * PI/180.0;
00820 theta = (float) angdict["theta"] * PI/180.0;
00821 psi = (float) angdict["psi"] * PI/180.0;
00822 make_proj_mat(phi, theta, psi, dm);
00823
00824 ierr = bckpj3_Cart(volsize, nraysloc, nnzloc, dm, origin, radius, ptrs, cord,
00825 myptrstart, image_data, trAb_loc);
00826 }
00827 }
00828
00829 ierr = MPI_Allreduce (trAb_loc, trAb, nnzloc, MPI_FLOAT, MPI_SUM, comm_col);
00830
00831 nrm_trAb_loc = 0.0;
00832 for(int i=0; i<nnzloc; i++)
00833 nrm_trAb_loc += trAb[i]*trAb[i];
00834
00835 ierr = MPI_Allreduce (&nrm_trAb_loc, &nrm_trAb, 1, MPI_FLOAT, MPI_SUM, comm_row);
00836
00837 nrm_trAb = sqrt(nrm_trAb);
00838 tol = sqrt(FLT_EPSILON)*nrm_trAb;
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855 for (int i=0; i<nangloc; i++){
00856 current_image = images[i];
00857 image_data = current_image->get_data();
00858 for (int j=0; j<nx*nx; j++)
00859 s[i*nx*nx+j] = image_data[j]- s[i*nx*nx+j];
00860 }
00861
00862
00863 for (int i=0; i<nangloc; i++){
00864 phi = angleshift[5*i + 0];
00865 theta = angleshift[5*i + 1];
00866 psi = angleshift[5*i + 2];
00867 dm[6] = angleshift[5*i + 3] * -1.0;
00868 dm[7] = angleshift[5*i + 4] * -1.0;
00869
00870 RA = Transform3D(EULER_SPIDER, phi, theta, psi);
00871 for ( int ns = 1 ; ns < nsym + 1 ; ++ns ) {
00872
00873
00874 Tf = Tf.get_sym(symmetry, ns) * RA;
00875 angdict = Tf.get_rotation(EULER_SPIDER);
00876 phi = (float) angdict["phi"] * PI/180.0;
00877 theta = (float) angdict["theta"] * PI/180.0;
00878 psi = (float) angdict["psi"] * PI/180.0;
00879 make_proj_mat(phi, theta, psi, dm);
00880
00881 ierr = bckpj3_Cart(volsize, nraysloc, nnzloc, dm, origin, radius, ptrs, cord,
00882 myptrstart, &s[nx*nx*i], r_loc);
00883 }
00884 }
00885
00886 ierr = MPI_Allreduce (r_loc, r, nnzloc, MPI_FLOAT, MPI_SUM, comm_col);
00887
00888 gamma_loc = 0.0;
00889 for(int i=0; i<nnzloc; i++)
00890 gamma_loc += r[i]*r[i];
00891
00892 ierr = MPI_Allreduce (&gamma_loc, &gamma, 1, MPI_FLOAT, MPI_SUM, comm_row);
00893 Rnrm[0] = sqrt(gamma);
00894 if (my2dpid==0) printf("Iteration %d, Rnorm = %f\n", 1, Rnrm[0]/nrm_trAb);
00895
00896 k = 0;
00897
00898
00899
00900 double t0;
00901 t0 = MPI_Wtime();
00902 while (Rnrm[k]>tol && k<= maxiter-1){
00903 k++;
00904 if(k==1){
00905 for(int i=0;i<nnzloc; i++)
00906 p[i] = r[i];
00907 }else{
00908 beta = gamma / oldgamma;
00909 for(int i=0;i<nnzloc; i++)
00910 p[i] = r[i]+beta*p[i];
00911 }
00912
00913 for (int i=0; i < nx*nx*nangloc; i++)
00914 q_loc[i] = 0.0;
00915
00916 for (int i=0; i < nangloc; i++){
00917
00918 RA = Transform3D(EULER_SPIDER, angleshift[5*i + 0], angleshift[5*i + 1], angleshift[5*i + 2]);
00919 dm[6] = angleshift[5*i + 3] * -1.0;
00920 dm[7] = angleshift[5*i + 4] * -1.0;
00921 for ( int ns = 1 ; ns < nsym + 1 ; ++ns ) {
00922
00923 Tf = Tf.get_sym(symmetry, ns) * RA;
00924 angdict = Tf.get_rotation(EULER_SPIDER);
00925
00926 phi = (float) angdict["phi"] * PI/180.0;
00927 theta = (float) angdict["theta"] * PI/180.0;
00928 psi = (float) angdict["psi"] * PI/180.0;
00929 make_proj_mat(phi, theta, psi, dm);
00930
00931 ierr = fwdpj3_Cart(volsize, nraysloc, nnzloc, dm, origin, radius, ptrs, cord,
00932 myptrstart, p, &q_loc[nx*nx*i]);
00933 }
00934 }
00935
00936 ierr = MPI_Allreduce(q_loc, q, nangloc*nx*nx, MPI_FLOAT, MPI_SUM, comm_row);
00937
00938 nq_loc = 0.0;
00939 for(int i=0; i< nangloc*nx*nx; i++)
00940 nq_loc += q[i]*q[i];
00941
00942 MPI_Allreduce (&nq_loc, &nq, 1, MPI_FLOAT, MPI_SUM, comm_col);
00943
00944 alpha = gamma/nq;
00945
00946 for(int i=0; i<nnzloc; i++){
00947 x[i] = x[i]+alpha*p[i];
00948 }
00949
00950 for(int i=0; i<nangloc*nx*nx; i++)
00951 s[i] = s[i]- alpha*q[i];
00952
00953
00954 for(int i=0; i< nnzloc; i++)
00955 r_loc[i] = 0.0;
00956
00957 for (int i=0; i<nangloc; i++){
00958
00959 RA = Transform3D(EULER_SPIDER, angleshift[5*i + 0], angleshift[5*i + 1], angleshift[5*i + 2]);
00960 dm[6] = angleshift[5*i + 3] * -1.0;
00961 dm[7] = angleshift[5*i + 4] * -1.0;
00962 for ( int ns = 1 ; ns < nsym + 1 ; ++ns ) {
00963
00964 Tf = Tf.get_sym(symmetry, ns) * RA;
00965 angdict = Tf.get_rotation(EULER_SPIDER);
00966
00967 phi = (float) angdict["phi"] * PI/180.0;
00968 theta = (float) angdict["theta"] * PI/180.0;
00969 psi = (float) angdict["psi"] * PI/180.0;
00970 make_proj_mat(phi, theta, psi, dm);
00971
00972 ierr = bckpj3_Cart(volsize, nraysloc, nnzloc, dm, origin, radius, ptrs, cord,
00973 myptrstart, &s[nx*nx*i], r_loc);
00974 }
00975 }
00976 ierr = MPI_Allreduce(r_loc, r, nnzloc, MPI_FLOAT, MPI_SUM, comm_col);
00977
00978 oldgamma = gamma;
00979 gamma_loc = 0.0;
00980 for(int i=0; i< nnzloc; i++)
00981 gamma_loc += r[i]*r[i];
00982
00983 MPI_Allreduce (&gamma_loc, &gamma, 1, MPI_FLOAT, MPI_SUM, comm_row);
00984
00985 Rnrm[k] = sqrt(gamma);
00986 if(k!=1)
00987 if (my2dpid ==0) printf("Iteration %d, Rnorm = %f\n", k, Rnrm[k]/nrm_trAb);
00988 }
00989 if (my2dpid == 0)printf("Exit CGLS: time = %11.3e\n", MPI_Wtime()-t0);
00990
00991 if (mycoords[ROW] == 0 && mycoords[COL] != 0 ){
00992 srcoords[ROW] = 0;
00993 srcoords[COL] = 0;
00994 MPI_Cart_rank(comm_2d, srcoords, &srpid);
00995 MPI_Send(x, nnzloc, MPI_FLOAT, srpid, my2dpid, comm_2d);
00996 }
00997
00998 if (mycoords[ROW] == 0 && mycoords[COL] == 0 ){
00999 xvol->set_size(nx, nx, nx);
01000 xvol->to_zero();
01001 float * voldata = xvol->get_data();
01002 float * xvol_sph = new float[nnz];
01003
01004 for(int i=0; i< dims[COL]; i++){
01005 if (i==0){
01006 for(int i=0; i<nnzloc; i++) xvol_sph[i] = x[i];
01007 }
01008 else {
01009 srcoords[ROW] = 0;
01010 srcoords[COL] = i;
01011 MPI_Cart_rank(comm_2d, srcoords, &srpid);
01012 MPI_Recv(&xvol_sph[ptrs[ptrstart[i]]-1], nnzall[i], MPI_FLOAT, srpid, srpid,
01013 comm_2d, &mpistatus);
01014 }
01015 }
01016
01017 ierr = sph2cb(xvol_sph, volsize, nrays, radius, nnz, ptrs, cord, voldata);
01018 EMDeleteArray(xvol_sph);
01019 }
01020
01021 EMDeleteArray(ptrs);
01022 EMDeleteArray(cord);
01023 EMDeleteArray(psize);
01024 EMDeleteArray(nbase);
01025 EMDeleteArray(nnzpart);
01026 EMDeleteArray(nnzbase);
01027 EMDeleteArray(ptrstart);
01028
01029 free_vector(x);
01030 free_vector(trAb_loc);
01031 free_vector(trAb);
01032 free_vector(s);
01033 free_vector(q_loc);
01034 free_vector(q);
01035 free_vector(r_loc);
01036 free_vector(r);
01037 free_vector(p);
01038 free_vector(Rnrm);
01039 }