00001 #include "mpi.h"
00002 #include "emdata.h"
00003 #include "project3d.h"
00004 #include "fgcalc.h"
00005 #include "utilcomm.h"
00006
00007 #define gradloc(i) gradloc[(i)]
00008 #define rhs(i,j) rhs[((j)-1)*nx*ny + (i) - 1]
00009 #define rvec(i) rvec[(i) - 1]
00010 #define dt 1.0e-4
00011
00012 int fgcalc(MPI_Comm comm, float *volsph, Vec3i volsize,
00013 int nnz, int nrays, Vec3i origin, int ri,
00014 int *ptrs, int *cord, float *angtrs, int nang,
00015 float *rhs, float aba, NUMBER *fval, float *grad, char * fname_base)
00016 {
00017 int mypid, ncpus, nvars;
00018 int *psize, *nbase;
00019
00020 int nangloc, nx, ny, nz, jglb, ierr, ibeg;
00021 float phi, theta, psi, sx, sy;
00022 NUMBER cphi, sphi, cthe, sthe, cpsi, spsi;
00023 float dm[8];
00024 float *rvec, *gvec1, *gvec2, *gvec3, *gvec4, *gvec5, *gradloc;
00025 NUMBER fvalloc= 0.0;
00026
00027 MPI_Comm_rank(comm,&mypid);
00028 MPI_Comm_size(comm,&ncpus);
00029
00030 nvars = nnz + nang*5;
00031 *fval = 0.0;
00032 for (int i = 0; i < nvars; i++) grad[i]=0.0;
00033
00034 nx = volsize[0];
00035 ny = volsize[1];
00036 nz = volsize[2];
00037
00038 psize = new int[ncpus];
00039 nbase = new int[ncpus];
00040 rvec = new float[nx*ny];
00041 gvec1 = new float[nx*ny];
00042 gvec2 = new float[nx*ny];
00043 gvec3 = new float[nx*ny];
00044 gvec4 = new float[nx*ny];
00045 gvec5 = new float[nx*ny];
00046 gradloc = new float[nvars];
00047
00048
00049
00050
00051 float ref_param;
00052
00053
00054
00055 for ( int i = 0 ; i < nvars ; ++i ) {
00056 gradloc[i] = 0.0;
00057 }
00058
00059 std::ofstream res_out;
00060 char out_fname[64];
00061 sprintf(out_fname, "res_%s.dat", fname_base);
00062 res_out.open(out_fname);
00063
00064 nangloc = setpart(comm, nang, psize, nbase);
00065 float * img_res = new float[nangloc];
00066
00067 dm[6] = 0.0;
00068 dm[7] = 0.0;
00069
00070 for (int j = 0; j < nangloc; j++) {
00071 img_res[j] = 0.0;
00072 for (int i = 0; i<nx*ny; i++) {
00073 rvec[i] = 0.0;
00074 gvec1[i] = 0.0;
00075 gvec2[i] = 0.0;
00076 gvec3[i] = 0.0;
00077 gvec4[i] = 0.0;
00078 gvec5[i] = 0.0;
00079 }
00080
00081 jglb = nbase[mypid] + j;
00082
00083 phi = angtrs[5*jglb+0];
00084 theta = angtrs[5*jglb+1];
00085 psi = angtrs[5*jglb+2];
00086 sx = angtrs[5*jglb+3];
00087 sy = angtrs[5*jglb+4];
00088
00089 cphi=cos(phi);
00090 sphi=sin(phi);
00091 cthe=cos(theta);
00092 sthe=sin(theta);
00093 cpsi=cos(psi);
00094 spsi=sin(psi);
00095
00096
00097
00098
00099
00100 dm[0]=cphi*cthe*cpsi-sphi*spsi;
00101 dm[1]=sphi*cthe*cpsi+cphi*spsi;
00102 dm[2]=-sthe*cpsi;
00103 dm[3]=-cphi*cthe*spsi-sphi*cpsi;
00104 dm[4]=-sphi*cthe*spsi+cphi*cpsi;
00105 dm[5]=sthe*spsi;
00106 dm[6]=sx;
00107 dm[7]=sy;
00108
00109 ierr = fwdpj3(volsize, nrays, nnz, dm, origin, ri, ptrs, cord, volsph, rvec);
00110
00111
00112 ref_param = phi;
00113 phi = phi + dt;
00114
00115 cphi = cos(phi);
00116 sphi = sin(phi);
00117 phi = ref_param;
00118
00119 dm[0]=cphi*cthe*cpsi-sphi*spsi;
00120 dm[1]=sphi*cthe*cpsi+cphi*spsi;
00121 dm[2]=-sthe*cpsi;
00122 dm[3]=-cphi*cthe*spsi-sphi*cpsi;
00123 dm[4]=-sphi*cthe*spsi+cphi*cpsi;
00124 dm[5]=sthe*spsi;
00125
00126 ierr = fwdpj3(volsize, nrays, nnz, dm, origin, ri, ptrs, cord,
00127 volsph, gvec1);
00128
00129
00130
00131 cphi = cos(phi);
00132 sphi = sin(phi);
00133
00134
00135
00136 for (int i = 0; i<nx*ny; i++) gvec1[i] = (gvec1[i] - rvec[i])/dt;
00137
00138
00139 ref_param = theta;
00140 theta = theta + dt;
00141 cthe = cos(theta);
00142 sthe = sin(theta);
00143
00144 theta = ref_param;
00145
00146 dm[0]=cphi*cthe*cpsi-sphi*spsi;
00147 dm[1]=sphi*cthe*cpsi+cphi*spsi;
00148 dm[2]=-sthe*cpsi;
00149 dm[3]=-cphi*cthe*spsi-sphi*cpsi;
00150 dm[4]=-sphi*cthe*spsi+cphi*cpsi;
00151 dm[5]=sthe*spsi;
00152
00153 ierr = fwdpj3(volsize, nrays, nnz, dm, origin, ri, ptrs, cord,
00154 volsph, gvec2);
00155
00156
00157
00158 cthe = cos(theta);
00159 sthe = sin(theta);
00160
00161
00162
00163 for (int i = 0; i<nx*ny; i++) gvec2[i] = (gvec2[i]-rvec[i])/dt;
00164
00165
00166 ref_param = psi;
00167 psi = psi + dt;
00168 cpsi = cos(psi);
00169 spsi = sin(psi);
00170
00171 psi = ref_param;
00172
00173 dm[0]=cphi*cthe*cpsi-sphi*spsi;
00174 dm[1]=sphi*cthe*cpsi+cphi*spsi;
00175 dm[2]=-sthe*cpsi;
00176 dm[3]=-cphi*cthe*spsi-sphi*cpsi;
00177 dm[4]=-sphi*cthe*spsi+cphi*cpsi;
00178 dm[5]=sthe*spsi;
00179
00180 ierr = fwdpj3(volsize, nrays, nnz, dm, origin, ri, ptrs, cord,
00181 volsph, gvec3);
00182
00183
00184
00185 cpsi = cos(psi);
00186 spsi = sin(psi);
00187
00188
00189 for (int i = 0; i < nx*ny; i++) gvec3[i] = (gvec3[i] - rvec[i])/dt;
00190
00191
00192 dm[0]=cphi*cthe*cpsi-sphi*spsi;
00193 dm[1]=sphi*cthe*cpsi+cphi*spsi;
00194 dm[2]=-sthe*cpsi;
00195 dm[3]=-cphi*cthe*spsi-sphi*cpsi;
00196 dm[4]=-sphi*cthe*spsi+cphi*cpsi;
00197 dm[5]=sthe*spsi;
00198
00199
00200
00201 ref_param = dm[6];
00202 dm[6] += dt;
00203
00204 ierr = fwdpj3(volsize, nrays, nnz, dm, origin, ri, ptrs, cord,
00205 volsph, gvec4);
00206
00207
00208 dm[6] = ref_param;
00209
00210
00211 for (int i = 0; i < nx*ny; i++) gvec4[i] = (gvec4[i]-rvec[i])/dt;
00212
00213 ref_param = dm[7];
00214 dm[7] += dt;
00215
00216
00217 ierr = fwdpj3(volsize, nrays, nnz, dm, origin, ri, ptrs, cord,
00218 volsph, gvec5);
00219
00220
00221 dm[7] = ref_param;
00222
00223
00224 for (int i = 0; i < nx*ny; i++) gvec5[i] = (gvec5[i]-rvec[i])/dt;
00225
00226
00227 for (int i = 1; i <= nx*ny; i++) {
00228 rvec(i) = rvec(i) - rhs(i,j+1);
00229 }
00230
00231 ierr = bckpj3(volsize, nrays, nnz, dm, origin, ri, ptrs, cord,
00232 rvec, gradloc);
00233
00234
00235
00236
00237
00238 ibeg = nnz + 5*(jglb) - 1;
00239 for (int i = 0; i < nx*ny; i++) {
00240
00241 fvalloc += rvec[i]*rvec[i];
00242 img_res[j] += rvec[i] * rvec[i];
00243 gradloc(ibeg+1) += rvec[i]*gvec1[i];
00244 gradloc(ibeg+2) += rvec[i]*gvec2[i];
00245 gradloc(ibeg+3) += rvec[i]*gvec3[i];
00246 gradloc(ibeg+4) += rvec[i]*gvec4[i];
00247 gradloc(ibeg+5) += rvec[i]*gvec5[i];
00248 }
00249
00250
00251 }
00252
00253 ierr = MPI_Allreduce(gradloc, grad, nvars, MPI_FLOAT, MPI_SUM, comm);
00254 ierr = MPI_Allreduce(&fvalloc, fval, 1, MPI_FLOAT, MPI_SUM, comm);
00255
00256
00257 MPI_Status mpistatus;
00258 if (mypid == 0) {
00259 for ( int j = 0 ; j < psize[0] ; ++j ) {
00260 res_out << std::scientific << img_res[j] << std::endl;
00261 }
00262 for ( int j = 1 ; j < ncpus ; ++j ) {
00263 ierr = MPI_Recv(img_res, psize[j], MPI_FLOAT, j, j, comm, &mpistatus);
00264 for ( int i = 0 ; i < psize[j] ; ++i ) {
00265 res_out << std::scientific << img_res[i] << std::endl;
00266 }
00267 }
00268 } else {
00269 ierr = MPI_Send(img_res, nangloc, MPI_FLOAT, 0, mypid, comm);
00270 }
00271
00272
00273 res_out.close();
00274
00275 EMDeleteArray(psize);
00276 EMDeleteArray(nbase);
00277 EMDeleteArray(rvec);
00278 EMDeleteArray(gvec1);
00279 EMDeleteArray(gvec2);
00280 EMDeleteArray(gvec3);
00281 EMDeleteArray(gvec4);
00282 EMDeleteArray(gvec5);
00283 EMDeleteArray(gradloc);
00284 EMDeleteArray(img_res);
00285
00286 return 0;
00287 }
00288
00289
00290 int fcalc(float *volsph, Vec3i volsize,
00291 int nnz, int nrays, Vec3i origin, int ri,
00292 int *ptrs, int *cord, float *angtrs, int nang,
00293 float *rhs, NUMBER *fval)
00294 {
00295 int nx, ny, nz, jglb, ierr;
00296 float phi, theta, psi, sx, sy;
00297 NUMBER cphi, sphi, cthe, sthe, cpsi, spsi;
00298 float dm[8];
00299 float * rvec;
00300 *fval = 0.0;
00301
00302 nx = volsize[0];
00303 ny = volsize[1];
00304 nz = volsize[2];
00305
00306 rvec = new float[nx*ny];
00307
00308 for (int j = 0; j < nang; j++) {
00309 for (int i = 0; i<nx*ny; i++) {
00310 rvec[i] = 0.0;
00311 }
00312
00313
00314 phi = angtrs[5*j+0];
00315 theta = angtrs[5*j+1];
00316 psi = angtrs[5*j+2];
00317 sx = angtrs[5*j+3];
00318 sy = angtrs[5*j+4];
00319
00320 cphi=cos(phi);
00321 sphi=sin(phi);
00322 cthe=cos(theta);
00323 sthe=sin(theta);
00324 cpsi=cos(psi);
00325 spsi=sin(psi);
00326
00327 dm[0]=cphi*cthe*cpsi-sphi*spsi;
00328 dm[1]=sphi*cthe*cpsi+cphi*spsi;
00329 dm[2]=-sthe*cpsi;
00330 dm[3]=-cphi*cthe*spsi-sphi*cpsi;
00331 dm[4]=-sphi*cthe*spsi+cphi*cpsi;
00332 dm[5]=sthe*spsi;
00333 dm[6]=sx;
00334 dm[7]=sy;
00335
00336 ierr = fwdpj3(volsize, nrays, nnz, dm, origin, ri, ptrs, cord, volsph, rvec);
00337
00338
00339 for (int i = 1; i <= nx*ny; i++) {
00340 rvec(i) = rvec(i) - rhs(i,j+1);
00341 }
00342
00343
00344 for (int i = 0; i < nx*ny; i++) {
00345 *fval += rvec[i]*rvec[i];
00346 }
00347 }
00348
00349 EMDeleteArray(rvec);
00350
00351 return 0;
00352 }