00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045 #include <ctime>
00046
00047 #include "emdata.h"
00048 #include <sstream>
00049 using std::stringstream;
00050
00051 #include <iostream>
00052 using std::cout;
00053 using std::endl;
00054
00055 using namespace EMAN;
00056
00057 #define CPS (CLOCKS_PER_SEC)
00058
00059
00060 int main(int argc, char *argv[])
00061 {
00062 int SIZE = 96;
00063 int NTT = 500;
00064
00065 int slow = 0;
00066 int low = 0;
00067 int big = 0;
00068 int newali = 0;
00069 int vg = 0;
00070
00071
00072
00073
00074 if (argc > 1) {
00075 if (Util::sstrncmp(argv[1], "slow"))
00076 slow = 1;
00077 else if (Util::sstrncmp(argv[1], "best"))
00078 slow = 3;
00079 else if (Util::sstrncmp(argv[1], "low"))
00080 low = 1;
00081 else if (Util::sstrncmp(argv[1], "refine"))
00082 newali = 1;
00083 else if (Util::sstrncmp(argv[1], "big"))
00084 big = 1;
00085 else if (Util::sstrncmp(argv[1], "valgrind"))
00086 vg=1;
00087
00088
00089
00090
00091
00092
00093 }
00094 if (argc > 2) {
00095
00096 stringstream ss;
00097 ss << argv[2];
00098 ss >> SIZE;
00099 if (SIZE < 0 || SIZE > 1024) {
00100 cout << "The boxsize " << SIZE << " is invalid. Please specify a box size in the range of (0,1024]" << endl;
00101 throw;
00102 }
00103 }
00104
00105
00106
00107
00108 if (vg) {
00109 EMData *a = new EMData();
00110 a->set_size(128,128,1);
00111 a->process_inplace("testimage.scurve");
00112 EMData *b = a->rot_scale_trans2D(0.0,1.0,1.0,0.0);
00113 EMData *d = b->align("rotate_translate",a,Dict(),"sqeuclidean");
00114 Dict r=b->get_attr_dict();
00115 printf("%p\n",&r);
00116 printf("nx = %d\n",(int)r["nx"]);
00117 delete a;
00118 delete b;
00119 delete d;
00120 exit(0);
00121 }
00122
00123 printf("This could take a few minutes. Please be patient.\n");
00124 if (big) {
00125 NTT = 100;
00126 SIZE = 320;
00127 }
00128
00129 printf("Initializing\n");
00130
00131 EMData pat;
00132 pat.set_size(SIZE, SIZE, 1);
00133 float *d = pat.get_data();
00134
00135 for (int i = -SIZE / 2; i < SIZE / 2; i++) {
00136 for (int j = -SIZE / 2; j < SIZE / 2; j++) {
00137 d[(i + SIZE / 2) + (j + SIZE / 2) * SIZE] =
00138 -3.0f * exp(-Util::square((fabs((float)i) + fabs((float)j))) / 10.0f) +
00139 exp(-Util::square((fabs((float)i) + fabs((float)j) / 2.0f)) / 100.0f) *
00140 (abs(i) < 2 ? 2.0f : 1.0f);
00141 }
00142 }
00143 pat.update();
00144 pat.process_inplace("normalize.circlemean");
00145 pat.process_inplace("mask.sharp", Dict("outer_radius", pat.get_xsize()/2));
00146
00147 EMData *data[5000];
00148
00149 for (int i = 0; i < NTT; i++) {
00150 data[i] = pat.copy();
00151 float *d = data[i]->get_data();
00152
00153 for (int j = 0; j < SIZE * SIZE; j++) {
00154 d[j] += Util::get_gauss_rand(0, 1.0);
00155 }
00156 data[i]->update();
00157 data[i]->process_inplace("normalize.circlemean");
00158 data[i]->process_inplace("mask.sharp", Dict("outer_radius", data[i]->get_xsize()/2));
00159
00160
00161
00162
00163 }
00164
00165 if (low) {
00166 printf("Low level tests starting. Please note that compiling with optimization may invalidate certain tests. Also note that overhead is difficult to compensate for, so in most cases it is not dealt with.\n");
00167
00168 float t1 = (float) clock();
00169 for (float fj = 0; fj < 500.0; fj += 1.0) {
00170 for (int i = 0; i < NTT / 2.0; i += 1)
00171 data[i]->dot(data[i + NTT / 2]);
00172 }
00173
00174 float t2 = (float) clock();
00175 float ti = (t2 - t1) / (float) CPS;
00176 printf("Baseline 1: %d, %d x %d dot()s in %1.1f sec %1.1f/sec-> ~%1.2f mflops\n",
00177 500 * NTT / 2, SIZE, SIZE, ti, 500 * NTT / (2.0 * ti),
00178 SIZE * SIZE * 4 * 500.0 * NTT / (1000000.0 * ti));
00179
00180 t1 = (float) clock();
00181 for (float fj = 0; fj < 500.0; fj += 1.0) {
00182 for (float fi = 0; fi < NTT / 2.0; fi += 1.0)
00183 data[1]->dot(data[12]);
00184 }
00185 t2 = (float) clock();
00186 ti = (t2 - t1) / (float) CPS;
00187 printf("Baseline 1a: %d, %d x %d optimized cached dot()s in %1.1f s %1.1f/s-> ~%1.2f mflops\n",
00188 500 * NTT / 2, SIZE, SIZE, ti, 500 * NTT / (2.0 * ti),
00189 SIZE * SIZE * 4 * 500.0 * NTT / (1000000.0 * ti));
00190
00191 t1 = (float) clock();
00192 for (int j = 0; j < 500; j++) {
00193 for (int i = 0; i < NTT / 2; i++) {
00194 Dict d;
00195 d["keepzero"] = 1;
00196 data[i]->cmp("sqeuclidean", data[i + NTT / 2], d);
00197 }
00198 }
00199 t2 = (float) clock();
00200 ti = (t2 - t1) / (float) CPS;
00201 printf("Baseline 2: %d, %d x %d lcmp()s in %1.1f sec %1.1f/sec\n", 500 * NTT / 2, SIZE,
00202 SIZE, ti, 500 * NTT / (2.0 * ti));
00203
00204 t1 = (float) clock();
00205 for (int j = 0; j < 100; j++) {
00206 for (int i = 0; i < NTT / 2; i++) {
00207 Dict params;
00208 data[i]->cmp("phase", data[i + NTT / 2], params);
00209 }
00210 }
00211 t2 = (float) clock();
00212 ti = (t2 - t1) / (float) CPS;
00213 printf("Baseline 3: %d, %d x %d pcmp()s in %1.1f sec %1.1f/sec\n", 100 * NTT / 2, SIZE,
00214 SIZE, ti, 100 * NTT / (2.0 * ti));
00215
00216 t1 = (float) clock();
00217 for (int j = 0; j < 100; j++) {
00218 for (int i = 0; i < NTT / 2; i++) {
00219 Dict params;
00220 data[i]->cmp("frc", data[i + NTT / 2], params);
00221 }
00222 }
00223 t2 = (float) clock();
00224 ti = (t2 - t1) / (float) CPS;
00225 printf("Baseline 4: %d, %d x %d fscmp()s in %1.1f sec %1.1f/sec\n", 100 * NTT / 2, SIZE,
00226 SIZE, ti, 100 * NTT / (2.0 * ti));
00227
00228 t1 = (float) clock();
00229 for (int j = 0; j < 500; j++) {
00230 for (int i = 0; i < NTT / 2; i++)
00231 data[i]->process_inplace("math.absvalue");
00232 }
00233 t2 = (float) clock();
00234 ti = (t2 - t1) / (float) CPS;
00235 printf("Baseline 5a: %d, %d x %d fabs in %1.1f sec -> ~%1.2f mfabs/sec\n", 500 * NTT / 2,
00236 SIZE, SIZE, ti, SIZE * SIZE * 500.0 * NTT / (1000000.0 * ti));
00237
00238 t1 = (float) clock();
00239 for (int j = 0; j < 100; j++) {
00240 for (int i = 0; i < NTT / 2; i++)
00241 data[i]->process_inplace("math.sqrt");
00242 }
00243 t2 = (float) clock();
00244 ti = (t2 - t1) / (float) CPS;
00245 printf("Baseline 5b: %d, %d x %d sqrts in %1.1f sec -> ~%1.2f msqrt/sec\n", 100 * NTT / 2,
00246 SIZE, SIZE, ti, SIZE * SIZE * 100.0 * NTT / (1000000.0 * ti));
00247
00248 d = data[0]->get_data();
00249 t1 = (float) clock();
00250 for (int j = 0; j < 100; j++) {
00251 for (int i = 0; i < NTT / 2; i++) {
00252 for (int k = 0; k < SIZE * SIZE; k++)
00253 (void)sqrt(d[k]);
00254 }
00255 }
00256 t2 = (float) clock();
00257 ti = (t2 - t1) / (float) CPS;
00258 printf("Baseline 5c: %d, %d x %d sqrts in %1.1f sec -> ~%1.2f msqrt/sec (cached)\n",
00259 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 100.0 * NTT / (1000000.0 * ti));
00260 data[0]->update();
00261
00262 d = data[0]->get_data();
00263 t1 = (float) clock();
00264 for (int j = 0; j < 100; j++) {
00265 for (int i = 0; i < NTT / 2; i++) {
00266 for (int k = 0; k < SIZE * SIZE; k++)
00267 (void)cos(d[k]);
00268 }
00269 }
00270 t2 = (float) clock();
00271 ti = (t2 - t1) / (float) CPS;
00272 printf("Baseline 5d: %d, %d x %d cos in %1.1f sec -> ~%1.2f mcos/sec (cached)\n",
00273 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 100.0 * NTT / (1000000.0 * ti));
00274 data[0]->update();
00275
00276 d = data[0]->get_data();
00277 t1 = (float) clock();
00278 for (int j = 0; j < 100; j++) {
00279 for (int i = 0; i < NTT / 2; i++) {
00280 for (int k = 0; k < SIZE * SIZE - 1; k++)
00281 (void)hypot(d[k], d[k + 1]);
00282 }
00283 }
00284 t2 = (float) clock();
00285 ti = (t2 - t1) / (float) CPS;
00286 printf("Baseline 5e: %d, %d x %d hypot in %1.1f sec -> ~%1.2f mhypot/sec (cached)\n",
00287 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 100.0 * NTT / (1000000.0 * ti));
00288 data[0]->update();
00289
00290 d = data[0]->get_data();
00291 t1 = (float) clock();
00292 for (int j = 0; j < 1000; j++) {
00293 for (int i = 0; i < NTT / 2; i++) {
00294 for (int k = 0; k < SIZE * SIZE - 1; k++) {
00295 float f = d[k] * d[k + 1];
00296 f = f + f;
00297 }
00298 }
00299 }
00300
00301 t2 = (float) clock();
00302 ti = (t2 - t1) / (float) CPS;
00303 printf("Baseline 5f: %d, %d x %d mult in %1.1f sec -> ~%1.2f mmult/sec (cached)\n",
00304 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 1000.0 * NTT / (1000000.0 * ti));
00305 data[0]->update();
00306
00307 d = data[0]->get_data();
00308 t1 = (float) clock();
00309 for (int j = 0; j < 500; j++) {
00310 for (int i = 0; i < NTT / 2; i++) {
00311 for (int k = 0; k < SIZE * SIZE - 1; k++) {
00312 float a = d[k] / d[k + 1];
00313 a = a + a;
00314 }
00315 }
00316 }
00317 t2 = (float) clock();
00318 ti = (t2 - t1) / (float) CPS;
00319 printf("Baseline 5g: %d, %d x %d div in %1.1f sec -> ~%1.2f mdiv/sec (cached)\n",
00320 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 500.0 * NTT / (1000000.0 * ti));
00321 data[0]->update();
00322
00323 d = data[0]->get_data();
00324 t1 = (float) clock();
00325 for (int j = 0; j < 500; j++) {
00326 for (int i = 0; i < NTT / 2; i++) {
00327 for (int k = 0; k < SIZE * SIZE - 1; k++) {
00328 float f = fabs(d[k]);
00329 f = f + f;
00330 }
00331 }
00332 }
00333 t2 = (float) clock();
00334 ti = (t2 - t1) / (float) CPS;
00335 printf("Baseline 5h: %d, %d x %d fabs in %1.1f sec -> ~%1.2f fabs/sec (cached)\n",
00336 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 500.0 * NTT / (1000000.0 * ti));
00337 data[0]->update();
00338
00339 d = data[0]->get_data();
00340 t1 = (float) clock();
00341 for (int j = 0; j < 500; j++) {
00342 for (int i = 0; i < NTT / 2; i++) {
00343 for (int k = 0; k < SIZE * SIZE - 1; k++) {
00344 (void)atan2(d[k], d[k + 1]);
00345 (void)hypot(d[k], d[k + 1]);
00346 }
00347 }
00348 }
00349 t2 = (float) clock();
00350 ti = (t2 - t1) / (float) CPS;
00351 printf("Baseline 5i: %d, %d x %d ri2ap in %1.1f sec -> ~%1.2f ri2ap/sec (cached)\n",
00352 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 500.0 * NTT / (1000000.0 * ti));
00353
00354 data[0]->update();
00355 t1 = (float) clock();
00356
00357 for (int i = 0; i < NTT * 100; i++) {
00358 EMData *cp = data[i % NTT]->copy();
00359 Dict d("n",2);
00360 cp->process_inplace("math.meanshrink",d);
00361 if( cp )
00362 {
00363 delete cp;
00364 cp = 0;
00365 }
00366 }
00367 t2 = (float) clock();
00368 ti = (t2 - t1) / (float) CPS;
00369 printf("Baseline 6: %1.1f sec %f meanshrink x 2/sec\n", ti, NTT * 100.0 / ti);
00370
00371 EMData *d1a = data[0]->copy();
00372 t1 = (float) clock();
00373
00374 for (int i = 0; i < NTT * 1000; i++) {
00375 d1a->do_fft();
00376 d1a->update();
00377 }
00378 t2 = (float) clock();
00379 ti = (t2 - t1) / (float) CPS;
00380 printf("Baseline 7: %1.1f sec %f ffts/sec\n", ti, NTT * 1000 / ti);
00381
00382 d1a = d1a->copy();
00383 t1 = (float) clock();
00384
00385 for (int i = 0; i < NTT * 1000; i++) {
00386 d1a->translate(-1, -3, 0);
00387 }
00388 t2 = (float) clock();
00389 ti = (t2 - t1) / (float) CPS;
00390 printf("Baseline 8: %1.1f sec %f translates/sec\n", ti, NTT * 1000 / ti);
00391
00392 return 0;
00393 }
00394
00395
00396 EMData *tmp = 0;
00397 float t1 = (float) clock();
00398 for (int i = 0; i < 3; i++) {
00399
00400
00401
00402
00403
00404
00405
00406 for (int j = 5; j < NTT; j++) {
00407 if (slow == 3) {
00408 tmp = data[i]->align("rtf_best", data[j],
00409 Dict("flip", (EMData*)0,"maxshift", SIZE/8));
00410 }
00411 else if (slow == 1) {
00412 Dict d;
00413 d["flip"] = (EMData*)0;
00414 d["maxshift"] = SIZE/8;
00415 tmp = data[i]->align("rtf_slow", data[j], d);
00416 }
00417 else if (newali == 1) {
00418 tmp = data[i]->align("rotate_translate_flip", data[j], Dict());
00419 EMData* tmp2 = tmp->align("refine", data[j], Dict());
00420 delete tmp2;
00421 tmp2 = 0;
00422 }
00423 else {
00424
00425
00426
00427
00428
00429 tmp = data[i]->align("rotate_translate_flip", data[j], Dict());
00430 }
00431 if( tmp )
00432 {
00433 delete tmp;
00434 tmp = 0;
00435 }
00436 if (j % 10 == 0) {
00437 putchar('.');
00438 fflush(stdout);
00439 }
00440 }
00441
00442
00443
00444
00445
00446 putchar('\n');
00447 }
00448 float t2 = (float) clock();
00449
00450 float ti = (t2 - t1) / (float) CPS;
00451 if (slow == 2)
00452 ti *= 10.0;
00453 if (!big && !slow && !newali) {
00454 printf("\nFor comparison (note these numbers may change from release to release)\n");
00455 printf("An AMD Athlon (32 bit) 900Mhz SF -------------------------------- 360\n");
00456 printf("An AMD Athlon XP 2400+ (32 bit) 2.0Ghz SF ----------------------- 1010\n");
00457 printf("An AMD Athlon XP 2600+ (32 bit) 2.0Ghz SF ----------------------- 1090\n");
00458 printf("An AMD Athlon 64 3700+ 2.2Ghz SF -------------------------------- 1530\n");
00459 printf("An AMD Athlon 64 X2 3800+ 2.0Ghz SF ----------------------------- 1578\n");
00460 printf("An AMD Athlon 64 FX-51 2.2Ghz SF -------------------------------- 1760\n");
00461 printf("An AMD Opteron 248 2.2Ghz SF ------------------------------------ 1870\n");
00462 printf("An Intel Core2 T7200 2.0Ghz SF ---------------------------------- 1990\n");
00463 printf("An Intel Xeon E5335 2.0Ghz SF ----------------------------------- 2010\n");
00464 printf("An AMD Opteron 280 2.4Ghz SF ------------------------------------ 2130\n");
00465 printf("An Intel Core2 6700 2.66Ghz SF ---------------------------------- 2600\n");
00466 printf("An Intel Core2 Duo T9400 2.53Ghz SF ----------------------------- 2730\n");
00467 printf("An Intel Xeon E5430 2.66Ghz SF ---------------------------------- 2800\n");
00468 printf("An Intel Xeon X5355 2.66Ghz SF ---------------------------------- 2920\n");
00469 printf("An Intel Xeon X5450 3.0Ghz SF ----------------------------------- 3220\n");
00470 printf("An Intel Xeon X5460 3.16Ghz SF ---------------------------------- 3320\n");
00471 printf("\nYour machines speed factor = %1.1f\n", 25000.0 / ti);
00472 printf("\n\nThis repesents %1.2f RTFAligns/sec\n",
00473 3.0 * ((slow == 2 ? NTT / 10 : NTT) - 5) / ti);
00474 }
00475 else if (big && !slow) {
00476 printf("\nYour machines speed factor = %1.1f\n", 72000.0 / ti);
00477 }
00478 else {
00479 printf("\nYour machines speed factor on this test = %1.1f\n", 25000.0 / ti);
00480 }
00481 return 0;
00482 }
00483
00484