#include <ctime>
#include "emdata.h"
#include <sstream>
#include <iostream>
Include dependency graph for speedtest.cpp:
Go to the source code of this file.
Defines | |
#define | CPS (CLOCKS_PER_SEC) |
Functions | |
int | main (int argc, char *argv[]) |
|
Definition at line 57 of file speedtest.cpp. |
|
Definition at line 60 of file speedtest.cpp. References abs, EMAN::EMData::align(), b, EMAN::EMData::cmp(), EMAN::EMData::copy(), data, EMAN::EMData::do_fft(), EMAN::EMData::dot(), EMAN::EMData::get_attr_dict(), EMAN::EMData::get_data(), EMAN::Util::get_gauss_rand(), get_xsize(), EMAN::EMData::get_xsize(), EMAN::EMData::process_inplace(), EMAN::EMData::rot_scale_trans2D(), EMAN::EMData::set_size(), sqrt(), EMAN::Util::sstrncmp(), EMAN::EMData::translate(), and EMAN::EMData::update(). 00061 { 00062 int SIZE = 96; 00063 int NTT = 500; 00064 00065 int slow = 0; 00066 int low = 0; 00067 int big = 0; 00068 int newali = 0; 00069 int vg = 0; 00070 //#ifdef EMAN2_USING_CUDA 00071 // int gpu = 0; 00072 //#endif 00073 00074 if (argc > 1) { 00075 if (Util::sstrncmp(argv[1], "slow")) 00076 slow = 1; 00077 else if (Util::sstrncmp(argv[1], "best")) 00078 slow = 3; 00079 else if (Util::sstrncmp(argv[1], "low")) 00080 low = 1; 00081 else if (Util::sstrncmp(argv[1], "refine")) 00082 newali = 1; 00083 else if (Util::sstrncmp(argv[1], "big")) 00084 big = 1; 00085 else if (Util::sstrncmp(argv[1], "valgrind")) 00086 vg=1; 00087 //#ifdef EMAN2_USING_CUDA 00088 // else if (Util::sstrncmp(argv[1], "gpu")) { 00089 // cout << "GPU mode selected" << endl; 00090 // gpu=1; 00091 // } 00092 //#endif 00093 } 00094 if (argc > 2) { 00095 // assume it's the boxsize 00096 stringstream ss; 00097 ss << argv[2]; 00098 ss >> SIZE; 00099 if (SIZE < 0 || SIZE > 1024) { 00100 cout << "The boxsize " << SIZE << " is invalid. Please specify a box size in the range of (0,1024]" << endl; 00101 throw; 00102 } 00103 } 00104 00105 // This is some testing code used to run valgrind tests on 00106 // parts of the core library. It is not for benchmarking 00107 // purposes. 00108 if (vg) { 00109 EMData *a = new EMData(); 00110 a->set_size(128,128,1); 00111 a->process_inplace("testimage.scurve"); 00112 EMData *b = a->rot_scale_trans2D(0.0,1.0,1.0,0.0); 00113 EMData *d = b->align("rotate_translate",a,Dict(),"sqeuclidean"); 00114 Dict r=b->get_attr_dict(); 00115 printf("%p\n",&r); 00116 printf("nx = %d\n",(int)r["nx"]); 00117 delete a; 00118 delete b; 00119 delete d; 00120 exit(0); 00121 } 00122 00123 printf("This could take a few minutes. Please be patient.\n"); 00124 if (big) { 00125 NTT = 100; 00126 SIZE = 320; 00127 } 00128 00129 printf("Initializing\n"); 00130 00131 EMData pat; 00132 pat.set_size(SIZE, SIZE, 1); 00133 float *d = pat.get_data(); 00134 00135 for (int i = -SIZE / 2; i < SIZE / 2; i++) { 00136 for (int j = -SIZE / 2; j < SIZE / 2; j++) { 00137 d[(i + SIZE / 2) + (j + SIZE / 2) * SIZE] = 00138 -3.0f * exp(-Util::square((fabs((float)i) + fabs((float)j))) / 10.0f) + 00139 exp(-Util::square((fabs((float)i) + fabs((float)j) / 2.0f)) / 100.0f) * 00140 (abs(i) < 2 ? 2.0f : 1.0f); 00141 } 00142 } 00143 pat.update(); 00144 pat.process_inplace("normalize.circlemean"); 00145 pat.process_inplace("mask.sharp", Dict("outer_radius", pat.get_xsize()/2)); 00146 00147 EMData *data[5000]; 00148 00149 for (int i = 0; i < NTT; i++) { 00150 data[i] = pat.copy(); 00151 float *d = data[i]->get_data(); 00152 00153 for (int j = 0; j < SIZE * SIZE; j++) { 00154 d[j] += Util::get_gauss_rand(0, 1.0); 00155 } 00156 data[i]->update(); 00157 data[i]->process_inplace("normalize.circlemean"); 00158 data[i]->process_inplace("mask.sharp", Dict("outer_radius", data[i]->get_xsize()/2)); 00159 00160 // if (i < 5) { 00161 // data[i]->write_image("speed.hed", i, EMUtil::IMAGE_IMAGIC); 00162 // } 00163 } 00164 00165 if (low) { 00166 printf("Low level tests starting. Please note that compiling with optimization may invalidate certain tests. Also note that overhead is difficult to compensate for, so in most cases it is not dealt with.\n"); 00167 00168 float t1 = (float) clock(); 00169 for (float fj = 0; fj < 500.0; fj += 1.0) { 00170 for (int i = 0; i < NTT / 2.0; i += 1) 00171 data[i]->dot(data[i + NTT / 2]); 00172 } 00173 00174 float t2 = (float) clock(); 00175 float ti = (t2 - t1) / (float) CPS; 00176 printf("Baseline 1: %d, %d x %d dot()s in %1.1f sec %1.1f/sec-> ~%1.2f mflops\n", 00177 500 * NTT / 2, SIZE, SIZE, ti, 500 * NTT / (2.0 * ti), 00178 SIZE * SIZE * 4 * 500.0 * NTT / (1000000.0 * ti)); 00179 00180 t1 = (float) clock(); 00181 for (float fj = 0; fj < 500.0; fj += 1.0) { 00182 for (float fi = 0; fi < NTT / 2.0; fi += 1.0) 00183 data[1]->dot(data[12]); 00184 } 00185 t2 = (float) clock(); 00186 ti = (t2 - t1) / (float) CPS; 00187 printf("Baseline 1a: %d, %d x %d optimized cached dot()s in %1.1f s %1.1f/s-> ~%1.2f mflops\n", 00188 500 * NTT / 2, SIZE, SIZE, ti, 500 * NTT / (2.0 * ti), 00189 SIZE * SIZE * 4 * 500.0 * NTT / (1000000.0 * ti)); 00190 00191 t1 = (float) clock(); 00192 for (int j = 0; j < 500; j++) { 00193 for (int i = 0; i < NTT / 2; i++) { 00194 Dict d; 00195 d["keepzero"] = 1; 00196 data[i]->cmp("sqeuclidean", data[i + NTT / 2], d); 00197 } 00198 } 00199 t2 = (float) clock(); 00200 ti = (t2 - t1) / (float) CPS; 00201 printf("Baseline 2: %d, %d x %d lcmp()s in %1.1f sec %1.1f/sec\n", 500 * NTT / 2, SIZE, 00202 SIZE, ti, 500 * NTT / (2.0 * ti)); 00203 00204 t1 = (float) clock(); 00205 for (int j = 0; j < 100; j++) { 00206 for (int i = 0; i < NTT / 2; i++) { 00207 Dict params; 00208 data[i]->cmp("phase", data[i + NTT / 2], params); 00209 } 00210 } 00211 t2 = (float) clock(); 00212 ti = (t2 - t1) / (float) CPS; 00213 printf("Baseline 3: %d, %d x %d pcmp()s in %1.1f sec %1.1f/sec\n", 100 * NTT / 2, SIZE, 00214 SIZE, ti, 100 * NTT / (2.0 * ti)); 00215 00216 t1 = (float) clock(); 00217 for (int j = 0; j < 100; j++) { 00218 for (int i = 0; i < NTT / 2; i++) { 00219 Dict params; 00220 data[i]->cmp("frc", data[i + NTT / 2], params); 00221 } 00222 } 00223 t2 = (float) clock(); 00224 ti = (t2 - t1) / (float) CPS; 00225 printf("Baseline 4: %d, %d x %d fscmp()s in %1.1f sec %1.1f/sec\n", 100 * NTT / 2, SIZE, 00226 SIZE, ti, 100 * NTT / (2.0 * ti)); 00227 00228 t1 = (float) clock(); 00229 for (int j = 0; j < 500; j++) { 00230 for (int i = 0; i < NTT / 2; i++) 00231 data[i]->process_inplace("math.absvalue"); 00232 } 00233 t2 = (float) clock(); 00234 ti = (t2 - t1) / (float) CPS; 00235 printf("Baseline 5a: %d, %d x %d fabs in %1.1f sec -> ~%1.2f mfabs/sec\n", 500 * NTT / 2, 00236 SIZE, SIZE, ti, SIZE * SIZE * 500.0 * NTT / (1000000.0 * ti)); 00237 00238 t1 = (float) clock(); 00239 for (int j = 0; j < 100; j++) { 00240 for (int i = 0; i < NTT / 2; i++) 00241 data[i]->process_inplace("math.sqrt"); 00242 } 00243 t2 = (float) clock(); 00244 ti = (t2 - t1) / (float) CPS; 00245 printf("Baseline 5b: %d, %d x %d sqrts in %1.1f sec -> ~%1.2f msqrt/sec\n", 100 * NTT / 2, 00246 SIZE, SIZE, ti, SIZE * SIZE * 100.0 * NTT / (1000000.0 * ti)); 00247 00248 d = data[0]->get_data(); 00249 t1 = (float) clock(); 00250 for (int j = 0; j < 100; j++) { 00251 for (int i = 0; i < NTT / 2; i++) { 00252 for (int k = 0; k < SIZE * SIZE; k++) 00253 (void)sqrt(d[k]); 00254 } 00255 } 00256 t2 = (float) clock(); 00257 ti = (t2 - t1) / (float) CPS; 00258 printf("Baseline 5c: %d, %d x %d sqrts in %1.1f sec -> ~%1.2f msqrt/sec (cached)\n", 00259 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 100.0 * NTT / (1000000.0 * ti)); 00260 data[0]->update(); 00261 00262 d = data[0]->get_data(); 00263 t1 = (float) clock(); 00264 for (int j = 0; j < 100; j++) { 00265 for (int i = 0; i < NTT / 2; i++) { 00266 for (int k = 0; k < SIZE * SIZE; k++) 00267 (void)cos(d[k]); 00268 } 00269 } 00270 t2 = (float) clock(); 00271 ti = (t2 - t1) / (float) CPS; 00272 printf("Baseline 5d: %d, %d x %d cos in %1.1f sec -> ~%1.2f mcos/sec (cached)\n", 00273 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 100.0 * NTT / (1000000.0 * ti)); 00274 data[0]->update(); 00275 00276 d = data[0]->get_data(); 00277 t1 = (float) clock(); 00278 for (int j = 0; j < 100; j++) { 00279 for (int i = 0; i < NTT / 2; i++) { 00280 for (int k = 0; k < SIZE * SIZE - 1; k++) 00281 (void)hypot(d[k], d[k + 1]); 00282 } 00283 } 00284 t2 = (float) clock(); 00285 ti = (t2 - t1) / (float) CPS; 00286 printf("Baseline 5e: %d, %d x %d hypot in %1.1f sec -> ~%1.2f mhypot/sec (cached)\n", 00287 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 100.0 * NTT / (1000000.0 * ti)); 00288 data[0]->update(); 00289 00290 d = data[0]->get_data(); 00291 t1 = (float) clock(); 00292 for (int j = 0; j < 1000; j++) { 00293 for (int i = 0; i < NTT / 2; i++) { 00294 for (int k = 0; k < SIZE * SIZE - 1; k++) { 00295 float f = d[k] * d[k + 1]; 00296 f = f + f; 00297 } 00298 } 00299 } 00300 00301 t2 = (float) clock(); 00302 ti = (t2 - t1) / (float) CPS; 00303 printf("Baseline 5f: %d, %d x %d mult in %1.1f sec -> ~%1.2f mmult/sec (cached)\n", 00304 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 1000.0 * NTT / (1000000.0 * ti)); 00305 data[0]->update(); 00306 00307 d = data[0]->get_data(); 00308 t1 = (float) clock(); 00309 for (int j = 0; j < 500; j++) { 00310 for (int i = 0; i < NTT / 2; i++) { 00311 for (int k = 0; k < SIZE * SIZE - 1; k++) { 00312 float a = d[k] / d[k + 1]; 00313 a = a + a; 00314 } 00315 } 00316 } 00317 t2 = (float) clock(); 00318 ti = (t2 - t1) / (float) CPS; 00319 printf("Baseline 5g: %d, %d x %d div in %1.1f sec -> ~%1.2f mdiv/sec (cached)\n", 00320 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 500.0 * NTT / (1000000.0 * ti)); 00321 data[0]->update(); 00322 00323 d = data[0]->get_data(); 00324 t1 = (float) clock(); 00325 for (int j = 0; j < 500; j++) { 00326 for (int i = 0; i < NTT / 2; i++) { 00327 for (int k = 0; k < SIZE * SIZE - 1; k++) { 00328 float f = fabs(d[k]); 00329 f = f + f; 00330 } 00331 } 00332 } 00333 t2 = (float) clock(); 00334 ti = (t2 - t1) / (float) CPS; 00335 printf("Baseline 5h: %d, %d x %d fabs in %1.1f sec -> ~%1.2f fabs/sec (cached)\n", 00336 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 500.0 * NTT / (1000000.0 * ti)); 00337 data[0]->update(); 00338 00339 d = data[0]->get_data(); 00340 t1 = (float) clock(); 00341 for (int j = 0; j < 500; j++) { 00342 for (int i = 0; i < NTT / 2; i++) { 00343 for (int k = 0; k < SIZE * SIZE - 1; k++) { 00344 (void)atan2(d[k], d[k + 1]); 00345 (void)hypot(d[k], d[k + 1]); 00346 } 00347 } 00348 } 00349 t2 = (float) clock(); 00350 ti = (t2 - t1) / (float) CPS; 00351 printf("Baseline 5i: %d, %d x %d ri2ap in %1.1f sec -> ~%1.2f ri2ap/sec (cached)\n", 00352 100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 500.0 * NTT / (1000000.0 * ti)); 00353 00354 data[0]->update(); 00355 t1 = (float) clock(); 00356 00357 for (int i = 0; i < NTT * 100; i++) { 00358 EMData *cp = data[i % NTT]->copy(); 00359 Dict d("n",2); 00360 cp->process_inplace("math.meanshrink",d); 00361 if( cp ) 00362 { 00363 delete cp; 00364 cp = 0; 00365 } 00366 } 00367 t2 = (float) clock(); 00368 ti = (t2 - t1) / (float) CPS; 00369 printf("Baseline 6: %1.1f sec %f meanshrink x 2/sec\n", ti, NTT * 100.0 / ti); 00370 00371 EMData *d1a = data[0]->copy(); 00372 t1 = (float) clock(); 00373 00374 for (int i = 0; i < NTT * 1000; i++) { 00375 d1a->do_fft(); 00376 d1a->update(); 00377 } 00378 t2 = (float) clock(); 00379 ti = (t2 - t1) / (float) CPS; 00380 printf("Baseline 7: %1.1f sec %f ffts/sec\n", ti, NTT * 1000 / ti); 00381 00382 d1a = d1a->copy(); 00383 t1 = (float) clock(); 00384 00385 for (int i = 0; i < NTT * 1000; i++) { 00386 d1a->translate(-1, -3, 0); 00387 } 00388 t2 = (float) clock(); 00389 ti = (t2 - t1) / (float) CPS; 00390 printf("Baseline 8: %1.1f sec %f translates/sec\n", ti, NTT * 1000 / ti); 00391 00392 return 0; 00393 } 00394 00395 00396 EMData *tmp = 0; 00397 float t1 = (float) clock(); 00398 for (int i = 0; i < 3; i++) { 00399 //#ifdef EMAN2_USING_CUDA 00400 // if (gpu) { 00401 // cout << "Using gpu" << endl; 00402 // data[i]->set_gpu_rw_current(); 00403 // data[i]->cuda_lock(); 00404 // } 00405 //#endif 00406 for (int j = 5; j < NTT; j++) { 00407 if (slow == 3) { 00408 tmp = data[i]->align("rtf_best", data[j], 00409 Dict("flip", (EMData*)0,"maxshift", SIZE/8)); 00410 } 00411 else if (slow == 1) { 00412 Dict d; 00413 d["flip"] = (EMData*)0; 00414 d["maxshift"] = SIZE/8; 00415 tmp = data[i]->align("rtf_slow", data[j], d); 00416 } 00417 else if (newali == 1) { 00418 tmp = data[i]->align("rotate_translate_flip", data[j], Dict()); 00419 EMData* tmp2 = tmp->align("refine", data[j], Dict()); 00420 delete tmp2; 00421 tmp2 = 0; 00422 } 00423 else { 00424 //#ifdef EMAN2_USING_CUDA 00425 // if (gpu) { 00426 // data[j]->set_gpu_rw_current(); 00427 // } 00428 //#endif 00429 tmp = data[i]->align("rotate_translate_flip", data[j], Dict()); 00430 } 00431 if( tmp ) 00432 { 00433 delete tmp; 00434 tmp = 0; 00435 } 00436 if (j % 10 == 0) { 00437 putchar('.'); 00438 fflush(stdout); 00439 } 00440 } 00441 //#ifdef EMAN2_USING_CUDA 00442 // if (gpu) { 00443 // data[i]->cuda_unlock(); 00444 // } 00445 //#endif 00446 putchar('\n'); 00447 } 00448 float t2 = (float) clock(); 00449 00450 float ti = (t2 - t1) / (float) CPS; 00451 if (slow == 2) 00452 ti *= 10.0; 00453 if (!big && !slow && !newali) { 00454 printf("\nFor comparison (note these numbers may change from release to release)\n"); 00455 printf("An AMD Athlon (32 bit) 900Mhz SF -------------------------------- 360\n"); 00456 printf("An AMD Athlon XP 2400+ (32 bit) 2.0Ghz SF ----------------------- 1010\n"); 00457 printf("An AMD Athlon XP 2600+ (32 bit) 2.0Ghz SF ----------------------- 1090\n"); 00458 printf("An AMD Athlon 64 3700+ 2.2Ghz SF -------------------------------- 1530\n"); 00459 printf("An AMD Athlon 64 X2 3800+ 2.0Ghz SF ----------------------------- 1578\n"); 00460 printf("An AMD Athlon 64 FX-51 2.2Ghz SF -------------------------------- 1760\n"); 00461 printf("An AMD Opteron 248 2.2Ghz SF ------------------------------------ 1870\n"); 00462 printf("An Intel Core2 T7200 2.0Ghz SF ---------------------------------- 1990\n"); 00463 printf("An Intel Xeon E5335 2.0Ghz SF ----------------------------------- 2010\n"); 00464 printf("An AMD Opteron 280 2.4Ghz SF ------------------------------------ 2130\n"); 00465 printf("An Intel Core2 6700 2.66Ghz SF ---------------------------------- 2600\n"); 00466 printf("An Intel Core2 Duo T9400 2.53Ghz SF ----------------------------- 2730\n"); 00467 printf("An Intel Xeon E5430 2.66Ghz SF ---------------------------------- 2800\n"); 00468 printf("An Intel Xeon X5355 2.66Ghz SF ---------------------------------- 2920\n"); 00469 printf("An Intel Xeon X5450 3.0Ghz SF ----------------------------------- 3220\n"); 00470 printf("An Intel Xeon X5460 3.16Ghz SF ---------------------------------- 3320\n"); 00471 printf("\nYour machines speed factor = %1.1f\n", 25000.0 / ti); 00472 printf("\n\nThis repesents %1.2f RTFAligns/sec\n", 00473 3.0 * ((slow == 2 ? NTT / 10 : NTT) - 5) / ti); 00474 } 00475 else if (big && !slow) { 00476 printf("\nYour machines speed factor = %1.1f\n", 72000.0 / ti); 00477 } 00478 else { 00479 printf("\nYour machines speed factor on this test = %1.1f\n", 25000.0 / ti); 00480 } 00481 return 0; 00482 }
|