#ifdef EMAN2_USING_CUDA

#include "emdata.h"
#include "exception.h"
#include <cuda_runtime_api.h>
#include <driver_functions.h>
#include <cuda.h>
#include "cuda/cuda_util.h"
#include "cuda/cuda_processor.h"
#include "cuda/cuda_emfft.h"

using namespace EMAN;

EMData::CudaCache EMData::cuda_cache(100);

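// get_cuda_data(): lazily ensures the image's pixel data is resident on the
// device in read-write form and returns the device pointer. A minimal usage
// sketch, not part of the original source (the image name and sizes are
// hypothetical; the call sequence is inferred from the flag handling below):
//
//     EMData img;
//     img.set_size(64, 64, 64);             // host-side allocation
//     float* d_ptr = img.get_cuda_data();   // uploads to the GPU on first use
//     // ... run device code on d_ptr ...
//     img.gpu_update();                     // flag the device copy as the most recent
//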
float* EMData::get_cuda_data() const {
	cout << cuda_cache_handle << endl;
	if (get_size() == 0) throw UnexpectedBehaviorException("The size of the data is 0?");
	if (cuda_cache_handle == -1 || EMDATA_GPU_NEEDS_UPDATE & flags) {
		cout << "needs an update or handle=-1" << endl;
		if (cuda_cache_handle != -1 && gpu_ro_is_current()) {
			cuda_cache.copy_ro_to_rw(cuda_cache_handle);
		} else {
			if (cuda_cache_handle != -1) {
				cuda_cache.clear_item(cuda_cache_handle);
			}
			cuda_cache_handle = cuda_cache.cache_rw_data(this, rdata, nx, ny, nz);
			if (cuda_cache_handle == -1) throw UnexpectedBehaviorException("Failed to cache RW data on the GPU");
		}
		flags &= ~EMDATA_GPU_NEEDS_UPDATE;
	}
	return cuda_cache.get_rw_data(cuda_cache_handle);
}

bool EMData::gpu_rw_is_current() const {
	if (cuda_cache_handle != -1 && !(EMDATA_GPU_NEEDS_UPDATE & flags)) return cuda_cache.has_rw_data(cuda_cache_handle);
	else return false;
}

bool EMData::cpu_rw_is_current() const {
	if (!(EMDATA_CPU_NEEDS_UPDATE & flags) && rdata != 0) return true;
	return false;
}

bool EMData::gpu_ro_is_current() const {
	if (cuda_cache_handle != -1 && !(EMDATA_GPU_RO_NEEDS_UPDATE & flags)) return cuda_cache.has_ro_data(cuda_cache_handle);
	else return false;
}
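
// bind_cuda_texture()/unbind_cuda_texture() wrap the image's read-only cudaArray
// in a texture so device kernels can sample it; the two calls must be paired, and
// the cache slot stays locked in between. A hedged sketch of the calling pattern
// used elsewhere in this file (interp_mode == true presumably selects
// interpolated sampling):
//
//     img.bind_cuda_texture(true);
//     // ... launch a kernel that reads the bound texture ...
//     img.unbind_cuda_texture();
//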
void EMData::bind_cuda_texture(const bool interp_mode) const {
	check_cuda_array_update();
	cuda_cache.lock(cuda_cache_handle);
	bind_cuda_array_to_texture(cuda_cache.get_ro_data(cuda_cache_handle), cuda_cache.get_ndim(cuda_cache_handle), interp_mode);
}

void EMData::unbind_cuda_texture() const {
	::unbind_cuda_texture(cuda_cache.get_ndim(cuda_cache_handle));
	cuda_cache.unlock(cuda_cache_handle);
}

cudaArray* EMData::get_cuda_array() const {
	if (get_size() == 0) throw UnexpectedBehaviorException("The size of the data is 0?");
	check_cuda_array_update();
	return cuda_cache.get_ro_data(cuda_cache_handle);
}

void EMData::check_cuda_array_update() const {
	if (cuda_cache_handle == -1 || EMDATA_GPU_RO_NEEDS_UPDATE & flags) {
		if (cuda_cache_handle != -1 && gpu_rw_is_current()) {
			cuda_cache.copy_rw_to_ro(cuda_cache_handle);
		} else {
			if (cuda_cache_handle != -1) cuda_cache.clear_item(cuda_cache_handle);
			cuda_cache_handle = cuda_cache.cache_ro_data(this, rdata, nx, ny, nz);
			if (cuda_cache_handle == -1) throw UnexpectedBehaviorException("Failed to cache RO data on the GPU");
		}
		flags &= ~EMDATA_GPU_RO_NEEDS_UPDATE;
	}
}

void EMData::cuda_cache_lost_imminently() const {
	get_data();
	flags |= EMDATA_GPU_NEEDS_UPDATE | EMDATA_GPU_RO_NEEDS_UPDATE;
	cuda_cache_handle = -1;
}

void EMData::cuda_lock() const {
	if (cuda_cache_handle == -1) throw UnexpectedBehaviorException("No cuda handle, can't lock");
	cuda_cache.lock(cuda_cache_handle);
}

void EMData::cuda_unlock() const {
	if (cuda_cache_handle == -1) throw UnexpectedBehaviorException("No cuda handle, can't unlock");
	cuda_cache.unlock(cuda_cache_handle);
}
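
// EMDataForCuda is a lightweight POD view (device pointer plus dimensions) that
// the raw CUDA processors take as an argument. A hedged sketch of the pattern
// used throughout this file: build the struct, keep the cache slot pinned with a
// CudaDataLock while the raw pointer is in use, then mark the device copy as the
// up-to-date one with gpu_update().
//
//     EMDataForCuda view = img.get_data_struct_for_cuda();
//     CudaDataLock lock(&img);              // pin the cache slot (RAII)
//     // ... pass &view to a cuda_processor routine ...
//     img.gpu_update();
//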
EMDataForCuda EMData::get_data_struct_for_cuda() const {
	EMDataForCuda tmp = {get_cuda_data(), nx, ny, nz};
	return tmp;
}

bool EMData::gpu_operation_preferred() const {
	bool cpu = cpu_rw_is_current();
	bool gpu = gpu_rw_is_current();
	if (!cpu && !gpu) {
		return false;
	}
	if (gpu) return true;
	return false;
}
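
// calc_ccf_cuda(): FFTs both operands on the device if they are not already
// complex, runs the correlation processor on the transforms (optionally through
// the texture path), and returns the inverse transform; center presumably puts
// the zero-shift peak at the image centre. A hedged usage sketch (image names
// are hypothetical):
//
//     EMData* ccf = a.calc_ccf_cuda(&b, /*use_texturing=*/false, /*center=*/true);
//     // ... use ccf ...
//     delete ccf;
//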
EMData* EMData::calc_ccf_cuda(EMData* image, bool use_texturing, bool center) const {
	EMData* tmp;
	if (is_complex()) {
		tmp = new EMData(*this);
	} else {
		tmp = do_fft_cuda();
	}

	Dict d;
	EMData* with = 0;
	if (image == this) {
		d["with"] = (EMData*) tmp;
	} else {
		if (!image->is_complex()) {
			int wnx = image->get_xsize(); int wny = image->get_ysize(); int wnz = image->get_zsize();
			if (wnx != nx || wny != ny || wnz != nz) {
				Region r;
				if (nz > 1) {
					r = Region((wnx-nx)/2, (wny-ny)/2, (wnz-nz)/2, nx, ny, nz);
				}
				else if (ny > 1) {
					r = Region((wnx-nx)/2, (wny-ny)/2, nx, ny);
				}
				else throw UnexpectedBehaviorException("Calc_ccf_cuda doesn't work on 1D images");
				EMData* clip = image->get_clip(r);
				with = clip->do_fft_cuda();
				delete clip;
			} else {
				with = image->do_fft_cuda();
			}
			d["with"] = (EMData*) with;
		} else {
			d["with"] = (EMData*) image;
		}
	}

	EMDataForCuda left = tmp->get_data_struct_for_cuda();
	CudaDataLock lock(tmp);
	if (use_texturing) {
		((EMData*)d["with"])->bind_cuda_texture(false);
		emdata_processor_correlation_texture(&left, center);
		((EMData*)d["with"])->unbind_cuda_texture();
	} else {
		EMDataForCuda right = ((EMData*)d["with"])->get_data_struct_for_cuda();
		CudaDataLock lock2((EMData*)d["with"]);
		emdata_processor_correlation(&left, &right, center);
	}
	tmp->gpu_update();

	if (with != 0 && image != this) {
		delete with;
		with = 0;
	}

	EMData* soln = tmp->do_ift_cuda(false);
	soln->gpu_update();
	delete tmp;
	tmp = 0;

	return soln;
}
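
// make_rotational_footprint_cuda(): pads the image into a larger, zero-filled
// working image, autocorrelates it on the device via calc_ccf_cuda, subtracts
// the edge mean, windows the result back down, and (for 2D input with unwrap ==
// true) converts it to polar coordinates with unwrap(). The padded and windowed
// working images are function-local statics that are locked in the CUDA cache so
// repeated calls reuse the same device allocations; the cached rot_fp member is
// returned when it is already available.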
EMData* EMData::make_rotational_footprint_cuda(bool unwrap)
{
	ENTERFUNC;

	float edge_mean = 0;
	CudaDataLock lock(this);
	if (rot_fp != 0 && unwrap == true) {
		return new EMData(*rot_fp);
	}

	int cs = (((nx * 7 / 4) & 0xfffff8) - nx) / 2;

	static EMData big_clip;
	int big_x = nx + 2*cs;
	int big_y = ny + 2*cs;
	int big_z = 1;
	if (nz != 1) {
		big_z = nz + 2*cs;
	}

	if (big_clip.get_xsize() != big_x || big_clip.get_ysize() != big_y || big_clip.get_zsize() != big_z) {
		big_clip.set_size_cuda(big_x, big_y, big_z);
		big_clip.get_cuda_data();
		big_clip.cuda_lock();
	}
	big_clip.to_value(edge_mean);

	if (nz != 1) {
		big_clip.insert_clip(this, IntPoint(cs, cs, cs));
	} else {
		big_clip.insert_clip(this, IntPoint(cs, cs, 0));
	}

	EMData* mc = big_clip.calc_ccf_cuda(&big_clip, false, true);
	mc->sub(mc->get_edge_mean());

	static EMData sml_clip;
	int sml_x = nx * 3 / 2;
	int sml_y = ny * 3 / 2;
	int sml_z = 1;
	if (nz != 1) {
		sml_z = nz * 3 / 2;
	}

	if (sml_clip.get_xsize() != sml_x || sml_clip.get_ysize() != sml_y || sml_clip.get_zsize() != sml_z) {
		sml_clip.set_size_cuda(sml_x, sml_y, sml_z);
		sml_clip.get_cuda_data();
		sml_clip.cuda_lock();
	}
	if (nz != 1) {
		sml_clip.insert_clip(mc, IntPoint(-cs+nx/4, -cs+ny/4, -cs+nz/4));
	} else {
		sml_clip.insert_clip(mc, IntPoint(-cs+nx/4, -cs+ny/4, 0));
	}

	delete mc; mc = 0;
	EMData* result = NULL;

	if (!unwrap || nz != 1) {
		result = new EMData(sml_clip);
	}
	else {
		result = sml_clip.unwrap();
	}

	result->gpu_update();

	if (unwrap == true)
	{
		rot_fp = result;
		return new EMData(*rot_fp);
	}
	else return result;
}

EMData* EMData::calc_ccfx_cuda(EMData* const with, int y0, int y1, bool no_sum)
{
	ENTERFUNC;

	if (!with) {
		LOGERR("NULL 'with' image. ");
		throw NullPointerException("NULL input image");
	}

	if (!EMUtil::is_same_size(this, with)) {
		LOGERR("images not same size: (%d,%d,%d) != (%d,%d,%d)",
			   nx, ny, nz,
			   with->get_xsize(), with->get_ysize(), with->get_zsize());
		throw ImageFormatException("images not same size");
	}
	if (get_ndim() > 2) {
		LOGERR("2D images only");
		throw ImageDimensionException("2D images only");
	}

	if (y1 <= y0) {
		y1 = ny;
	}

	if (y0 >= y1) {
		y0 = 0;
	}

	if (y0 < 0) {
		y0 = 0;
	}

	if (y1 > ny) {
		y1 = ny;
	}

	static int nx_device_fft = 0;
	static int ny_device_fft = 0;
	static EMData f1;
	static EMData f2;
	static EMData rslt;

	int height = y1 - y0;
	int width = (nx + 2 - (nx % 2));
	if (width != nx_device_fft || height != ny_device_fft) {
		f1.set_size_cuda(width, height);
		f2.set_size_cuda(width, height);
		rslt.set_size_cuda(nx, height);
		nx_device_fft = width;
		ny_device_fft = height;
	}

	{
		float* cd = get_cuda_data();
		CudaDataLock lock(this);
		float* f1cd = f1.get_cuda_data();
		CudaDataLock lock2(&f1);
		cuda_dd_fft_real_to_complex_1d(cd, f1cd, nx, height);
	}
	{
		float* wcd = with->get_cuda_data();
		CudaDataLock lock(with);
		float* f2cd = f2.get_cuda_data();
		CudaDataLock lock2(&f2);
		cuda_dd_fft_real_to_complex_1d(wcd, f2cd, nx, height);
	}

	EMDataForCuda left = f1.get_data_struct_for_cuda();
	CudaDataLock lock(&f1);

	bool use_texturing = false;
	bool center = false;
	if (use_texturing) {
		f2.bind_cuda_texture(false);
		emdata_processor_correlation_texture(&left, center);
		f2.unbind_cuda_texture();
	} else {
		EMDataForCuda right = f2.get_data_struct_for_cuda();
		CudaDataLock lock2(&f2);
		emdata_processor_correlation(&left, &right, center);
	}

	{
		float* rcd = rslt.get_cuda_data();
		CudaDataLock rlock(&rslt);
		float* f1cd = f1.get_cuda_data();
		CudaDataLock lock2(&f1);
		cuda_dd_fft_complex_to_real_1d(f1cd, rcd, nx, height);
	}

	if (no_sum) {
		rslt.gpu_update();
		EXITFUNC;
		return new EMData(rslt);
	}
	else {
		EXITFUNC;
		return rslt.column_sum_cuda();
	}
}

EMData* EMData::column_sum_cuda() const {
	ENTERFUNC;
	if (get_ndim() != 2) throw ImageDimensionException("Column sum cuda has been programmed to work exclusively with 2D data.");
	EMData* cf = new EMData();
	cf->set_size_cuda(nx, 1, 1);
	EMDataForCuda left = cf->get_data_struct_for_cuda();
	CudaDataLock llock(cf);
	bind_cuda_texture(false);
	emdata_column_sum(&left, ny);
	unbind_cuda_texture();
	cf->gpu_update();
	EXITFUNC;
	return cf;
}
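
// set_gpu_rw_data(): hands an already-allocated device pointer to this image and
// records the new dimensions, replacing any existing RW allocation in the cache
// slot. The cache takes ownership of the pointer (it is cudaFree'd when the slot
// is cleared or replaced), so the caller must not free it. This summary is
// inferred from the cache code further below.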
void EMData::set_gpu_rw_data(float* data, const int x, const int y, const int z) {
	nx = x; ny = y; nz = z;
	nxy = nx*ny;
	nxyz = nx*ny*nz;
	if (cuda_cache_handle != -1) {
		cuda_cache.replace_gpu_rw(cuda_cache_handle, data);
	} else {
		cuda_cache_handle = cuda_cache.store_rw_data(this, data);
	}
	gpu_update();
}

void EMData::free_cuda_memory() const {
	if (cuda_cache_handle != -1) {
		cuda_cache.clear_item(cuda_cache_handle);
		cuda_cache_handle = -1;
	}
}

void EMData::copy_gpu_rw_to_cpu() {
	get_data();
}

void EMData::copy_cpu_to_gpu_rw() {
	get_cuda_data();
}

void EMData::copy_cpu_to_gpu_ro() {
	get_cuda_array();
}

void EMData::copy_gpu_rw_to_gpu_ro() {
	cuda_cache.copy_rw_to_ro(cuda_cache_handle);
}

void EMData::copy_gpu_ro_to_gpu_rw() const {
	cuda_cache.copy_ro_to_rw(cuda_cache_handle);
}

void EMData::copy_gpu_ro_to_cpu() const {
	cuda_cache.copy_ro_to_cpu(cuda_cache_handle, rdata);
}
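
// EMData::CudaCache is a fixed-size pool of device allocations shared by all
// EMData instances. Each slot holds up to three things in parallel arrays: a
// read-write device pointer (rw_cache), a read-only cudaArray suitable for
// texture binding (ro_cache), and a back-pointer to the owning EMData
// (caller_cache), plus a lock count that protects the slot from eviction. Slots
// are handed out round-robin via current_insert_idx; when the cache wraps
// around, unlocked entries are evicted and their owners are told to pull the
// data back to the host first (cuda_cache_lost_imminently). This description is
// a reading of the code that follows.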
EMData::CudaCache::CudaCache(const int size) : cache_size(size), current_insert_idx(0), mem_allocated(0), locked(size, 0)
{
	device_init();
	rw_cache = new float*[cache_size];
	caller_cache = new const EMData*[cache_size];
	ro_cache = new cudaArray*[cache_size];

	for (int i = 0; i < cache_size; ++i) {
		rw_cache[i] = 0;
		caller_cache[i] = 0;
		ro_cache[i] = 0;
	}
}

EMData::CudaCache::~CudaCache()
{
	for (int i = 0; i < cache_size; i++) {
		clear_item(i);
	}

	if (rw_cache)
	{
		delete [] rw_cache;
		rw_cache = 0;
	}

	if (caller_cache)
	{
		delete [] caller_cache;
		caller_cache = 0;
	}

	if (ro_cache)
	{
		delete [] ro_cache;
		ro_cache = 0;
	}

	cleanup_cuda_fft_dd_plan_cache();
}

void EMData::CudaCache::lock(const int idx) {
	if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx, "The idx is beyond the cache size");
	locked[idx] += 1;
}

void EMData::CudaCache::unlock(const int idx) {
	if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx, "The idx is beyond the cache size");
	if (locked[idx] == 0) {
		return;
	}
	locked[idx] -= 1;
}

int EMData::CudaCache::cache_rw_data(const EMData* const emdata, const float* const data, const int nx, const int ny, const int nz)
{
	ensure_slot_space();

	float* cuda_rw_data = alloc_rw_data(nx, ny, nz);

	if (data != 0) {
		size_t num_bytes = nx*ny*nz*sizeof(float);
		cudaError_t error = cudaMemcpy(cuda_rw_data, data, num_bytes, cudaMemcpyHostToDevice);
		if (error != cudaSuccess) throw UnexpectedBehaviorException("CudaMemcpy (host to device) error:" + string(cudaGetErrorString(error)));
	}

	return blind_store_rw_data(emdata, cuda_rw_data);
}

int EMData::CudaCache::blind_store_rw_data(const EMData* const emdata, float* cuda_rw_data)
{
	rw_cache[current_insert_idx] = cuda_rw_data;
	caller_cache[current_insert_idx] = emdata;
	ro_cache[current_insert_idx] = 0;

	int ret = current_insert_idx;
	current_insert_idx += 1;
	current_insert_idx %= cache_size;

	return ret;
}

int EMData::CudaCache::store_rw_data(const EMData* const emdata, float* cuda_rw_data)
{
	ensure_slot_space();

	int nx = emdata->get_xsize();
	int ny = emdata->get_ysize();
	int nz = emdata->get_zsize();
	size_t num_bytes = nx*ny*nz*sizeof(float);
	mem_allocated += num_bytes;

	return blind_store_rw_data(emdata, cuda_rw_data);
}

void EMData::CudaCache::debug_print() const {
	cout << "Cuda device cache debug. Total mem allocated: " << static_cast<float>(mem_allocated)/1000000.0 << "MB" << endl;
	for (int i = 0; i < cache_size; ++i) {
		int handle = -1;
		int nx = 0;
		int ny = 0;
		int nz = 0;
		if (caller_cache[i] != 0) {
			handle = caller_cache[i]->cuda_cache_handle;
			nx = caller_cache[i]->get_xsize();
			ny = caller_cache[i]->get_ysize();
			nz = caller_cache[i]->get_zsize();
		}
		cout << i << ": " << handle << " " << caller_cache[i] << " dims: " << nx << " " << ny << " " << nz << " locked: " << locked[i] << " rw " << rw_cache[i] << " ro " << ro_cache[i] << endl;
	}
}

void EMData::CudaCache::replace_gpu_rw(const int idx, float* cuda_rw_data)
{
	if (rw_cache[idx] != 0) {
		mem_allocated -= get_emdata_bytes(idx);
		cudaError_t error = cudaFree(rw_cache[idx]);
		if (error != cudaSuccess)
			throw UnexpectedBehaviorException("CudaFree error : " + string(cudaGetErrorString(error)));
	}
	rw_cache[idx] = 0;

	const EMData* d = caller_cache[idx];
	int nx = d->get_xsize();
	int ny = d->get_ysize();
	int nz = d->get_zsize();
	size_t num_bytes = nx*ny*nz*sizeof(float);
	mem_allocated += num_bytes;

	rw_cache[idx] = cuda_rw_data;
}
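
// ensure_slot_space(): advances current_insert_idx to a slot that can be
// (re)used. An occupied, unlocked slot is evicted: its owner is first asked to
// copy the data back to the host (cuda_cache_lost_imminently) and the slot is
// then cleared. Locked slots are skipped; if every slot is locked the cache is
// full and an exception is thrown. This is plain round-robin replacement with no
// usage-frequency bookkeeping, as far as the code below shows.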
void EMData::CudaCache::ensure_slot_space() {
	int checked_entries = 0;
	while (checked_entries < cache_size) {
		const EMData* previous = caller_cache[current_insert_idx];
		if (previous != 0) {
			if (locked[current_insert_idx] == 0) {
				previous->cuda_cache_lost_imminently();
				clear_item(current_insert_idx);
				break;
			} else {
				current_insert_idx++;
				current_insert_idx %= cache_size;
				checked_entries++;
			}
		} else break;
	}

	if (checked_entries == cache_size) {
		throw UnexpectedBehaviorException("All of the data objects in the cuda cache are locked! There is no space.");
	}
}

float* EMData::CudaCache::alloc_rw_data(const int nx, const int ny, const int nz) {
	float* cuda_rw_data;
	size_t num_bytes = nx*ny*nz*sizeof(float);

	cudaError_t error = cudaMalloc((void**)&cuda_rw_data, num_bytes);
	if (error != cudaSuccess) {
		debug_print();
		throw BadAllocException("cudaMalloc error :" + string(cudaGetErrorString(error)));
	}

	mem_allocated += num_bytes;

	return cuda_rw_data;
}

int EMData::CudaCache::cache_ro_data(const EMData* const emdata, const float* const data, const int nx, const int ny, const int nz) {
	ensure_slot_space();

	cudaArray* array = get_cuda_array_host(data, nx, ny, nz);
	if (array != 0) {
		mem_allocated += nx*ny*nz*sizeof(float);

		rw_cache[current_insert_idx] = 0;
		caller_cache[current_insert_idx] = emdata;
		ro_cache[current_insert_idx] = array;

		int ret = current_insert_idx;
		current_insert_idx += 1;
		current_insert_idx %= cache_size;

		return ret;
	}
	else {
		throw BadAllocException("The allocation of the CUDA array failed");
	}
}
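
// The RW and RO representations of a slot are interconvertible: copy_rw_to_ro()
// rebuilds the cudaArray from the linear device buffer, and copy_ro_to_rw() does
// the reverse with cudaMemcpy3D (3D) or cudaMemcpyFromArray (2D).
// copy_ro_to_cpu() uses the same two copy paths but transfers device-to-host
// into the supplied pointer. The RO-to-RW and RO-to-CPU paths explicitly reject
// 1D data.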
void EMData::CudaCache::copy_rw_to_ro(const int idx) {
	if (rw_cache[idx] == 0) throw UnexpectedBehaviorException("Can not update RO CUDA data: RW data is null.");

	if (ro_cache[idx] != 0) {
		cudaError_t error = cudaFreeArray(ro_cache[idx]);
		if (error != cudaSuccess) throw UnexpectedBehaviorException("CudaFreeArray error " + string(cudaGetErrorString(error)));
		ro_cache[idx] = 0;
	}

	const EMData* d = caller_cache[idx];
	int nx = d->get_xsize();
	int ny = d->get_ysize();
	int nz = d->get_zsize();

	cudaArray* array = get_cuda_array_device(rw_cache[idx], nx, ny, nz);
	if (array == 0) throw BadAllocException("The allocation of the CUDA array failed");
	ro_cache[idx] = array;
}

void EMData::CudaCache::copy_ro_to_rw(const int idx) {
	if (ro_cache[idx] == 0) throw UnexpectedBehaviorException("Can not update RW CUDA data: RO data is null.");

	if (rw_cache[idx] != 0) {
		cudaError_t error = cudaFree(rw_cache[idx]);
		if (error != cudaSuccess)
			throw UnexpectedBehaviorException("CudaFree error " + string(cudaGetErrorString(error)));
		rw_cache[idx] = 0;
	}

	const EMData* d = caller_cache[idx];
	int nx = d->get_xsize();
	int ny = d->get_ysize();
	int nz = d->get_zsize();
	size_t num_bytes = nx*ny*nz*sizeof(float);

	float* cuda_rw_data = alloc_rw_data(nx, ny, nz);

	if (nz > 1) {
		cudaExtent extent;
		extent.width = nx;
		extent.height = ny;
		extent.depth = nz;
		cudaMemcpy3DParms copyParams = {0};
		copyParams.srcArray = ro_cache[idx];
		copyParams.dstPtr = make_cudaPitchedPtr((void*)cuda_rw_data, extent.width*sizeof(float), extent.width, extent.height);
		copyParams.extent = extent;
		copyParams.kind = cudaMemcpyDeviceToDevice;
		cudaError_t error = cudaMemcpy3D(&copyParams);
		if (error != cudaSuccess)
			throw UnexpectedBehaviorException("Copying device array to device pointer - CudaMemcpy3D error : " + string(cudaGetErrorString(error)));

	} else if (ny > 1) {
		cudaError_t error = cudaMemcpyFromArray(cuda_rw_data, ro_cache[idx], 0, 0, num_bytes, cudaMemcpyDeviceToDevice);
		if (error != cudaSuccess)
			throw UnexpectedBehaviorException("Copying device array to device pointer - cudaMemcpyFromArray error : " + string(cudaGetErrorString(error)));
	} else throw UnexpectedBehaviorException("Cuda infrastructure has not been designed to work on 1D data");

	rw_cache[idx] = cuda_rw_data;
}

void EMData::CudaCache::copy_ro_to_cpu(const int idx, float* data) {
	if (ro_cache[idx] == 0) throw UnexpectedBehaviorException("Can not update CPU data: RO data is null.");
	if (data == 0) throw NullPointerException("The cpu data pointer is NULL in copy_ro_to_cpu");

	const EMData* d = caller_cache[idx];
	int nx = d->get_xsize();
	int ny = d->get_ysize();
	int nz = d->get_zsize();
	size_t num_bytes = nx*ny*nz*sizeof(float);

	if (nz > 1) {
		cudaExtent extent;
		extent.width = nx;
		extent.height = ny;
		extent.depth = nz;
		cudaMemcpy3DParms copyParams = {0};
		copyParams.srcArray = ro_cache[idx];
		copyParams.dstPtr = make_cudaPitchedPtr((void*)data, extent.width*sizeof(float), extent.width, extent.height);
		copyParams.extent = extent;
		copyParams.kind = cudaMemcpyDeviceToHost;
		cudaError_t error = cudaMemcpy3D(&copyParams);
		if (error != cudaSuccess)
			throw UnexpectedBehaviorException("Copying device array to host pointer - CudaMemcpy3D error : " + string(cudaGetErrorString(error)));

	} else if (ny > 1) {
		cudaError_t error = cudaMemcpyFromArray(data, ro_cache[idx], 0, 0, num_bytes, cudaMemcpyDeviceToHost);
		if (error != cudaSuccess)
			throw UnexpectedBehaviorException("Copying device array to host pointer - cudaMemcpyFromArray error : " + string(cudaGetErrorString(error)));
	} else throw UnexpectedBehaviorException("Cuda infrastructure has not been designed to work on 1D data");
}

void EMData::CudaCache::clear_item(const int idx) {
	if (rw_cache[idx] != 0) {
		mem_allocated -= get_emdata_bytes(idx);
		cudaError_t error = cudaFree(rw_cache[idx]);
		if (error != cudaSuccess)
			throw UnexpectedBehaviorException("CudaFree error : " + string(cudaGetErrorString(error)));
	}
	rw_cache[idx] = 0;

	if (ro_cache[idx] != 0) {
		mem_allocated -= get_emdata_bytes(idx);
		cudaError_t error = cudaFreeArray(ro_cache[idx]);
		if (error != cudaSuccess) throw UnexpectedBehaviorException("CudaFreeArray error : " + string(cudaGetErrorString(error)));
	}
	ro_cache[idx] = 0;

	caller_cache[idx] = 0;

	locked[idx] = 0;
}
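
// CudaDataLock is a small RAII guard: constructing it pins the EMData's cache
// slot (so ensure_slot_space() will not evict it mid-kernel) and the destructor
// releases the pin. A hedged usage sketch, mirroring how it is used above:
//
//     {
//         EMDataForCuda view = img.get_data_struct_for_cuda();
//         CudaDataLock lock(&img);   // slot cannot be evicted in this scope
//         // ... launch device work that reads/writes through the struct ...
//     }                              // lock released here
//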
EMData::CudaDataLock::CudaDataLock(const EMData* const emdata) : data_cuda_handle(-1)
{
	emdata->set_gpu_rw_current();
	data_cuda_handle = emdata->cuda_cache_handle;
	EMData::cuda_cache.lock(data_cuda_handle);
}

EMData::CudaDataLock::~CudaDataLock() {
	EMData::cuda_cache.unlock(data_cuda_handle);
}

#endif //EMAN2_USING_CUDA