#ifdef EMAN2_USING_CUDA

#include "emdata.h"
#include "exception.h"
#include <cuda_runtime_api.h>
#include <driver_functions.h>
#include <cuda.h>
#include "cuda/cuda_util.h"
#include "cuda/cuda_processor.h"
#include "cuda/cuda_emfft.h"

using namespace EMAN;

EMData::CudaCache EMData::cuda_cache(100);

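// Each EMData object holds an optional handle into the static CudaCache above. The
// EMDATA_GPU_NEEDS_UPDATE / EMDATA_GPU_RO_NEEDS_UPDATE flags record whether the device
// copies (read-write buffer / read-only array) are stale relative to the host data.

// Return the read-write (RW) device pointer for this image, refreshing it from the
// read-only (RO) device copy or from host memory if it is out of date.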
float* EMData::get_cuda_data() const {
    if (get_size() == 0) throw UnexpectedBehaviorException("The size of the data is 0");
    if (cuda_cache_handle == -1 || EMDATA_GPU_NEEDS_UPDATE & flags) {
        if (cuda_cache_handle != -1 && gpu_ro_is_current()) {
            cuda_cache.copy_ro_to_rw(cuda_cache_handle);
        } else {
            if (cuda_cache_handle != -1) {
                cuda_cache.clear_item(cuda_cache_handle);
            }
            cuda_cache_handle = cuda_cache.cache_rw_data(this,rdata,nx,ny,nz);
            if (cuda_cache_handle == -1) throw UnexpectedBehaviorException("Failed to cache RW data on the CUDA device");
        }
        flags &= ~EMDATA_GPU_NEEDS_UPDATE;
    }
    return cuda_cache.get_rw_data(cuda_cache_handle);
}

bool EMData::gpu_rw_is_current() const {
    if (cuda_cache_handle != -1 && !(EMDATA_GPU_NEEDS_UPDATE & flags)) return cuda_cache.has_rw_data(cuda_cache_handle);
    else return false;
}

bool EMData::cpu_rw_is_current() const {
    if (!(EMDATA_CPU_NEEDS_UPDATE & flags) && rdata != 0) return true;
    return false;
}

bool EMData::gpu_ro_is_current() const {
    if (cuda_cache_handle != -1 && !(EMDATA_GPU_RO_NEEDS_UPDATE & flags)) return cuda_cache.has_ro_data(cuda_cache_handle);
    else return false;
}

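// Bind this image's RO device array to the CUDA texture used by the texture-based kernels.
// The cache slot is locked while the texture is bound and unlocked in unbind_cuda_texture().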
void EMData::bind_cuda_texture(const bool interp_mode) const {
    check_cuda_array_update();
    cuda_cache.lock(cuda_cache_handle);
    bind_cuda_array_to_texture(cuda_cache.get_ro_data(cuda_cache_handle),cuda_cache.get_ndim(cuda_cache_handle),interp_mode);
}

void EMData::unbind_cuda_texture() const {
    ::unbind_cuda_texture(cuda_cache.get_ndim(cuda_cache_handle));
    cuda_cache.unlock(cuda_cache_handle);
}

cudaArray* EMData::get_cuda_array() const {
    if (get_size() == 0) throw UnexpectedBehaviorException("The size of the data is 0");
    check_cuda_array_update();
    return cuda_cache.get_ro_data(cuda_cache_handle);
}

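// Ensure the RO cudaArray copy of this image is current, rebuilding it from the RW device
// copy or from host memory as required.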
void EMData::check_cuda_array_update() const {
    if (cuda_cache_handle == -1 || EMDATA_GPU_RO_NEEDS_UPDATE & flags) {
        if (cuda_cache_handle != -1 && gpu_rw_is_current()) {
            cuda_cache.copy_rw_to_ro(cuda_cache_handle);
        } else {
            if (cuda_cache_handle != -1) cuda_cache.clear_item(cuda_cache_handle);
            cuda_cache_handle = cuda_cache.cache_ro_data(this,rdata,nx,ny,nz);
            if (cuda_cache_handle == -1) throw UnexpectedBehaviorException("Failed to cache RO data on the CUDA device");
        }
        flags &= ~EMDATA_GPU_RO_NEEDS_UPDATE;
    }
}

void EMData::cuda_cache_lost_imminently() const {
    // The cache is about to evict this image's device copy: pull the data back to the CPU
    // and flag both GPU copies as stale.
    get_data();
    flags |= EMDATA_GPU_NEEDS_UPDATE | EMDATA_GPU_RO_NEEDS_UPDATE;
    cuda_cache_handle = -1;
}

void EMData::cuda_lock() const {
    if (cuda_cache_handle == -1) throw UnexpectedBehaviorException("No cuda handle, can't lock");
    cuda_cache.lock(cuda_cache_handle);
}

void EMData::cuda_unlock() const {
    if (cuda_cache_handle == -1) throw UnexpectedBehaviorException("No cuda handle, can't unlock");
    cuda_cache.unlock(cuda_cache_handle);
}

EMDataForCuda EMData::get_data_struct_for_cuda() const {
    EMDataForCuda tmp = {get_cuda_data(),nx,ny,nz};
    return tmp;
}

bool EMData::gpu_operation_preferred() const {
    bool cpu = cpu_rw_is_current();
    bool gpu = gpu_rw_is_current();
    // If neither copy is current, default to the CPU path.
    if (!cpu && !gpu) return false;
    if (gpu) return true;
    return false;
}

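// Cross correlate this image with 'image' on the GPU: forward FFT both inputs on the device
// (unless they are already complex), run the correlation kernel (optionally via texturing),
// and inverse FFT the product. The 'center' flag is passed through to the correlation kernel.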
EMData* EMData::calc_ccf_cuda(EMData* image, bool use_texturing, bool center) const {
    EMData* tmp;
    if (is_complex()) {
        // Already in Fourier space - work on a copy.
        tmp = new EMData(*this);
    } else {
        // Real space - take the forward FFT on the device.
        tmp = do_fft_cuda();
    }

    Dict d;
    EMData* with = 0;
    if (image == this) {
        d["with"] = (EMData*) tmp;
    } else {
        if (!image->is_complex()) {
            int wnx = image->get_xsize(); int wny = image->get_ysize(); int wnz = image->get_zsize();
            if (wnx != nx || wny != ny || wnz != nz) {
                // Clip the other image to this image's dimensions before transforming it.
                Region r;
                if (nz > 1) {
                    r = Region((wnx-nx)/2, (wny-ny)/2, (wnz-nz)/2,nx,ny,nz);
                }
                else if (ny > 1) {
                    r = Region((wnx-nx)/2, (wny-ny)/2,nx,ny);
                }
                else throw UnexpectedBehaviorException("calc_ccf_cuda doesn't work on 1D images");
                EMData* clipped = image->get_clip(r);
                with = clipped->do_fft_cuda();
                delete clipped;
            } else {
                with = image->do_fft_cuda();
            }
            d["with"] = (EMData*) with;
        } else {
            // The other image is already complex - use it directly.
            d["with"] = (EMData*) image;
        }
    }

    EMDataForCuda left = tmp->get_data_struct_for_cuda();
    CudaDataLock lock(tmp);
    if (use_texturing) {
        ((EMData*)d["with"])->bind_cuda_texture(false);
        emdata_processor_correlation_texture(&left,center);
        ((EMData*)d["with"])->unbind_cuda_texture();
    } else {
        EMDataForCuda right = ((EMData*)d["with"])->get_data_struct_for_cuda();
        CudaDataLock lock2((EMData*)d["with"]);
        emdata_processor_correlation(&left,&right,center);
    }
    tmp->gpu_update();

    // Only delete the intermediate Fourier image if it was created here.
    if (with != 0 && image != this) {
        delete with;
        with = 0;
    }

    EMData* soln = tmp->do_ift_cuda(false);
    soln->gpu_update();
    delete tmp;
    tmp = 0;

    return soln;
}

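// GPU version of make_rotational_footprint: pad the image into a static, device-locked
// buffer, autocorrelate it with calc_ccf_cuda, clip out the central region, and optionally
// unwrap it into polar form. The static buffers are reused across calls of the same size.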
EMData* EMData::make_rotational_footprint_cuda(bool unwrap)
{
    ENTERFUNC;

    float edge_mean = 0;
    CudaDataLock lock(this);
    if (rot_fp != 0 && unwrap == true) {
        return new EMData(*rot_fp);
    }

    int cs = (((nx * 7 / 4) & 0xfffff8) - nx) / 2;

    // Static padded buffer - reused (and kept locked on the device) across calls.
    static EMData big_clip;
    int big_x = nx+2*cs;
    int big_y = ny+2*cs;
    int big_z = 1;
    if (nz != 1) {
        big_z = nz+2*cs;
    }

    if (big_clip.get_xsize() != big_x || big_clip.get_ysize() != big_y || big_clip.get_zsize() != big_z) {
        big_clip.set_size_cuda(big_x,big_y,big_z);
        big_clip.get_cuda_data();
        big_clip.cuda_lock();
    }
    big_clip.to_value(edge_mean);

    if (nz != 1) {
        big_clip.insert_clip(this,IntPoint(cs,cs,cs));
    } else {
        big_clip.insert_clip(this,IntPoint(cs,cs,0));
    }

    // Autocorrelate the padded image on the device.
    EMData* mc = big_clip.calc_ccf_cuda(&big_clip,false,true);
    mc->sub(mc->get_edge_mean());

    // Static buffer holding the central clip of the autocorrelation.
    static EMData sml_clip;
    int sml_x = nx * 3 / 2;
    int sml_y = ny * 3 / 2;
    int sml_z = 1;
    if (nz != 1) {
        sml_z = nz * 3 / 2;
    }

    if (sml_clip.get_xsize() != sml_x || sml_clip.get_ysize() != sml_y || sml_clip.get_zsize() != sml_z) {
        sml_clip.set_size_cuda(sml_x,sml_y,sml_z);
        sml_clip.get_cuda_data();
        sml_clip.cuda_lock();
    }
    if (nz != 1) {
        sml_clip.insert_clip(mc,IntPoint(-cs+nx/4,-cs+ny/4,-cs+nz/4));
    } else {
        sml_clip.insert_clip(mc,IntPoint(-cs+nx/4,-cs+ny/4,0));
    }

    delete mc; mc = 0;
    EMData* result = NULL;

    if (!unwrap || nz != 1) {
        result = new EMData(sml_clip);
    }
    else {
        result = sml_clip.unwrap();
    }

    result->gpu_update();

    if (unwrap == true)
    {
        // Cache the footprint so subsequent calls can return a copy immediately.
        rot_fp = result;
        return new EMData(*rot_fp);
    }
    else return result;
}

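// Row-wise cross correlation of two same-sized 2D images on the GPU: 1D real-to-complex
// FFTs of the selected rows of both images, correlation in Fourier space, then 1D inverse
// FFTs. Returns every row if no_sum is set, otherwise the column sum of the rows.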
EMData* EMData::calc_ccfx_cuda(EMData* const with, int y0, int y1, bool no_sum)
{
    ENTERFUNC;

    if (!with) {
        LOGERR("NULL 'with' image. ");
        throw NullPointerException("NULL input image");
    }

    if (!EMUtil::is_same_size(this, with)) {
        LOGERR("images not same size: (%d,%d,%d) != (%d,%d,%d)",
               nx, ny, nz,
               with->get_xsize(), with->get_ysize(), with->get_zsize());
        throw ImageFormatException("images not same size");
    }
    if (get_ndim() > 2) {
        LOGERR("2D images only");
        throw ImageDimensionException("2D images only");
    }

    if (y1 <= y0) {
        y1 = ny;
    }

    if (y0 >= y1) {
        y0 = 0;
    }

    if (y0 < 0) {
        y0 = 0;
    }

    if (y1 > ny) {
        y1 = ny;
    }

    // Static buffers reused while the row geometry (width/height) is unchanged.
    static int nx_device_fft = 0;
    static int ny_device_fft = 0;
    static EMData f1;
    static EMData f2;
    static EMData rslt;

    int height = y1-y0;
    int width = (nx+2-(nx%2));
    if (width != nx_device_fft || height != ny_device_fft) {
        f1.set_size_cuda(width,height);
        f2.set_size_cuda(width,height);
        rslt.set_size_cuda(nx,height);
        nx_device_fft = width;
        ny_device_fft = height;
    }

    {
        // Row-wise forward FFT of this image.
        float* cd = get_cuda_data();
        CudaDataLock lock(this);
        float* f1cd = f1.get_cuda_data();
        CudaDataLock lock2(&f1);
        cuda_dd_fft_real_to_complex_1d(cd,f1cd,nx,height);
    }
    {
        // Row-wise forward FFT of the 'with' image.
        float* wcd = with->get_cuda_data();
        CudaDataLock lock(with);
        float* f2cd = f2.get_cuda_data();
        CudaDataLock lock2(&f2);
        cuda_dd_fft_real_to_complex_1d(wcd,f2cd,nx,height);
    }

    EMDataForCuda left = f1.get_data_struct_for_cuda();
    CudaDataLock lock(&f1);

    bool use_texturing = false;
    bool center = false;
    if (use_texturing) {
        f2.bind_cuda_texture(false);
        emdata_processor_correlation_texture(&left,center);
        f2.unbind_cuda_texture();
    } else {
        EMDataForCuda right = f2.get_data_struct_for_cuda();
        CudaDataLock lock2(&f2);
        emdata_processor_correlation(&left,&right,center);
    }

    {
        // Row-wise inverse FFT of the correlation back into real space.
        float* rcd = rslt.get_cuda_data();
        CudaDataLock rlock(&rslt);
        float* f1cd = f1.get_cuda_data();
        CudaDataLock lock2(&f1);
        cuda_dd_fft_complex_to_real_1d(f1cd,rcd,nx,height);
    }

    if (no_sum) {
        rslt.gpu_update();
        EXITFUNC;
        return new EMData(rslt);
    }
    else {
        EXITFUNC;
        return rslt.column_sum_cuda();
    }
}

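// Sum the columns of a 2D image on the device, producing an nx x 1 result.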
EMData* EMData::column_sum_cuda() const {
    ENTERFUNC;
    if (get_ndim() != 2) throw ImageDimensionException("column_sum_cuda has been programmed to work exclusively with 2D data.");
    EMData* cf = new EMData();
    cf->set_size_cuda(nx, 1, 1);
    EMDataForCuda left = cf->get_data_struct_for_cuda();
    CudaDataLock llock(cf);
    bind_cuda_texture(false);
    emdata_column_sum(&left,ny);
    unbind_cuda_texture();
    cf->gpu_update();
    EXITFUNC;
    return cf;
}

void EMData::set_gpu_rw_data(float* data, const int x, const int y, const int z) {
    nx = x; ny = y; nz = z;
    nxy = nx*ny;
    nxyz = nx*ny*nz;
    if (cuda_cache_handle != -1) {
        cuda_cache.replace_gpu_rw(cuda_cache_handle,data);
    } else {
        cuda_cache_handle = cuda_cache.store_rw_data(this,data);
    }
    gpu_update();
}

void EMData::free_cuda_memory() const {
    if (cuda_cache_handle != -1) {
        cuda_cache.clear_item(cuda_cache_handle);
        cuda_cache_handle = -1;
    }
}

// Explicit copy helpers between the CPU buffer, the GPU RW buffer and the GPU RO array.
void EMData::copy_gpu_rw_to_cpu() {
    get_data();
}

void EMData::copy_cpu_to_gpu_rw() {
    get_cuda_data();
}

void EMData::copy_cpu_to_gpu_ro() {
    get_cuda_array();
}

void EMData::copy_gpu_rw_to_gpu_ro() {
    cuda_cache.copy_rw_to_ro(cuda_cache_handle);
}

void EMData::copy_gpu_ro_to_gpu_rw() const {
    cuda_cache.copy_ro_to_rw(cuda_cache_handle);
}

void EMData::copy_gpu_ro_to_cpu() const {
    cuda_cache.copy_ro_to_cpu(cuda_cache_handle,rdata);
}

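// CudaCache is a fixed-size ring of device allocations. Each slot may hold a RW float
// buffer and/or a RO cudaArray belonging to one EMData object, together with a lock count
// that protects the slot from eviction. current_insert_idx is the next candidate slot for
// insertion (and for eviction when the cache is full).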
EMData::CudaCache::CudaCache(const int size) : cache_size(size), current_insert_idx(0), mem_allocated(0), locked(size,0)
{
    device_init();
    rw_cache = new float*[cache_size];
    caller_cache = new const EMData*[cache_size];
    ro_cache = new cudaArray*[cache_size];

    for (int i = 0; i < cache_size; ++i) {
        rw_cache[i] = 0;
        caller_cache[i] = 0;
        ro_cache[i] = 0;
    }
}

EMData::CudaCache::~CudaCache()
{
    for (int i = 0; i < cache_size; i++) {
        clear_item(i);
    }

    if (rw_cache)
    {
        delete [] rw_cache;
        rw_cache = 0;
    }

    if (caller_cache)
    {
        delete [] caller_cache;
        caller_cache = 0;
    }

    if (ro_cache)
    {
        delete [] ro_cache;
        ro_cache = 0;
    }

    cleanup_cuda_fft_dd_plan_cache();
}

void EMData::CudaCache::lock(const int idx) {
    if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx,"The idx is beyond the cache size");
    locked[idx] += 1;
}

void EMData::CudaCache::unlock(const int idx) {
    if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx,"The idx is beyond the cache size");
    if (locked[idx] == 0) {
        // Unlocking a slot that is not locked is silently ignored.
        return;
    }
    locked[idx] -= 1;
}

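// Allocate a RW device buffer for emdata (evicting an unlocked slot if necessary), copy the
// host data into it when a host pointer is supplied, and return the slot index.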
int EMData::CudaCache::cache_rw_data(const EMData* const emdata, const float* const data, const int nx, const int ny, const int nz)
{
    ensure_slot_space();

    float* cuda_rw_data = alloc_rw_data(nx,ny,nz);

    if (data != 0) {
        size_t num_bytes = nx*ny*nz*sizeof(float);
        cudaError_t error = cudaMemcpy(cuda_rw_data,data,num_bytes,cudaMemcpyHostToDevice);
        if (error != cudaSuccess) throw UnexpectedBehaviorException("CudaMemcpy (host to device) error:" + string(cudaGetErrorString(error)));
    }

    return blind_store_rw_data(emdata,cuda_rw_data);
}

int EMData::CudaCache::blind_store_rw_data(const EMData* const emdata, float* cuda_rw_data)
{
    rw_cache[current_insert_idx] = cuda_rw_data;
    caller_cache[current_insert_idx] = emdata;
    ro_cache[current_insert_idx] = 0;

    int ret = current_insert_idx;
    current_insert_idx += 1;
    current_insert_idx %= cache_size;

    return ret;
}

int EMData::CudaCache::store_rw_data(const EMData* const emdata, float* cuda_rw_data)
{
    ensure_slot_space();

    int nx = emdata->get_xsize();
    int ny = emdata->get_ysize();
    int nz = emdata->get_zsize();
    size_t num_bytes = nx*ny*nz*sizeof(float);
    mem_allocated += num_bytes;

    return blind_store_rw_data(emdata, cuda_rw_data);
}

void EMData::CudaCache::debug_print() const {
    cout << "Cuda device cache debug. Total mem allocated: " << static_cast<float>(mem_allocated)/1000000.0 << " MB" << endl;
    for (int i = 0; i < cache_size; ++i) {
        int handle = -1;
        int nx = 0;
        int ny = 0;
        int nz = 0;
        if (caller_cache[i] != 0) {
            handle = caller_cache[i]->cuda_cache_handle;
            nx = caller_cache[i]->get_xsize();
            ny = caller_cache[i]->get_ysize();
            nz = caller_cache[i]->get_zsize();
        }
        cout << i << ": " << handle << " " << caller_cache[i] << " dims: " << nx << " " << ny << " " << nz << " locked: " << locked[i] << " rw " << rw_cache[i] << " ro " << ro_cache[i] << endl;
    }
}

void EMData::CudaCache::replace_gpu_rw(const int idx, float* cuda_rw_data)
{
    // Free the old RW allocation (if any), then install the caller-supplied device pointer.
    if (rw_cache[idx] != 0) {
        mem_allocated -= get_emdata_bytes(idx);
        cudaError_t error = cudaFree(rw_cache[idx]);
        if (error != cudaSuccess)
            throw UnexpectedBehaviorException("cudaFree error: " + string(cudaGetErrorString(error)));
    }
    rw_cache[idx] = 0;

    const EMData* d = caller_cache[idx];
    int nx = d->get_xsize();
    int ny = d->get_ysize();
    int nz = d->get_zsize();
    size_t num_bytes = nx*ny*nz*sizeof(float);
    mem_allocated += num_bytes;

    rw_cache[idx] = cuda_rw_data;
}

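// Make sure the slot at current_insert_idx is usable: if it is occupied and unlocked, warn
// the owner (so it can copy its data back to the host) and evict it; if it is locked, advance
// around the ring; throw if every slot is locked.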
void EMData::CudaCache::ensure_slot_space() {
    int checked_entries = 0;
    while (checked_entries < cache_size) {
        const EMData* previous = caller_cache[current_insert_idx];
        if (previous != 0) {
            if (locked[current_insert_idx] == 0) {
                // Give the owner a chance to copy its data back to the host, then evict it.
                previous->cuda_cache_lost_imminently();
                clear_item(current_insert_idx);
                break;
            } else {
                // Slot is locked - try the next one.
                current_insert_idx++;
                current_insert_idx %= cache_size;
                checked_entries++;
            }
        } else break;
    }

    if (checked_entries == cache_size) {
        throw UnexpectedBehaviorException("All of the data objects in the cuda cache are locked! There is no space.");
    }
}

float* EMData::CudaCache::alloc_rw_data(const int nx, const int ny, const int nz) {
    float* cuda_rw_data;
    size_t num_bytes = nx*ny*nz*sizeof(float);

    cudaError_t error = cudaMalloc((void**)&cuda_rw_data,num_bytes);
    if (error != cudaSuccess) {
        debug_print();
        throw BadAllocException("cudaMalloc error: " + string(cudaGetErrorString(error)));
    }

    mem_allocated += num_bytes;

    return cuda_rw_data;
}

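// Allocate a RO cudaArray for emdata from host data (evicting an unlocked slot if necessary)
// and return the slot index.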
int EMData::CudaCache::cache_ro_data(const EMData* const emdata, const float* const data, const int nx, const int ny, const int nz) {
    ensure_slot_space();

    cudaArray* array = get_cuda_array_host(data,nx,ny,nz);
    if (array != 0) {
        mem_allocated += nx*ny*nz*sizeof(float);

        rw_cache[current_insert_idx] = 0;
        caller_cache[current_insert_idx] = emdata;
        ro_cache[current_insert_idx] = array;

        int ret = current_insert_idx;
        current_insert_idx += 1;
        current_insert_idx %= cache_size;

        return ret;
    }
    else {
        throw BadAllocException("The allocation of the CUDA array failed");
    }
}

void EMData::CudaCache::copy_rw_to_ro(const int idx) {
    if (rw_cache[idx] == 0) throw UnexpectedBehaviorException("Can not update RO CUDA data: RW data is null.");

    if (ro_cache[idx] != 0) {
        cudaError_t error = cudaFreeArray(ro_cache[idx]);
        if (error != cudaSuccess) throw UnexpectedBehaviorException("cudaFreeArray error: " + string(cudaGetErrorString(error)));
        ro_cache[idx] = 0;
    }

    const EMData* d = caller_cache[idx];
    int nx = d->get_xsize();
    int ny = d->get_ysize();
    int nz = d->get_zsize();

    cudaArray* array = get_cuda_array_device(rw_cache[idx],nx,ny,nz);
    if (array == 0) throw BadAllocException("The allocation of the CUDA array failed");
    ro_cache[idx] = array;
}

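// Rebuild the RW device buffer from the RO cudaArray: free any existing RW buffer, allocate
// a fresh one, and copy device-to-device (cudaMemcpy3D for 3D data, cudaMemcpyFromArray for 2D).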
void EMData::CudaCache::copy_ro_to_rw(const int idx) {
    if (ro_cache[idx] == 0) throw UnexpectedBehaviorException("Can not update RW CUDA data: RO data is null.");

    if (rw_cache[idx] != 0) {
        cudaError_t error = cudaFree(rw_cache[idx]);
        if (error != cudaSuccess)
            throw UnexpectedBehaviorException("cudaFree error: " + string(cudaGetErrorString(error)));
        rw_cache[idx] = 0;
    }

    const EMData* d = caller_cache[idx];
    int nx = d->get_xsize();
    int ny = d->get_ysize();
    int nz = d->get_zsize();
    size_t num_bytes = nx*ny*nz*sizeof(float);

    float* cuda_rw_data = alloc_rw_data(nx,ny,nz);

    if (nz > 1) {
        cudaExtent extent;
        extent.width = nx;
        extent.height = ny;
        extent.depth = nz;
        cudaMemcpy3DParms copyParams = {0};
        copyParams.srcArray = ro_cache[idx];
        copyParams.dstPtr = make_cudaPitchedPtr((void*)cuda_rw_data, extent.width*sizeof(float), extent.width, extent.height);
        copyParams.extent = extent;
        copyParams.kind = cudaMemcpyDeviceToDevice;
        cudaError_t error = cudaMemcpy3D(&copyParams);
        if (error != cudaSuccess)
            throw UnexpectedBehaviorException("Copying device array to device pointer - cudaMemcpy3D error: " + string(cudaGetErrorString(error)));
    } else if (ny > 1) {
        cudaError_t error = cudaMemcpyFromArray(cuda_rw_data,ro_cache[idx],0,0,num_bytes,cudaMemcpyDeviceToDevice);
        if (error != cudaSuccess)
            throw UnexpectedBehaviorException("Copying device array to device pointer - cudaMemcpyFromArray error: " + string(cudaGetErrorString(error)));
    } else throw UnexpectedBehaviorException("Cuda infrastructure has not been designed to work on 1D data");

    rw_cache[idx] = cuda_rw_data;
}

void EMData::CudaCache::copy_ro_to_cpu(const int idx, float* data) {
    if (ro_cache[idx] == 0) throw UnexpectedBehaviorException("Can not update CPU data: RO data is null.");
    if (data == 0) throw NullPointerException("The cpu data pointer is NULL in copy_ro_to_cpu");

    const EMData* d = caller_cache[idx];
    int nx = d->get_xsize();
    int ny = d->get_ysize();
    int nz = d->get_zsize();
    size_t num_bytes = nx*ny*nz*sizeof(float);

    if (nz > 1) {
        cudaExtent extent;
        extent.width = nx;
        extent.height = ny;
        extent.depth = nz;
        cudaMemcpy3DParms copyParams = {0};
        copyParams.srcArray = ro_cache[idx];
        copyParams.dstPtr = make_cudaPitchedPtr((void*)data, extent.width*sizeof(float), extent.width, extent.height);
        copyParams.extent = extent;
        copyParams.kind = cudaMemcpyDeviceToHost;
        cudaError_t error = cudaMemcpy3D(&copyParams);
        if (error != cudaSuccess)
            throw UnexpectedBehaviorException("Copying device array to host pointer - cudaMemcpy3D error: " + string(cudaGetErrorString(error)));
    } else if (ny > 1) {
        cudaError_t error = cudaMemcpyFromArray(data,ro_cache[idx],0,0,num_bytes,cudaMemcpyDeviceToHost);
        if (error != cudaSuccess)
            throw UnexpectedBehaviorException("Copying device array to host pointer - cudaMemcpyFromArray error: " + string(cudaGetErrorString(error)));
    } else throw UnexpectedBehaviorException("Cuda infrastructure has not been designed to work on 1D data");
}

void EMData::CudaCache::clear_item(const int idx) {
    if (rw_cache[idx] != 0) {
        mem_allocated -= get_emdata_bytes(idx);
        cudaError_t error = cudaFree(rw_cache[idx]);
        if (error != cudaSuccess)
            throw UnexpectedBehaviorException("cudaFree error: " + string(cudaGetErrorString(error)));
    }
    rw_cache[idx] = 0;

    if (ro_cache[idx] != 0) {
        mem_allocated -= get_emdata_bytes(idx);
        cudaError_t error = cudaFreeArray(ro_cache[idx]);
        if (error != cudaSuccess) throw UnexpectedBehaviorException("cudaFreeArray error: " + string(cudaGetErrorString(error)));
    }
    ro_cache[idx] = 0;

    caller_cache[idx] = 0;

    locked[idx] = 0;
}

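// RAII helper: on construction it makes the EMData's RW device data current (via
// set_gpu_rw_current) and pins its cache slot; the destructor releases the lock.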
EMData::CudaDataLock::CudaDataLock(const EMData* const emdata) : data_cuda_handle(-1)
{
    emdata->set_gpu_rw_current();
    data_cuda_handle = emdata->cuda_cache_handle;
    EMData::cuda_cache.lock(data_cuda_handle);
}

EMData::CudaDataLock::~CudaDataLock() {
    EMData::cuda_cache.unlock(data_cuda_handle);
}

#endif // EMAN2_USING_CUDA