00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #ifdef EMAN2_USING_CUDA
00035
00036 #include "emdata.h"
00037 #include "exception.h"
00038 #include <cuda_runtime_api.h>
00039 #include <driver_functions.h>
00040 #include <cuda.h>
00041 #include <cuda/cuda_util.h>
00042 #include <cuda/cuda_emfft.h>
00043
00044 using namespace EMAN;
00045
00046
00047 const EMData* EMData::firstinlist = 0;
00048 const EMData* EMData::lastinlist = 0;
00049 int EMData::memused = 0;
00050 int EMData::fudgemem = 1.024E8;
00051 int EMData::mempoolused = -1;
00052 int EMData::mempoolarraysize = 0;
00053 bool EMData::usemempoolswitch = false;
00054 bool EMData::usecuda = (getenv("EMANUSECUDA") == NULL) ? 0 : bool(atoi(getenv("EMANUSECUDA")));
00055 float* EMData::mempool[] = {0};
00056
00057 bool EMData::copy_to_cuda_keepcpu() const
00058 {
00059
00060 if(rw_alloc()) {
00061 memused += num_bytes;
00062 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00063 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00064 }else{return false;}
00065
00066 return true;
00067 }
00068
00069 bool EMData::copy_to_cuda()
00070 {
00071
00072 if(rw_alloc()) {
00073 memused += num_bytes;
00074 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00075 if ( error != cudaSuccess) {
00076
00077 throw UnexpectedBehaviorException( "CudaMemcpy (host to device) failed:" + string(cudaGetErrorString(error)));
00078 }
00079 }else{return false;}
00080
00081
00082
00083 return true;
00084 }
00085
00086 bool EMData::copy_to_cudaro() const
00087 {
00088
00089
00090 if(ro_alloc()) {
00091 memused += num_bytes;
00092 copy_to_array(rdata, cudarodata, nx, ny, nz, cudaMemcpyHostToDevice);
00093 }else{return false;}
00094
00095 return true;
00096 }
00097
00098 bool EMData::rw_alloc() const
00099 {
00100
00101 num_bytes = nxyz*sizeof(float);
00102 if(cudarwdata){return true;}
00103
00104 if(usemempoolswitch && mempoolused > -1 && mempoolarraysize >= int(num_bytes))
00105 {
00106 cudarwdata = mempool[mempoolused];
00107 mempool[mempoolused] = 0;
00108 mempoolused--;
00109 return true;
00110 }
00111 if(!freeup_devicemem(num_bytes)){return false;}
00112 cudaError_t error = cudaMalloc((void**)&cudarwdata,num_bytes);
00113 if ( error != cudaSuccess){return false;}
00114 if(!cudarodata){addtolist();}
00115
00116 return true;
00117 }
00118
00119 bool EMData::ro_alloc() const
00120 {
00121
00122 num_bytes = nxyz*sizeof(float);
00123 if(cudarodata){return true;}
00124 if(!freeup_devicemem(num_bytes)){return false;}
00125 cudarodata = get_cuda_array(nx, ny, nz);
00126 if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00127 if(!cudarwdata){addtolist();}
00128
00129 return true;
00130
00131 }
00132
00133 void EMData::bindcudaarrayA(const bool intp_mode) const
00134 {
00135 if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00136 if(nz > 1){
00137
00138 bind_cuda_array_to_textureA(cudarodata, 3, intp_mode);
00139 }else{
00140
00141 bind_cuda_array_to_textureA(cudarodata, 2, intp_mode);
00142 }
00143
00144 }
00145
00146 void EMData::unbindcudaarryA() const
00147 {
00148
00149 if(nz > 1){
00150 unbind_cuda_textureA(3);
00151 }else{
00152 unbind_cuda_textureA(2);
00153 }
00154
00155 }
00156
00157 void EMData::bindcudaarrayB(const bool intp_mode) const
00158 {
00159 if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00160 if(nz > 1){
00161
00162 bind_cuda_array_to_textureB(cudarodata, 3, intp_mode);
00163 }else{
00164
00165 bind_cuda_array_to_textureB(cudarodata, 2, intp_mode);
00166 }
00167
00168 }
00169
00170 void EMData::unbindcudaarryB() const
00171 {
00172
00173 if(nz > 1){
00174 unbind_cuda_textureB(3);
00175 }else{
00176 unbind_cuda_textureB(2);
00177 }
00178
00179 }
00180
00181 bool EMData::copy_from_device(const bool rocpy)
00182 {
00183
00184
00185 if(cudarwdata && !rocpy){
00186
00187 if(rdata == 0){rdata = (float*)malloc(num_bytes);}
00188 cudaError_t error = cudaMemcpy(rdata,cudarwdata,num_bytes,cudaMemcpyDeviceToHost);
00189 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00190 rw_free();
00191 if(cudarodata) ro_free();
00192 } else if (cudarodata && rocpy) {
00193 if (nz > 1){
00194
00195 cudaExtent extent;
00196 extent.width = nx;
00197 extent.height = ny;
00198 extent.depth = nz;
00199 cudaMemcpy3DParms copyParams = {0};
00200 copyParams.srcArray = cudarodata;
00201 copyParams.dstPtr = make_cudaPitchedPtr((void*)rdata, extent.width*sizeof(float), extent.width, extent.height);
00202 copyParams.extent = extent;
00203 copyParams.kind = cudaMemcpyDeviceToHost;
00204 cudaError_t error = cudaMemcpy3D(©Params);
00205 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00206 } else{
00207
00208 cudaError_t error = cudaMemcpyFromArray(rdata,cudarodata,0,0,num_bytes,cudaMemcpyDeviceToHost);
00209 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00210 }
00211 ro_free();
00212 } else {
00213 return false;
00214 }
00215
00216 update();
00217 return true;
00218 }
00219
00220 bool EMData::copy_rw_to_ro() const
00221 {
00222
00223 if(cudarwdata == 0){return false;}
00224
00225 if(cudarodata == 0){
00226 if(!freeup_devicemem(num_bytes)){return false;}
00227 cudarodata = get_cuda_array(nx, ny, nz);
00228 if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00229 memused += num_bytes;
00230 }
00231
00232 copy_to_array(cudarwdata, cudarodata, nx, ny, nz, cudaMemcpyDeviceToDevice);
00233 roneedsupdate = 0;
00234 elementaccessed();
00235
00236 return true;
00237
00238 }
00239
00240
00241
00242 void EMData::runcuda(float * results) const
00243 {
00244
00245 if(results == 0){throw UnexpectedBehaviorException( "Cuda failed!!!");}
00246 if(cudarwdata != 0){
00247
00248 } else {
00249 addtolist();
00250 }
00251 cudarwdata = results;
00252
00253 }
00254
00255 void EMData::rw_free() const
00256 {
00257
00258 if(usemempoolswitch && mempoolused < (MEMPOOL_SIZE-1) && mempoolarraysize >= int(num_bytes))
00259 {
00260 mempoolused++;
00261 mempool[mempoolused] = cudarwdata;
00262 return;
00263 }
00264 cudaError_t error = cudaFree(cudarwdata);
00265 if ( error != cudaSuccess){
00266 cout << rdata << " " << cudarwdata << endl;
00267 throw UnexpectedBehaviorException( "CudaFree failed:" + string(cudaGetErrorString(error)));
00268 }
00269 cudarwdata = 0;
00270 memused -= num_bytes;
00271 if(!cudarodata){removefromlist();}
00272
00273 }
00274
00275 void EMData::ro_free() const
00276 {
00277
00278 cudaError_t error = cudaFreeArray(cudarodata);
00279 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaFreeArray failed:" + string(cudaGetErrorString(error)));
00280 cudarodata = 0;
00281 memused -= num_bytes;
00282 if(!cudarwdata){removefromlist();}
00283
00284 }
00285
00286 bool EMData::isrodataongpu() const
00287 {
00288 if(cudarodata != 0 && !roneedsupdate){return true;}
00289 if(cudarwdata != 0){
00290 if(copy_rw_to_ro()){;
00291 return true;
00292 } else {
00293 return false;
00294 }
00295 }else{
00296 return false;
00297 }
00298
00299 }
00300 bool EMData::freeup_devicemem(const int& num_bytes) const
00301 {
00302 size_t freemem=0, totalmem=0;
00303 cudaMemGetInfo(&freemem, &totalmem);
00304
00305 if ((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){
00306 return true;
00307 }else{
00308
00309
00310 while(lastinlist != 0){
00311 if(lastinlist->cudarwdata){
00312 cudaFree(lastinlist->cudarwdata);
00313 lastinlist->cudarwdata = 0;
00314 memused -= lastinlist->nxyz*sizeof(float);
00315 cudaMemGetInfo(&freemem, &totalmem);
00316 }
00317 if(lastinlist->cudarodata){
00318 cudaFreeArray(lastinlist->cudarodata);
00319 lastinlist->cudarodata = 0;
00320 memused -= lastinlist->nxyz*sizeof(float);
00321 cudaMemGetInfo(&freemem, &totalmem);
00322 }
00323 if(lastinlist != firstinlist){
00324 lastinlist->nextlistitem->prevlistitem = 0;
00325 lastinlist = lastinlist->nextlistitem;
00326 }else{
00327 firstinlist = 0;
00328 lastinlist = 0;
00329 }
00330 if((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){return true;}
00331 }
00332 }
00333
00334 return false;
00335 }
00336
00337 void EMData::addtolist() const
00338 {
00339 if(firstinlist == 0){
00340 firstinlist = this;
00341 lastinlist = this;
00342 nextlistitem = 0;
00343 prevlistitem = 0;
00344 }else{
00345
00346 firstinlist->nextlistitem = this;
00347 prevlistitem = firstinlist;
00348 nextlistitem = 0;
00349 firstinlist = this;
00350 }
00351
00352 }
00353
00354 void EMData::elementaccessed() const
00355 {
00356 removefromlist();
00357
00358 firstinlist->nextlistitem = this;
00359 prevlistitem = firstinlist;
00360 firstinlist = this;
00361 }
00362
00363 void EMData::removefromlist() const
00364 {
00365
00366 if(firstinlist == lastinlist){
00367 firstinlist = 0;
00368 lastinlist = 0;
00369 return;
00370 }
00371 if(nextlistitem !=0){
00372 nextlistitem->prevlistitem = prevlistitem;
00373 }else{
00374 firstinlist = prevlistitem;
00375 }
00376 if(prevlistitem !=0){
00377 prevlistitem->nextlistitem = nextlistitem;
00378 }else{
00379 lastinlist = nextlistitem;
00380 }
00381
00382 }
00383
00384 void EMData::usemempool(int size)
00385 {
00386
00387 usemempoolswitch = true;
00388 mempoolarraysize = size;
00389
00390 }
00391
00392 void EMData::freemempool()
00393 {
00394 for(int i = 0; i < MEMPOOL_SIZE; i ++)
00395 {
00396 if(mempool[i] == 0)
00397 {
00398 break;
00399 }else{
00400 cudaFree(mempool[i]);
00401 }
00402 }
00403 }
00404
00405 void EMData::switchoncuda()
00406 {
00407 EMData::usecuda = 1;
00408 }
00409
00410 void EMData::switchoffcuda()
00411 {
00412 EMData::usecuda = 0;
00413 }
00414
00415 void EMData::cuda_cleanup()
00416 {
00417 do_cuda_fft_cache_destroy();
00418
00419 while(lastinlist){
00420 if(lastinlist->cudarwdata) lastinlist->rw_free();
00421 if(lastinlist && lastinlist->cudarodata) lastinlist->ro_free();
00422 }
00423
00424 cudaThreadExit();
00425
00426 }
00427
00428 bool EMData::cuda_initialize()
00429 {
00430 if(device_init())
00431 {
00432 return 1;
00433 } else {
00434 switchoffcuda();
00435 return 0;
00436 }
00437 }
00438 #endif //EMAN2_USING_CUDA