00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #ifdef EMAN2_USING_CUDA
00035
00036 #include "emdata.h"
00037 #include "exception.h"
00038 #include <cuda_runtime_api.h>
00039 #include <driver_functions.h>
00040 #include <cuda.h>
00041 #include <cuda/cuda_util.h>
00042 #include <cuda/cuda_emfft.h>
00043
00044 using namespace EMAN;
00045
00046
00047 const EMData* EMData::firstinlist = 0;
00048 const EMData* EMData::lastinlist = 0;
00049 int EMData::memused = 0;
00050 int EMData::fudgemem = 1.024E7;
00051 int EMData::mempoolused = -1;
00052 int EMData::mempoolarraysize = 0;
00053 bool EMData::usemempoolswitch = false;
00054 bool EMData::usecuda = (getenv("EMANUSECUDA") == NULL || atoi(getenv("EMANUSECUDA")) ) ? 1 : 0;
00055 float* EMData::mempool[] = {0};
00056
00057 bool EMData::copy_to_cuda_keepcpu() const
00058 {
00059
00060 if(rw_alloc()) {
00061 memused += num_bytes;
00062 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00063 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00064 }else{return false;}
00065
00066 return true;
00067 }
00068
00069 bool EMData::copy_to_cuda()
00070 {
00071
00072 if(rw_alloc()) {
00073 memused += num_bytes;
00074 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00075 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00076 }else{return false;}
00077 free_rdata();
00078
00079 return true;
00080 }
00081
00082 bool EMData::copy_to_cudaro() const
00083 {
00084
00085
00086 if(ro_alloc()) {
00087 memused += num_bytes;
00088 copy_to_array(rdata, cudarodata, nx, ny, nz, cudaMemcpyHostToDevice);
00089 }else{return false;}
00090
00091 return true;
00092 }
00093
00094 bool EMData::rw_alloc() const
00095 {
00096
00097 num_bytes = nxyz*sizeof(float);
00098 if(cudarwdata){return true;}
00099
00100 if(usemempoolswitch && mempoolused > -1 && mempoolarraysize >= int(num_bytes))
00101 {
00102 cudarwdata = mempool[mempoolused];
00103 mempool[mempoolused] = 0;
00104 mempoolused--;
00105 return true;
00106 }
00107 if(!freeup_devicemem(num_bytes)){return false;}
00108 cudaError_t error = cudaMalloc((void**)&cudarwdata,num_bytes);
00109 if ( error != cudaSuccess){return false;}
00110 if(!cudarodata){addtolist();}
00111
00112 return true;
00113 }
00114
00115 bool EMData::ro_alloc() const
00116 {
00117
00118 num_bytes = nxyz*sizeof(float);
00119 if(cudarodata){return true;}
00120 if(!freeup_devicemem(num_bytes)){return false;}
00121 cudarodata = get_cuda_array(nx, ny, nz);
00122 if(!cudarwdata){addtolist();}
00123
00124 return true;
00125
00126 }
00127
00128 void EMData::bindcudaarrayA(const bool intp_mode) const
00129 {
00130 if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00131 if(nz > 1){
00132
00133 bind_cuda_array_to_textureA(cudarodata, 3, intp_mode);
00134 }else{
00135
00136 bind_cuda_array_to_textureA(cudarodata, 2, intp_mode);
00137 }
00138
00139 }
00140
00141 void EMData::unbindcudaarryA() const
00142 {
00143
00144 if(nz > 1){
00145 unbind_cuda_textureA(3);
00146 }else{
00147 unbind_cuda_textureA(2);
00148 }
00149
00150 }
00151
00152 void EMData::bindcudaarrayB(const bool intp_mode) const
00153 {
00154 if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00155 if(nz > 1){
00156
00157 bind_cuda_array_to_textureB(cudarodata, 3, intp_mode);
00158 }else{
00159
00160 bind_cuda_array_to_textureB(cudarodata, 2, intp_mode);
00161 }
00162
00163 }
00164
00165 void EMData::unbindcudaarryB() const
00166 {
00167
00168 if(nz > 1){
00169 unbind_cuda_textureB(3);
00170 }else{
00171 unbind_cuda_textureB(2);
00172 }
00173
00174 }
00175
00176 bool EMData::copy_from_device(const bool rocpy)
00177 {
00178
00179
00180 if(cudarwdata && !rocpy){
00181
00182 if(rdata == 0){rdata = (float*)malloc(num_bytes);}
00183 cudaError_t error = cudaMemcpy(rdata,cudarwdata,num_bytes,cudaMemcpyDeviceToHost);
00184 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00185 rw_free();
00186 } else if (cudarodata && rocpy) {
00187 if (nz > 1){
00188
00189 cudaExtent extent;
00190 extent.width = nx;
00191 extent.height = ny;
00192 extent.depth = nz;
00193 cudaMemcpy3DParms copyParams = {0};
00194 copyParams.srcArray = cudarodata;
00195 copyParams.dstPtr = make_cudaPitchedPtr((void*)rdata, extent.width*sizeof(float), extent.width, extent.height);
00196 copyParams.extent = extent;
00197 copyParams.kind = cudaMemcpyDeviceToHost;
00198 cudaError_t error = cudaMemcpy3D(©Params);
00199 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00200 } else{
00201
00202 cudaError_t error = cudaMemcpyFromArray(rdata,cudarodata,0,0,num_bytes,cudaMemcpyDeviceToHost);
00203 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00204 }
00205 ro_free();
00206 } else {
00207 return false;
00208 }
00209
00210 update();
00211 return true;
00212 }
00213
00214 bool EMData::copy_rw_to_ro() const
00215 {
00216
00217 if(cudarwdata == 0){return false;}
00218
00219 if(cudarodata == 0){
00220 if(!freeup_devicemem(num_bytes)){return false;}
00221 cudarodata = get_cuda_array(nx, ny, nz);
00222 memused += num_bytes;
00223 }
00224
00225 copy_to_array(cudarwdata, cudarodata, nx, ny, nz, cudaMemcpyDeviceToDevice);
00226 roneedsupdate = 0;
00227
00228 return true;
00229
00230 }
00231
00232
00233
00234 void EMData::runcuda(float * results) const
00235 {
00236
00237 if(results == 0){throw UnexpectedBehaviorException( "Cuda failed!!!");}
00238 if(cudarwdata != 0){
00239
00240 } else {
00241 addtolist();
00242 }
00243 cudarwdata = results;
00244
00245 }
00246
00247 void EMData::rw_free() const
00248 {
00249
00250 if(usemempoolswitch && mempoolused < (MEMPOOL_SIZE-1) && mempoolarraysize >= int(num_bytes))
00251 {
00252 mempoolused++;
00253 mempool[mempoolused] = cudarwdata;
00254 return;
00255 }
00256 cudaError_t error = cudaFree(cudarwdata);
00257 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaFree failed:" + string(cudaGetErrorString(error)));
00258 cudarwdata = 0;
00259 memused -= num_bytes;
00260 if(!cudarodata){removefromlist();}
00261
00262 }
00263
00264 void EMData::ro_free() const
00265 {
00266
00267 cudaError_t error = cudaFreeArray(cudarodata);
00268 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaFreeArray failed:" + string(cudaGetErrorString(error)));
00269 cudarodata = 0;
00270 memused -= num_bytes;
00271 if(!cudarwdata){removefromlist();}
00272
00273 }
00274
00275 bool EMData::isrodataongpu() const
00276 {
00277 if(cudarodata != 0 && !roneedsupdate){return true;}
00278 if(cudarwdata != 0){
00279 if(copy_rw_to_ro()){;
00280 return true;
00281 } else {
00282 return false;
00283 }
00284 }else{
00285 return false;
00286 }
00287
00288 }
00289 bool EMData::freeup_devicemem(const int& num_bytes) const
00290 {
00291 size_t freemem, totalmem;
00292 cudaMemGetInfo(&freemem, &totalmem);
00293
00294 if ((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){
00295 return true;
00296 }else{
00297
00298
00299 while(lastinlist != 0){
00300 if(lastinlist->cudarwdata){
00301 cudaFree(lastinlist->cudarwdata);
00302 lastinlist->cudarwdata = 0;
00303 memused -= lastinlist->nxyz*sizeof(float);
00304 cudaMemGetInfo(&freemem, &totalmem);
00305 }
00306 if(lastinlist->cudarodata){
00307 cudaFreeArray(lastinlist->cudarodata);
00308 lastinlist->cudarodata = 0;
00309 memused -= lastinlist->nxyz*sizeof(float);
00310 cudaMemGetInfo(&freemem, &totalmem);
00311 }
00312 if(lastinlist != firstinlist){
00313 lastinlist->nextlistitem->prevlistitem = 0;
00314 lastinlist = lastinlist->nextlistitem;
00315 }else{
00316 firstinlist = 0;
00317 lastinlist = 0;
00318 }
00319 if((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){return true;}
00320 }
00321 }
00322
00323 return false;
00324 }
00325
00326 void EMData::addtolist() const
00327 {
00328 if(firstinlist == 0){
00329 firstinlist = this;
00330 lastinlist = this;
00331 nextlistitem = 0;
00332 prevlistitem = 0;
00333 }else{
00334
00335 firstinlist->nextlistitem = this;
00336 prevlistitem = firstinlist;
00337 nextlistitem = 0;
00338 firstinlist = this;
00339 }
00340
00341 }
00342
00343 void EMData::elementaccessed() const
00344 {
00345 removefromlist();
00346
00347 firstinlist->nextlistitem = this;
00348 prevlistitem = firstinlist;
00349 firstinlist = this;
00350 }
00351
00352 void EMData::removefromlist() const
00353 {
00354
00355 if(firstinlist == lastinlist){
00356 firstinlist = 0;
00357 lastinlist = 0;
00358 return;
00359 }
00360 if(nextlistitem !=0){
00361 nextlistitem->prevlistitem = prevlistitem;
00362 }else{
00363 firstinlist = prevlistitem;
00364 }
00365 if(prevlistitem !=0){
00366 prevlistitem->nextlistitem = nextlistitem;
00367 }else{
00368 lastinlist = nextlistitem;
00369 }
00370
00371 }
00372
00373 void EMData::usemempool(int size)
00374 {
00375
00376 usemempoolswitch = true;
00377 mempoolarraysize = size;
00378
00379 }
00380
00381 void EMData::freemempool()
00382 {
00383 for(int i = 0; i < MEMPOOL_SIZE; i ++)
00384 {
00385 if(mempool[i] == 0)
00386 {
00387 break;
00388 }else{
00389 cudaFree(mempool[i]);
00390 }
00391 }
00392 }
00393
00394 void EMData::switchoncuda()
00395 {
00396 EMData::usecuda = 1;
00397 }
00398
00399 void EMData::switchoffcuda()
00400 {
00401 EMData::usecuda = 0;
00402 }
00403
00404 void EMData::cuda_cleanup()
00405 {
00406 do_cuda_fft_cache_destroy();
00407
00408 while(lastinlist){
00409 if(lastinlist->cudarwdata) lastinlist->rw_free();
00410 if(lastinlist && lastinlist->cudarodata) lastinlist->ro_free();
00411 }
00412
00413 cudaThreadExit();
00414
00415 }
00416
00417 bool EMData::cuda_initialize()
00418 {
00419 if(device_init())
00420 {
00421 return 1;
00422 } else {
00423 switchoffcuda();
00424 return 0;
00425 }
00426 }
00427 #endif //EMAN2_USING_CUDA