00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056 #ifdef EMAN2_USING_CUDA
00057
00058 #include "emdata.h"
00059 #include "exception.h"
00060 #include <cuda_runtime_api.h>
00061 #include <driver_functions.h>
00062 #include <cuda.h>
00063 #include <cuda/cuda_util.h>
00064 #include <cuda/cuda_emfft.h>
00065
00066 using namespace EMAN;
00067
00068
00069 const EMData* EMData::firstinlist = 0;
00070 const EMData* EMData::lastinlist = 0;
00071 int EMData::memused = 0;
00072 int EMData::fudgemem = 1.024E8;
00073 int EMData::cudadevicenum = -1;
00074 bool EMData::usecuda = 0;
00075 bool EMData::nocudainit = (getenv("NOCUDAINIT") == NULL) ? 0 : bool(atoi(getenv("NOCUDAINIT")));
00076
00077 bool EMData::copy_to_cuda() const
00078 {
00079
00080 if(rw_alloc()) {
00081 memused += num_bytes;
00082 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00083 if ( error != cudaSuccess) {
00084
00085 throw UnexpectedBehaviorException("CudaMemcpy (host to device) failed:" + string(cudaGetErrorString(error)));
00086 }
00087 }else{return false;}
00088
00089
00090 return true;
00091 }
00092
00093 bool EMData::copy_to_cudaro() const
00094 {
00095
00096
00097 if(ro_alloc()) {
00098 memused += num_bytes;
00099 copy_to_array(rdata, cudarodata, nx, ny, nz, cudaMemcpyHostToDevice);
00100 }else{return false;}
00101
00102
00103 return true;
00104 }
00105
00106 bool EMData::rw_alloc() const
00107 {
00108 if(cudarwdata){return true;}
00109 num_bytes = nxyz*sizeof(float);
00110 if(!freeup_devicemem(num_bytes)){return false;}
00111 cudaError_t error = cudaMalloc((void**)&cudarwdata,num_bytes);
00112 if ( error != cudaSuccess){return false;}
00113 if(!cudarodata){
00114 addtolist();
00115 }else{
00116 elementaccessed();
00117 }
00118 return true;
00119 }
00120
00121 bool EMData::ro_alloc() const
00122 {
00123 if(cudarodata){return true;}
00124 num_bytes = nxyz*sizeof(float);
00125 if(!freeup_devicemem(num_bytes)){return false;}
00126 cudarodata = get_cuda_array(nx, ny, nz);
00127 if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00128 if(!cudarwdata){
00129 addtolist();
00130 }else{
00131 elementaccessed();
00132 }
00133
00134 return true;
00135
00136 }
00137
00138 void EMData::bindcudaarrayA(const bool intp_mode) const
00139 {
00140 if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00141 if(nz > 1){
00142 bind_cuda_array_to_textureA(cudarodata, 3, intp_mode);
00143 }else{
00144 bind_cuda_array_to_textureA(cudarodata, 2, intp_mode);
00145 }
00146
00147 }
00148
00149 void EMData::unbindcudaarryA() const
00150 {
00151
00152 if(nz > 1){
00153 unbind_cuda_textureA(3);
00154 }else{
00155 unbind_cuda_textureA(2);
00156 }
00157
00158 }
00159
00160 void EMData::bindcudaarrayB(const bool intp_mode) const
00161 {
00162 if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00163 if(nz > 1){
00164 bind_cuda_array_to_textureB(cudarodata, 3, intp_mode);
00165 }else{
00166 bind_cuda_array_to_textureB(cudarodata, 2, intp_mode);
00167 }
00168
00169 }
00170
00171 void EMData::unbindcudaarryB() const
00172 {
00173
00174 if(nz > 1){
00175 unbind_cuda_textureB(3);
00176 }else{
00177 unbind_cuda_textureB(2);
00178 }
00179
00180 }
00181
00182 bool EMData::copy_from_device(const bool rocpy)
00183 {
00184 if(cudarwdata && !rocpy){
00185 if(rdata == 0){rdata = (float*)malloc(num_bytes);}
00186 cudaError_t error = cudaMemcpy(rdata,cudarwdata,num_bytes,cudaMemcpyDeviceToHost);
00187 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00188 rw_free();
00189 if(cudarodata) ro_free();
00190 } else if (cudarodata && rocpy) {
00191 if(rdata == 0){rdata = (float*)malloc(num_bytes);}
00192 if (nz > 1){
00193 cudaExtent extent;
00194 extent.width = nx;
00195 extent.height = ny;
00196 extent.depth = nz;
00197 cudaMemcpy3DParms copyParams = {0};
00198 copyParams.srcArray = cudarodata;
00199 copyParams.dstPtr = make_cudaPitchedPtr((void*)rdata, extent.width*sizeof(float), extent.width, extent.height);
00200 copyParams.extent = extent;
00201 copyParams.kind = cudaMemcpyDeviceToHost;
00202 cudaError_t error = cudaMemcpy3D(©Params);
00203 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00204 } else{
00205 cudaError_t error = cudaMemcpyFromArray(rdata,cudarodata,0,0,num_bytes,cudaMemcpyDeviceToHost);
00206 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00207 }
00208 ro_free();
00209 if(cudarwdata) rw_free();
00210 } else {
00211 return false;
00212 }
00213
00214 update();
00215 return true;
00216 }
00217
00218 bool EMData::copy_rw_to_ro() const
00219 {
00220
00221 if(cudarwdata == 0){return false;}
00222
00223 if(cudarodata == 0){
00224 if(!freeup_devicemem(num_bytes)){return false;}
00225 cudarodata = get_cuda_array(nx, ny, nz);
00226 if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00227 memused += num_bytes;
00228 }
00229
00230 copy_to_array(cudarwdata, cudarodata, nx, ny, nz, cudaMemcpyDeviceToDevice);
00231 roneedsupdate = 0;
00232 elementaccessed();
00233 return true;
00234
00235 }
00236
00237
00238
00239 void EMData::runcuda(float * results) const
00240 {
00241
00242 if(results == 0){throw UnexpectedBehaviorException( "Cuda failed!!!");}
00243 if(cudarwdata != 0){
00244
00245 } else {
00246 addtolist();
00247 }
00248 cudarwdata = results;
00249
00250 }
00251
00252 void EMData::rw_free() const
00253 {
00254 cudaError_t error = cudaFree(cudarwdata);
00255 if ( error != cudaSuccess){
00256 cout << rdata << " " << cudarwdata << endl;
00257 throw UnexpectedBehaviorException( "CudaFree failed:" + string(cudaGetErrorString(error)));
00258 }
00259 cudarwdata = 0;
00260 memused -= num_bytes;
00261 if(!cudarodata){removefromlist();}
00262
00263 }
00264
00265 void EMData::ro_free() const
00266 {
00267 cudaError_t error = cudaFreeArray(cudarodata);
00268 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaFreeArray failed:" + string(cudaGetErrorString(error)));
00269 cudarodata = 0;
00270 memused -= num_bytes;
00271 if(!cudarwdata){removefromlist();}
00272
00273 }
00274
00275 bool EMData::isrodataongpu() const
00276 {
00277 if(cudarodata != 0 && !roneedsupdate){
00278 if(cudarodata !=0) elementaccessed();
00279 return true;
00280 }
00281 if(cudarwdata != 0){
00282 if(copy_rw_to_ro()){;
00283 return true;
00284 } else {
00285 return false;
00286 }
00287 }else{
00288 return false;
00289 }
00290
00291 }
00292
00293 bool EMData::freeup_devicemem(const int& num_bytes) const
00294 {
00295 size_t freemem=0, totalmem=0;
00296 cudaMemGetInfo(&freemem, &totalmem);
00297
00298 if ((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){
00299 return true;
00300 }else{
00301
00302 while(lastinlist != 0){
00303 if(lastinlist->cudarwdata){
00304
00305 const_cast<EMData*>(lastinlist)->copy_from_device();
00306 }
00307 if(lastinlist->cudarodata){
00308 const_cast<EMData*>(lastinlist)->copy_from_device(1);
00309 }
00310 cudaMemGetInfo(&freemem, &totalmem);
00311 if((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){return true;}
00312 }
00313 }
00314
00315 return false;
00316 }
00317
00318 void EMData::setdirtybit() const
00319 {
00320 cudadirtybit = 1;
00321 }
00322
00323 void EMData::addtolist() const
00324 {
00325
00326 if(firstinlist == 0){
00327 firstinlist = this;
00328 lastinlist = this;
00329 nextlistitem = 0;
00330 prevlistitem = 0;
00331 }else{
00332
00333 firstinlist->nextlistitem = this;
00334 prevlistitem = firstinlist;
00335 nextlistitem = 0;
00336 firstinlist = this;
00337 }
00338
00339 }
00340
00341 void EMData::elementaccessed() const
00342 {
00343
00344 if(firstinlist == this){return;}
00345 removefromlist();
00346 addtolist();
00347 }
00348
00349 void EMData::removefromlist() const
00350 {
00351
00352
00353 if(firstinlist == lastinlist){
00354 firstinlist = 0;
00355 lastinlist = 0;
00356 nextlistitem = 0;
00357 prevlistitem = 0;
00358 return;
00359 }
00360 if(nextlistitem !=0){
00361 nextlistitem->prevlistitem = prevlistitem;
00362 }else{
00363 firstinlist = prevlistitem;
00364 }
00365 if(prevlistitem !=0){
00366 prevlistitem->nextlistitem = nextlistitem;
00367 }else{
00368 lastinlist = nextlistitem;
00369 }
00370 nextlistitem = 0;
00371 prevlistitem = 0;
00372
00373 }
00374
00375 void EMData::switchoncuda()
00376 {
00377 EMData::usecuda = 1;
00378 }
00379
00380 void EMData::switchoffcuda()
00381 {
00382 EMData::usecuda = 0;
00383 }
00384
00385 void EMData::cuda_cleanup()
00386 {
00387 do_cuda_fft_cache_destroy();
00388
00389 while(lastinlist){
00390 if(lastinlist->cudarwdata) lastinlist->rw_free();
00391 if(lastinlist && lastinlist->cudarodata) lastinlist->ro_free();
00392 }
00393
00394 cudaThreadExit();
00395
00396 if(EMData::cudadevicenum >= 0)
00397 {
00398 char filename[16];
00399 sprintf(filename,"%s%d",cudalockfile,EMData::cudadevicenum);
00400 remove(filename);
00401 }
00402
00403 }
00404
00405 bool EMData::cuda_initialize()
00406 {
00407 if(EMData::nocudainit) return 0;
00408 int device = device_init();
00409
00410 if(device != -1)
00411 {
00412 EMData::cudadevicenum = device;
00413 switchoncuda();
00414 return 1;
00415 } else {
00416 switchoffcuda();
00417 return 0;
00418 }
00419 }
00420
00421 const char* EMData::getcudalock()
00422 {
00423 return cudalockfile;
00424 }
00425
00426 #endif //EMAN2_USING_CUDA