00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058 #ifdef EMAN2_USING_CUDA
00059
00060 #include "emdata.h"
00061 #include "exception.h"
00062 #include <cuda_runtime_api.h>
00063 #include <driver_functions.h>
00064 #include <cuda.h>
00065 #include <cuda/cuda_util.h>
00066 #include <cuda/cuda_emfft.h>
00067
00068 using namespace EMAN;
00069
00070
00071 const EMData* EMData::firstinlist = 0;
00072 const EMData* EMData::lastinlist = 0;
00073 int EMData::memused = 0;
00074 int EMData::fudgemem = 1.024E8;
00075 int EMData::cudadevicenum = -1;
00076 bool EMData::usecuda = 0;
00077 bool EMData::nocudainit = (getenv("NOCUDAINIT") == NULL) ? 0 : bool(atoi(getenv("NOCUDAINIT")));
00078
00079 bool EMData::copy_to_cuda() const
00080 {
00081
00082 if(rw_alloc()) {
00083 memused += num_bytes;
00084 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00085 if ( error != cudaSuccess) {
00086
00087 throw UnexpectedBehaviorException("CudaMemcpy (host to device) failed:" + string(cudaGetErrorString(error)));
00088 }
00089 }else{return false;}
00090
00091
00092 return true;
00093 }
00094
00095 bool EMData::copy_to_cudaro() const
00096 {
00097
00098
00099 if(ro_alloc()) {
00100 memused += num_bytes;
00101 copy_to_array(rdata, cudarodata, nx, ny, nz, cudaMemcpyHostToDevice);
00102 }else{return false;}
00103
00104
00105 return true;
00106 }
00107
00108 bool EMData::rw_alloc() const
00109 {
00110 if(cudarwdata){return true;}
00111 num_bytes = nxyz*sizeof(float);
00112 if(!freeup_devicemem(num_bytes)){return false;}
00113 cudaError_t error = cudaMalloc((void**)&cudarwdata,num_bytes);
00114 if ( error != cudaSuccess){return false;}
00115 if(!cudarodata){
00116 addtolist();
00117 }else{
00118 elementaccessed();
00119 }
00120 return true;
00121 }
00122
00123 bool EMData::ro_alloc() const
00124 {
00125 if(cudarodata){return true;}
00126 num_bytes = nxyz*sizeof(float);
00127 if(!freeup_devicemem(num_bytes)){return false;}
00128 cudarodata = get_cuda_array(nx, ny, nz);
00129 if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00130 if(!cudarwdata){
00131 addtolist();
00132 }else{
00133 elementaccessed();
00134 }
00135
00136 return true;
00137
00138 }
00139
00140 void EMData::bindcudaarrayA(const bool intp_mode) const
00141 {
00142 if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00143 if(nz > 1){
00144 bind_cuda_array_to_textureA(cudarodata, 3, intp_mode);
00145 }else{
00146 bind_cuda_array_to_textureA(cudarodata, 2, intp_mode);
00147 }
00148
00149 }
00150
00151 void EMData::unbindcudaarryA() const
00152 {
00153
00154 if(nz > 1){
00155 unbind_cuda_textureA(3);
00156 }else{
00157 unbind_cuda_textureA(2);
00158 }
00159
00160 }
00161
00162 void EMData::bindcudaarrayB(const bool intp_mode) const
00163 {
00164 if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00165 if(nz > 1){
00166 bind_cuda_array_to_textureB(cudarodata, 3, intp_mode);
00167 }else{
00168 bind_cuda_array_to_textureB(cudarodata, 2, intp_mode);
00169 }
00170
00171 }
00172
00173 void EMData::unbindcudaarryB() const
00174 {
00175
00176 if(nz > 1){
00177 unbind_cuda_textureB(3);
00178 }else{
00179 unbind_cuda_textureB(2);
00180 }
00181
00182 }
00183
00184 bool EMData::copy_from_device(const bool rocpy)
00185 {
00186 if(cudarwdata && !rocpy){
00187 if(rdata == 0){rdata = (float*)malloc(num_bytes);}
00188 cudaError_t error = cudaMemcpy(rdata,cudarwdata,num_bytes,cudaMemcpyDeviceToHost);
00189 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00190 rw_free();
00191 if(cudarodata) ro_free();
00192 } else if (cudarodata && rocpy) {
00193 if(rdata == 0){rdata = (float*)malloc(num_bytes);}
00194 if (nz > 1){
00195 cudaExtent extent;
00196 extent.width = nx;
00197 extent.height = ny;
00198 extent.depth = nz;
00199 cudaMemcpy3DParms copyParams = {0};
00200 copyParams.srcArray = cudarodata;
00201 copyParams.dstPtr = make_cudaPitchedPtr((void*)rdata, extent.width*sizeof(float), extent.width, extent.height);
00202 copyParams.extent = extent;
00203 copyParams.kind = cudaMemcpyDeviceToHost;
00204 cudaError_t error = cudaMemcpy3D(©Params);
00205 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00206 } else{
00207 cudaError_t error = cudaMemcpyFromArray(rdata,cudarodata,0,0,num_bytes,cudaMemcpyDeviceToHost);
00208 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00209 }
00210 ro_free();
00211 if(cudarwdata) rw_free();
00212 } else {
00213 return false;
00214 }
00215
00216 update();
00217 return true;
00218 }
00219
00220 bool EMData::copy_rw_to_ro() const
00221 {
00222
00223 if(cudarwdata == 0){return false;}
00224
00225 if(cudarodata == 0){
00226 if(!freeup_devicemem(num_bytes)){return false;}
00227 cudarodata = get_cuda_array(nx, ny, nz);
00228 if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00229 memused += num_bytes;
00230 }
00231
00232 copy_to_array(cudarwdata, cudarodata, nx, ny, nz, cudaMemcpyDeviceToDevice);
00233 roneedsupdate = 0;
00234 elementaccessed();
00235 return true;
00236
00237 }
00238
00239
00240
00241 void EMData::runcuda(float * results) const
00242 {
00243
00244 if(results == 0){throw UnexpectedBehaviorException( "Cuda failed!!!");}
00245 if(cudarwdata != 0){
00246
00247 } else {
00248 addtolist();
00249 }
00250 cudarwdata = results;
00251
00252 }
00253
00254 void EMData::rw_free() const
00255 {
00256 cudaError_t error = cudaFree(cudarwdata);
00257 if ( error != cudaSuccess){
00258 cout << rdata << " " << cudarwdata << endl;
00259 throw UnexpectedBehaviorException( "CudaFree failed:" + string(cudaGetErrorString(error)));
00260 }
00261 cudarwdata = 0;
00262 memused -= num_bytes;
00263 if(!cudarodata){removefromlist();}
00264
00265 }
00266
00267 void EMData::ro_free() const
00268 {
00269 cudaError_t error = cudaFreeArray(cudarodata);
00270 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaFreeArray failed:" + string(cudaGetErrorString(error)));
00271 cudarodata = 0;
00272 memused -= num_bytes;
00273 if(!cudarwdata){removefromlist();}
00274
00275 }
00276
00277 bool EMData::isrodataongpu() const
00278 {
00279 if(cudarodata != 0 && !roneedsupdate){
00280 if(cudarodata !=0) elementaccessed();
00281 return true;
00282 }
00283 if(cudarwdata != 0){
00284 if(copy_rw_to_ro()){;
00285 return true;
00286 } else {
00287 return false;
00288 }
00289 }else{
00290 return false;
00291 }
00292
00293 }
00294
00295 bool EMData::freeup_devicemem(const int& num_bytes) const
00296 {
00297 size_t freemem=0, totalmem=0;
00298 cudaMemGetInfo(&freemem, &totalmem);
00299
00300 if ((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){
00301 return true;
00302 }else{
00303
00304 while(lastinlist != 0){
00305 if(lastinlist->cudarwdata){
00306
00307 const_cast<EMData*>(lastinlist)->copy_from_device();
00308 }
00309 if(lastinlist->cudarodata){
00310 const_cast<EMData*>(lastinlist)->copy_from_device(1);
00311 }
00312 cudaMemGetInfo(&freemem, &totalmem);
00313 if((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){return true;}
00314 }
00315 }
00316
00317 return false;
00318 }
00319
00320 void EMData::setdirtybit() const
00321 {
00322 cudadirtybit = 1;
00323 }
00324
00325 void EMData::addtolist() const
00326 {
00327
00328 if(firstinlist == 0){
00329 firstinlist = this;
00330 lastinlist = this;
00331 nextlistitem = 0;
00332 prevlistitem = 0;
00333 }else{
00334
00335 firstinlist->nextlistitem = this;
00336 prevlistitem = firstinlist;
00337 nextlistitem = 0;
00338 firstinlist = this;
00339 }
00340
00341 }
00342
00343 void EMData::elementaccessed() const
00344 {
00345
00346 if(firstinlist == this){return;}
00347 removefromlist();
00348 addtolist();
00349 }
00350
00351 void EMData::removefromlist() const
00352 {
00353
00354
00355 if(firstinlist == lastinlist){
00356 firstinlist = 0;
00357 lastinlist = 0;
00358 nextlistitem = 0;
00359 prevlistitem = 0;
00360 return;
00361 }
00362 if(nextlistitem !=0){
00363 nextlistitem->prevlistitem = prevlistitem;
00364 }else{
00365 firstinlist = prevlistitem;
00366 }
00367 if(prevlistitem !=0){
00368 prevlistitem->nextlistitem = nextlistitem;
00369 }else{
00370 lastinlist = nextlistitem;
00371 }
00372 nextlistitem = 0;
00373 prevlistitem = 0;
00374
00375 }
00376
00377 void EMData::switchoncuda()
00378 {
00379 EMData::usecuda = 1;
00380 }
00381
00382 void EMData::switchoffcuda()
00383 {
00384 EMData::usecuda = 0;
00385 }
00386
00387 void EMData::cuda_cleanup()
00388 {
00389 do_cuda_fft_cache_destroy();
00390
00391 while(lastinlist){
00392 if(lastinlist->cudarwdata) lastinlist->rw_free();
00393 if(lastinlist && lastinlist->cudarodata) lastinlist->ro_free();
00394 }
00395
00396 cudaThreadExit();
00397
00398 if(EMData::cudadevicenum >= 0)
00399 {
00400 char filename[16];
00401 sprintf(filename,"%s%d",cudalockfile,EMData::cudadevicenum);
00402 remove(filename);
00403 }
00404
00405 }
00406
00407 bool EMData::cuda_initialize()
00408 {
00409 if(EMData::nocudainit) return 0;
00410 int device = device_init();
00411
00412 if(device != -1)
00413 {
00414 EMData::cudadevicenum = device;
00415 switchoncuda();
00416 return 1;
00417 } else {
00418 switchoffcuda();
00419 return 0;
00420 }
00421 }
00422
00423 const char* EMData::getcudalock()
00424 {
00425 return cudalockfile;
00426 }
00427
00428 #endif //EMAN2_USING_CUDA