00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #ifdef EMAN2_USING_CUDA
00035
00036 #include "emdata.h"
00037 #include "exception.h"
00038 #include <cuda_runtime_api.h>
00039 #include <driver_functions.h>
00040 #include <cuda.h>
00041 #include <cuda/cuda_util.h>
00042 #include <cuda/cuda_emfft.h>
00043
00044 using namespace EMAN;
00045
00046
00047 const EMData* EMData::firstinlist = 0;
00048 const EMData* EMData::lastinlist = 0;
00049 int EMData::memused = 0;
00050 int EMData::fudgemem = 1.024E8;
00051 int EMData::cudadevicenum = -1;
00052 bool EMData::usecuda = 0;
00053
00054 bool EMData::copy_to_cuda_keepcpu() const
00055 {
00056
00057 if(rw_alloc()) {
00058 memused += num_bytes;
00059 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00060 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00061 }else{return false;}
00062
00063 return true;
00064 }
00065
00066 bool EMData::copy_to_cuda()
00067 {
00068
00069 if(rw_alloc()) {
00070 memused += num_bytes;
00071 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00072 if ( error != cudaSuccess) {
00073
00074 throw UnexpectedBehaviorException( "CudaMemcpy (host to device) failed:" + string(cudaGetErrorString(error)));
00075 }
00076 }else{return false;}
00077
00078
00079
00080 return true;
00081 }
00082
00083 bool EMData::copy_to_cudaro() const
00084 {
00085
00086
00087 if(ro_alloc()) {
00088 memused += num_bytes;
00089 copy_to_array(rdata, cudarodata, nx, ny, nz, cudaMemcpyHostToDevice);
00090 }else{return false;}
00091
00092 return true;
00093 }
00094
00095 bool EMData::rw_alloc() const
00096 {
00097
00098 if(cudarwdata){return true;}
00099 num_bytes = nxyz*sizeof(float);
00100 if(!freeup_devicemem(num_bytes)){return false;}
00101 cudaError_t error = cudaMalloc((void**)&cudarwdata,num_bytes);
00102 if ( error != cudaSuccess){return false;}
00103 if(!cudarodata){addtolist();}
00104
00105 return true;
00106 }
00107
00108 bool EMData::ro_alloc() const
00109 {
00110
00111 if(cudarodata){return true;}
00112 num_bytes = nxyz*sizeof(float);
00113 if(!freeup_devicemem(num_bytes)){return false;}
00114 cudarodata = get_cuda_array(nx, ny, nz);
00115 if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00116 if(!cudarwdata){addtolist();}
00117
00118 return true;
00119
00120 }
00121
00122 void EMData::bindcudaarrayA(const bool intp_mode) const
00123 {
00124 if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00125 if(nz > 1){
00126
00127 bind_cuda_array_to_textureA(cudarodata, 3, intp_mode);
00128 }else{
00129
00130 bind_cuda_array_to_textureA(cudarodata, 2, intp_mode);
00131 }
00132
00133 }
00134
00135 void EMData::unbindcudaarryA() const
00136 {
00137
00138 if(nz > 1){
00139 unbind_cuda_textureA(3);
00140 }else{
00141 unbind_cuda_textureA(2);
00142 }
00143
00144 }
00145
00146 void EMData::bindcudaarrayB(const bool intp_mode) const
00147 {
00148 if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00149 if(nz > 1){
00150
00151 bind_cuda_array_to_textureB(cudarodata, 3, intp_mode);
00152 }else{
00153
00154 bind_cuda_array_to_textureB(cudarodata, 2, intp_mode);
00155 }
00156
00157 }
00158
00159 void EMData::unbindcudaarryB() const
00160 {
00161
00162 if(nz > 1){
00163 unbind_cuda_textureB(3);
00164 }else{
00165 unbind_cuda_textureB(2);
00166 }
00167
00168 }
00169
00170 bool EMData::copy_from_device(const bool rocpy)
00171 {
00172
00173
00174 if(cudarwdata && !rocpy){
00175
00176 if(rdata == 0){rdata = (float*)malloc(num_bytes);}
00177 cudaError_t error = cudaMemcpy(rdata,cudarwdata,num_bytes,cudaMemcpyDeviceToHost);
00178 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00179 rw_free();
00180 if(cudarodata) ro_free();
00181 } else if (cudarodata && rocpy) {
00182 if (nz > 1){
00183
00184 cudaExtent extent;
00185 extent.width = nx;
00186 extent.height = ny;
00187 extent.depth = nz;
00188 cudaMemcpy3DParms copyParams = {0};
00189 copyParams.srcArray = cudarodata;
00190 copyParams.dstPtr = make_cudaPitchedPtr((void*)rdata, extent.width*sizeof(float), extent.width, extent.height);
00191 copyParams.extent = extent;
00192 copyParams.kind = cudaMemcpyDeviceToHost;
00193 cudaError_t error = cudaMemcpy3D(©Params);
00194 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00195 } else{
00196
00197 cudaError_t error = cudaMemcpyFromArray(rdata,cudarodata,0,0,num_bytes,cudaMemcpyDeviceToHost);
00198 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00199 }
00200 ro_free();
00201 } else {
00202 return false;
00203 }
00204
00205 update();
00206 return true;
00207 }
00208
00209 bool EMData::copy_rw_to_ro() const
00210 {
00211
00212 if(cudarwdata == 0){return false;}
00213
00214 if(cudarodata == 0){
00215 if(!freeup_devicemem(num_bytes)){return false;}
00216 cudarodata = get_cuda_array(nx, ny, nz);
00217 if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00218 memused += num_bytes;
00219 }
00220
00221 copy_to_array(cudarwdata, cudarodata, nx, ny, nz, cudaMemcpyDeviceToDevice);
00222 roneedsupdate = 0;
00223 elementaccessed();
00224
00225 return true;
00226
00227 }
00228
00229
00230
00231 void EMData::runcuda(float * results) const
00232 {
00233
00234 if(results == 0){throw UnexpectedBehaviorException( "Cuda failed!!!");}
00235 if(cudarwdata != 0){
00236
00237 } else {
00238 addtolist();
00239 }
00240 cudarwdata = results;
00241
00242 }
00243
00244 void EMData::rw_free() const
00245 {
00246
00247 cudaError_t error = cudaFree(cudarwdata);
00248 if ( error != cudaSuccess){
00249 cout << rdata << " " << cudarwdata << endl;
00250 throw UnexpectedBehaviorException( "CudaFree failed:" + string(cudaGetErrorString(error)));
00251 }
00252 cudarwdata = 0;
00253 memused -= num_bytes;
00254 if(!cudarodata){removefromlist();}
00255
00256 }
00257
00258 void EMData::ro_free() const
00259 {
00260
00261 cudaError_t error = cudaFreeArray(cudarodata);
00262 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaFreeArray failed:" + string(cudaGetErrorString(error)));
00263 cudarodata = 0;
00264 memused -= num_bytes;
00265 if(!cudarwdata){removefromlist();}
00266
00267 }
00268
00269 bool EMData::isrodataongpu() const
00270 {
00271 if(cudarodata != 0 && !roneedsupdate){return true;}
00272 if(cudarwdata != 0){
00273 if(copy_rw_to_ro()){;
00274 return true;
00275 } else {
00276 return false;
00277 }
00278 }else{
00279 return false;
00280 }
00281
00282 }
00283 bool EMData::freeup_devicemem(const int& num_bytes) const
00284 {
00285 size_t freemem=0, totalmem=0;
00286 cudaMemGetInfo(&freemem, &totalmem);
00287
00288 if ((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){
00289 return true;
00290 }else{
00291
00292
00293 while(lastinlist != 0){
00294 if(lastinlist->cudarwdata){
00295 cudaFree(lastinlist->cudarwdata);
00296 lastinlist->cudarwdata = 0;
00297 memused -= lastinlist->nxyz*sizeof(float);
00298 cudaMemGetInfo(&freemem, &totalmem);
00299 }
00300 if(lastinlist->cudarodata){
00301 cudaFreeArray(lastinlist->cudarodata);
00302 lastinlist->cudarodata = 0;
00303 memused -= lastinlist->nxyz*sizeof(float);
00304 cudaMemGetInfo(&freemem, &totalmem);
00305 }
00306 if(lastinlist != firstinlist){
00307 lastinlist->nextlistitem->prevlistitem = 0;
00308 lastinlist = lastinlist->nextlistitem;
00309 }else{
00310 firstinlist = 0;
00311 lastinlist = 0;
00312 }
00313 if((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){return true;}
00314 }
00315 }
00316
00317 return false;
00318 }
00319
00320 void EMData::addtolist() const
00321 {
00322 if(firstinlist == 0){
00323 firstinlist = this;
00324 lastinlist = this;
00325 nextlistitem = 0;
00326 prevlistitem = 0;
00327 }else{
00328
00329 firstinlist->nextlistitem = this;
00330 prevlistitem = firstinlist;
00331 nextlistitem = 0;
00332 firstinlist = this;
00333 }
00334
00335 }
00336
00337 void EMData::elementaccessed() const
00338 {
00339 removefromlist();
00340
00341 firstinlist->nextlistitem = this;
00342 prevlistitem = firstinlist;
00343 firstinlist = this;
00344 }
00345
00346 void EMData::removefromlist() const
00347 {
00348
00349 if(firstinlist == lastinlist){
00350 firstinlist = 0;
00351 lastinlist = 0;
00352 return;
00353 }
00354 if(nextlistitem !=0){
00355 nextlistitem->prevlistitem = prevlistitem;
00356 }else{
00357 firstinlist = prevlistitem;
00358 }
00359 if(prevlistitem !=0){
00360 prevlistitem->nextlistitem = nextlistitem;
00361 }else{
00362 lastinlist = nextlistitem;
00363 }
00364
00365 }
00366
00367 void EMData::switchoncuda()
00368 {
00369 EMData::usecuda = 1;
00370 }
00371
00372 void EMData::switchoffcuda()
00373 {
00374 EMData::usecuda = 0;
00375 }
00376
00377 void EMData::cuda_cleanup()
00378 {
00379 do_cuda_fft_cache_destroy();
00380
00381 while(lastinlist){
00382 if(lastinlist->cudarwdata) lastinlist->rw_free();
00383 if(lastinlist && lastinlist->cudarodata) lastinlist->ro_free();
00384 }
00385
00386 cudaThreadExit();
00387
00388 if(EMData::cudadevicenum >= 0)
00389 {
00390 char filename[16];
00391 sprintf(filename,"%s%d",cudalockfile,EMData::cudadevicenum);
00392 remove(filename);
00393 }
00394
00395 }
00396
00397 bool EMData::cuda_initialize()
00398 {
00399 int device = device_init();
00400
00401 if(device != -1)
00402 {
00403 EMData::cudadevicenum = device;
00404 switchoncuda();
00405 return 1;
00406 } else {
00407 switchoffcuda();
00408 return 0;
00409 }
00410 }
00411
00412 const char* EMData::getcudalock()
00413 {
00414 return cudalockfile;
00415 }
00416
00417 #endif //EMAN2_USING_CUDA