
emdata_cuda.cpp

00001 /*
00002  * Author: Steven Ludtke, 04/10/2003 (sludtke@bcm.edu)
00003  * Copyright (c) 2000-2006 Baylor College of Medicine
00004  *
00005  * This software is issued under a joint BSD/GNU license. You may use the
00006  * source code in this file under either license. However, note that the
00007  * complete EMAN2 and SPARX software packages have some GPL dependencies,
00008  * so you are responsible for compliance with the licenses of these packages
00009  * if you opt to use BSD licensing. The warranty disclaimer below holds
00010  * in either instance.
00011  *
00012  * This complete copyright notice must be included in any revised version of the
00013  * source code. Additional authorship citations may be added, but existing
00014  * author citations must be preserved.
00015  *
00016  * This program is free software; you can redistribute it and/or modify
00017  * it under the terms of the GNU General Public License as published by
00018  * the Free Software Foundation; either version 2 of the License, or
00019  * (at your option) any later version.
00020  *
00021  * This program is distributed in the hope that it will be useful,
00022  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00023  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00024  * GNU General Public License for more details.
00025  *
00026  * You should have received a copy of the GNU General Public License
00027  * along with this program; if not, write to the Free Software
00028  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00029  *
00030  * */
00031 
00032 /*
00033  * This code implements a CUDA memory management scheme. EMData rdata arrays are copied to CUDA DDRAM memory via
00034  * copy_to_cuda() and to texture memory via copy_to_cudaro(). EMData is copied back using copy_from_device(),
00035  * and DDRAM data can be freed via rw_free() and ro_free(). When data is copied to CUDA DDRAM, memory is managed
00036  * via a static doubly linked list. When copy_to_cuda() is called there is first a check to ensure that enough
00037  * memory is available. If so, the copy is made and a call to addtolist() is made, adding this EMData item to the
00038  * static doubly linked list. If there is not enough memory, then freeup_devicemem() is called and the
00039  * last item on the linked list is removed. If there is still not enough room, then the next-to-last item is removed, and so on.
00040  * If there is still no room after the last item is removed, then no copy is made (and everything on the list has been removed).
00041  * Items are removed from the list via removefromlist(). Used in this manner, the memory management algorithm is FILO (first in, last out);
00042  * HOWEVER, when CUDA is used in applications a call to elementaccessed() can be made, which moves the item to the top of the list.
00043  * When this scheme is used, the memory management algorithm becomes LRU (least recently used), which should give better results in
00044  * almost all cases. As a side note, to actually use texture memory, a call to bindcudaarrayA() or bindcudaarrayB() should be made when needed.
00045  * A corresponding call to unbindcudaarryA() or unbindcudaarryB() needs to be made once the texture memory is no longer needed. These operations
00046  * do not actually move data around; they just bind it to a texture object, and texture objects are very limited resources! There are just
00047  * two such texture objects, known as texA and texB. These can be utilized in the actual CUDA code that nvcc compiles (in directory libEM/cuda).
00048  * Note that elementaccessed() is called every time getcudarwdata(), getcudarodata() or isrodataongpu() is called. Hence LRU is used by default,
00049  * and you are forced to use these getter functions because cudarwdata and cudarodata are private. You could get around this in EMData
00050  * member functions, though.
00051  * Note that possible concurrency issues can arise because, once data has been copied between the host and the GPU, two copies exist.
00052  * To account for this possible problem, CUDA functions can call setdirtybit(), which will copy back from GPU to host whenever
00053  * get_data() is called (this function is the getter for EMData's rdata). Currently this mechanism is not in use because it has not been
00054  * debugged, so whenever get_data() is called and there is data on the GPU, a copy from GPU to CPU is made irrespective
00055  * of whether or not the data on the CPU and GPU is the same.
00056 */
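
To make the flow described above concrete, here is a minimal round-trip sketch (not part of this file). It assumes an EMAN2 build with EMAN2_USING_CUDA defined and that EMData::usecuda and the getcudarwdata() accessor mentioned above are publicly accessible; the kernel launch itself is omitted, since the real kernels live in libEM/cuda.

#include "emdata.h"

using namespace EMAN;

// Sketch only: host -> device -> host round trip under the list-based scheme described above.
void gpu_round_trip(EMData& img)
{
        if (!EMData::usecuda) return;           // CUDA was never switched on for this process

        if (!img.copy_to_cuda()) return;        // RW copy to device DDRAM; the image joins the linked list

        float* d_data = img.getcudarwdata();    // accessor; also counts as an access (LRU touch)
        (void)d_data;                           // ... launch kernels from libEM/cuda on d_data here ...

        img.copy_from_device();                 // copy back to the host and free the device copy
        float* h_data = img.get_data();         // host rdata is current again
        (void)h_data;
}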
00057 
00058 #ifdef EMAN2_USING_CUDA
00059 
00060 #include "emdata.h"
00061 #include "exception.h"
00062 #include <cuda_runtime_api.h>
00063 #include <driver_functions.h>
00064 #include <cuda.h>
00065 #include <cuda/cuda_util.h>
00066 #include <cuda/cuda_emfft.h>
00067 
00068 using namespace EMAN;
00069 
00070 // Static init
00071 const EMData* EMData::firstinlist = 0;
00072 const EMData* EMData::lastinlist = 0;
00073 int EMData::memused = 0;
00074 int EMData::fudgemem = 1.024E8; //leave roughly 100 MB of 'fudge' memory free on the device
00075 int EMData::cudadevicenum = -1;
00076 bool EMData::usecuda = 0;
00077 bool EMData::nocudainit = (getenv("NOCUDAINIT") == NULL) ? 0 : bool(atoi(getenv("NOCUDAINIT")));
00078 
00079 bool EMData::copy_to_cuda() const
00080 {
00081         //cout << "copying from host to device RW" << " " << num_bytes << endl;
00082         if(rw_alloc()) {
00083                 memused += num_bytes;
00084                 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00085                 if ( error != cudaSuccess) {
00086                         //cout << rdata << " " << cudarwdata << endl;
00087                         throw UnexpectedBehaviorException("CudaMemcpy (host to device) failed:" + string(cudaGetErrorString(error)));
00088                 }
00089         }else{return false;}
00090         //setdirtybit() //uncomment this line if you want to ensure that only one effective copy exists on either the host or GPU
00091         
00092         return true;
00093 }
00094 
00095 bool EMData::copy_to_cudaro() const
00096 {
00097         
00098         //cout << "copying from host to device RO" << " " << num_bytes << endl;
00099         if(ro_alloc()) {
00100                 memused += num_bytes;
00101                 copy_to_array(rdata, cudarodata, nx, ny, nz, cudaMemcpyHostToDevice);
00102         }else{return false;}
00103         //setdirtybit() //uncomment this line if you want to ensure that only one effective copy exists on either the host or GPU
00104         
00105         return true;
00106 }
00107 
00108 bool EMData::rw_alloc() const
00109 {
00110         if(cudarwdata){return true;} // already exists
00111         num_bytes = nxyz*sizeof(float);
00112         if(!freeup_devicemem(num_bytes)){return false;}
00113         cudaError_t error = cudaMalloc((void**)&cudarwdata,num_bytes);
00114         if ( error != cudaSuccess){return false;}
00115         if(!cudarodata){
00116                 addtolist();
00117         }else{
00118                 elementaccessed();
00119         }
00120         return true;
00121 }
00122 
00123 bool EMData::ro_alloc() const
00124 {
00125         if(cudarodata){return true;} // already exists
00126         num_bytes = nxyz*sizeof(float);
00127         if(!freeup_devicemem(num_bytes)){return false;}
00128         cudarodata = get_cuda_array(nx, ny, nz);
00129         if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00130         if(!cudarwdata){
00131                 addtolist();
00132         }else{
00133                 elementaccessed();
00134         }
00135 
00136         return true;
00137         
00138 }
00139 
00140 void EMData::bindcudaarrayA(const bool intp_mode) const
00141 {
00142         if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00143         if(nz > 1){
00144                 bind_cuda_array_to_textureA(cudarodata, 3, intp_mode);
00145         }else{
00146                 bind_cuda_array_to_textureA(cudarodata, 2, intp_mode);
00147         }
00148         
00149 }
00150 
00151 void EMData::unbindcudaarryA() const
00152 {
00153         
00154         if(nz > 1){
00155                 unbind_cuda_textureA(3);
00156         }else{
00157                 unbind_cuda_textureA(2);
00158         }
00159         
00160 }
00161 
00162 void EMData::bindcudaarrayB(const bool intp_mode) const
00163 {
00164         if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00165         if(nz > 1){
00166                 bind_cuda_array_to_textureB(cudarodata, 3, intp_mode);
00167         }else{
00168                 bind_cuda_array_to_textureB(cudarodata, 2, intp_mode);
00169         }
00170         
00171 }
00172 
00173 void EMData::unbindcudaarryB() const
00174 {
00175         
00176         if(nz > 1){
00177                 unbind_cuda_textureB(3);
00178         }else{
00179                 unbind_cuda_textureB(2);
00180         }
00181         
00182 }
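
The read-only texture path pairs copy_to_cudaro() with the bind/unbind calls above. The following sketch (not part of this file) shows that pattern; the kernel wrapper name is a hypothetical placeholder for one of the texA-based routines that nvcc compiles in libEM/cuda.

#include "emdata.h"

using namespace EMAN;

// Sketch only: stage read-only data into texA around a texture-based kernel.
void texture_pass(EMData& img)
{
        if (!img.copy_to_cudaro()) return;      // host -> cudaArray copy (RO path)

        img.bindcudaarrayA(true);               // bind to texA, with interpolation enabled
        //some_texA_kernel_wrapper(img.get_xsize(), img.get_ysize(), img.get_zsize()); // hypothetical wrapper
        img.unbindcudaarryA();                  // texA is one of only two texture objects; release it promptly

        img.copy_from_device(true);             // RO copy back to the host, after which the cudaArray is freed
}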
00183 
00184 bool EMData::copy_from_device(const bool rocpy)
00185 {
00186         if(cudarwdata && !rocpy){
00187                 if(rdata == 0){rdata = (float*)malloc(num_bytes);} //allocate space if needed; assumes the size hasn't changed (which it hasn't so far)
00188                 cudaError_t error = cudaMemcpy(rdata,cudarwdata,num_bytes,cudaMemcpyDeviceToHost);
00189                 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00190                 rw_free(); //we have the data on either the host or device, not both (prevents concurrency issues)
00191                 if(cudarodata) ro_free(); // clear any RO data, for call safety
00192         } else if (cudarodata && rocpy) {
00193                 if(rdata == 0){rdata = (float*)malloc(num_bytes);} //allocate space if needed
00194                 if (nz > 1){
00195                         cudaExtent extent;
00196                         extent.width  = nx;
00197                         extent.height = ny;
00198                         extent.depth  = nz;
00199                         cudaMemcpy3DParms copyParams = {0};
00200                         copyParams.srcArray = cudarodata;
00201                         copyParams.dstPtr = make_cudaPitchedPtr((void*)rdata, extent.width*sizeof(float), extent.width, extent.height);
00202                         copyParams.extent   = extent;
00203                         copyParams.kind     = cudaMemcpyDeviceToHost;
00204                         cudaError_t error = cudaMemcpy3D(&copyParams);
00205                         if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00206                 } else{
00207                         cudaError_t error = cudaMemcpyFromArray(rdata,cudarodata,0,0,num_bytes,cudaMemcpyDeviceToHost);
00208                         if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00209                 }       
00210                 ro_free(); //we have the data on either the host or device, not both (prevents concurrency issues)
00211                 if(cudarwdata) rw_free(); // clear any RW data, for call safety
00212         } else {
00213                 return false;
00214         }
00215 
00216         update(); 
00217         return true;
00218 }
00219 
00220 bool EMData::copy_rw_to_ro() const
00221 {
00222         
00223         if(cudarwdata == 0){return false;}
00224         
00225         if(cudarodata == 0){
00226                 if(!freeup_devicemem(num_bytes)){return false;}
00227                 cudarodata = get_cuda_array(nx, ny, nz);
00228                 if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00229                 memused += num_bytes;
00230         }
00231         //this will copy over any preexisting data (saves a malloc), though it is not always a safe call
00232         copy_to_array(cudarwdata, cudarodata, nx, ny, nz, cudaMemcpyDeviceToDevice);
00233         roneedsupdate = 0; //just copied, so no longer need an update
00234         elementaccessed(); //move the image to the top of the list so it is not evicted before use (it could otherwise be removed from the bottom when another image needs room)
00235         return true;
00236         
00237 }
00238 
00239 // The policy here is that when an EMData object is created, cudarwdata is set to 0 and no device memory is allocated. Only
00240 // when cudarwdata points to allocated data does the EMData object go on the list; cudarwdata should never be set directly.
00241 void EMData::runcuda(float * results) const
00242 {
00243         
00244         if(results == 0){throw UnexpectedBehaviorException( "Cuda failed!!!");}
00245         if(cudarwdata != 0){
00246                 //rw_free(); //we could delete the old data here, but why not just overwrite it (saves a cudaFree)
00247         } else {
00248                 addtolist(); // now that we are using memory add to the list
00249         }
00250         cudarwdata = results;
00251         
00252 }
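
runcuda() lets device code hand a freshly allocated result buffer to the EMData object, which then takes ownership of it (and is added to the list if it was not already there). Below is a sketch (not part of this file) of that hand-off, allocating directly through the CUDA runtime instead of an actual libEM/cuda wrapper.

#include "emdata.h"
#include <cuda_runtime_api.h>

using namespace EMAN;

// Sketch only: allocate a device result buffer and let the EMData object adopt it.
void adopt_device_result(EMData& img)
{
        size_t nbytes = (size_t)img.get_xsize() * img.get_ysize() * img.get_zsize() * sizeof(float);
        float* d_result = 0;
        if (cudaMalloc((void**)&d_result, nbytes) != cudaSuccess) return;

        // ... launch a kernel that writes the processed image into d_result (hypothetical) ...

        img.runcuda(d_result);  // cudarwdata now points at d_result; the object is (or stays) on the list
}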
00253 
00254 void EMData::rw_free() const
00255 {
00256         cudaError_t error = cudaFree(cudarwdata);
00257         if ( error != cudaSuccess){
00258                 cout << rdata << " " << cudarwdata << endl;
00259                 throw UnexpectedBehaviorException( "CudaFree failed:" + string(cudaGetErrorString(error)));
00260         }
00261         cudarwdata = 0;
00262         memused -= num_bytes;
00263         if(!cudarodata){removefromlist();}
00264         
00265 }
00266 
00267 void EMData::ro_free() const
00268 {
00269         cudaError_t error = cudaFreeArray(cudarodata);
00270         if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaFreeArray failed:" + string(cudaGetErrorString(error)));
00271         cudarodata = 0;
00272         memused -= num_bytes;
00273         if(!cudarwdata){removefromlist();}
00274         
00275 }
00276 
00277 bool EMData::isrodataongpu() const
00278 {
00279         if(cudarodata != 0 && !roneedsupdate){
00280                 elementaccessed();
00281                 return true;
00282         }
00283         if(cudarwdata != 0){
00284                 if(copy_rw_to_ro()){
00285                         return true;
00286                 } else {
00287                         return false;
00288                 }
00289         }else{
00290                 return false;
00291         }
00292         
00293 }
00294 
00295 bool EMData::freeup_devicemem(const int& num_bytes) const
00296 {
00297         size_t freemem=0, totalmem=0; //initialize to prevent undefined behaviour
00298         cudaMemGetInfo(&freemem, &totalmem);
00299         //cout  << "memusage" << " " << freemem << " " << totalmem << endl;
00300         if ((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){
00301                 return true;
00302         }else{  
00303                 //keep on removing stuff until enough memory is available
00304                 while(lastinlist != 0){
00305                         if(lastinlist->cudarwdata){
00306                                 //screw the constness, always copy from GPU to host rather than throwing stuff away!!!
00307                                 const_cast<EMData*>(lastinlist)->copy_from_device();
00308                         }
00309                         if(lastinlist->cudarodata){
00310                                 const_cast<EMData*>(lastinlist)->copy_from_device(1);
00311                         }
00312                         cudaMemGetInfo(&freemem, &totalmem); //update free memory
00313                         if((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){return true;}     //enough memory has been freed, so stop evicting
00314                 }
00315         }       
00316         
00317         return false;   //if we failed :(
00318 }
00319 
00320 void EMData::setdirtybit() const
00321 {
00322         cudadirtybit = 1;
00323 }
00324 
00325 void EMData::addtolist() const
00326 {
00327         //Adds item to top of list
00328         if(firstinlist == 0){ //the list is empty, so start a new list with this item
00329                 firstinlist = this;
00330                 lastinlist = this;
00331                 nextlistitem = 0;
00332                 prevlistitem = 0;
00333         }else{
00334                 //we add to top of list
00335                 firstinlist->nextlistitem = this;
00336                 prevlistitem = firstinlist;
00337                 nextlistitem = 0;
00338                 firstinlist = this;
00339         }
00340         
00341 }
00342 
00343 void EMData::elementaccessed() const
00344 {
00345         //Do not move the item if it is already at the top of the list
00346         if(firstinlist == this){return;}
00347         removefromlist();
00348         addtolist();
00349 }
00350 
00351 void EMData::removefromlist() const
00352 {
00353 
00354         //remove from list
00355         if(firstinlist == lastinlist){ //this is the only item left in the list
00356                 firstinlist = 0;
00357                 lastinlist = 0;
00358                 nextlistitem = 0;
00359                 prevlistitem = 0;
00360                 return;
00361         }
00362         if(nextlistitem !=0){
00363                 nextlistitem->prevlistitem = prevlistitem;      //this object is not first in the list
00364         }else{
00365                 firstinlist = prevlistitem;
00366         }
00367         if(prevlistitem !=0){
00368                 prevlistitem->nextlistitem = nextlistitem;      //this item is not last in the list
00369         }else{
00370                 lastinlist = nextlistitem;
00371         }
00372         nextlistitem = 0;
00373         prevlistitem = 0;
00374         
00375 }
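
The three routines above implement an intrusive doubly linked list in which nextlistitem points towards the front (most recently used) and prevlistitem towards the back (the eviction candidate). The stand-alone sketch below (plain C++, not EMAN2 code) mirrors the same add, remove and move-to-front logic purely to illustrate how move-to-front turns the FILO list into an LRU list.

#include <cstdio>

// Stand-alone illustration of the intrusive LRU list used above.
struct Node
{
        const char* name;
        Node* next;     // towards the front of the list (most recently used)
        Node* prev;     // towards the back of the list (eviction candidate)
        Node(const char* n) : name(n), next(0), prev(0) {}
};

static Node* first = 0; // most recently used
static Node* last  = 0; // least recently used

static void addtolist(Node* n)
{
        if (first == 0) { first = last = n; return; }   // empty list: n becomes both ends
        first->next = n;                                // the old front now points at the new front
        n->prev = first;
        n->next = 0;
        first = n;
}

static void removefromlist(Node* n)
{
        if (first == last) { first = last = 0; n->next = n->prev = 0; return; }
        if (n->next) n->next->prev = n->prev; else first = n->prev; // no next means n was the front
        if (n->prev) n->prev->next = n->next; else last  = n->next; // no prev means n was the back
        n->next = n->prev = 0;
}

static void elementaccessed(Node* n)    // move-to-front: this is what turns FILO into LRU
{
        if (first == n) return;
        removefromlist(n);
        addtolist(n);
}

int main()
{
        Node a("a"), b("b"), c("c");
        addtolist(&a); addtolist(&b); addtolist(&c);    // front-to-back order: c, b, a
        elementaccessed(&a);                            // order becomes: a, c, b
        std::printf("evict %s first\n", last->name);    // prints: evict b first
        return 0;
}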
00376 
00377 void EMData::switchoncuda()
00378 {
00379         EMData::usecuda = 1;    
00380 }
00381 
00382 void EMData::switchoffcuda()
00383 {
00384         EMData::usecuda = 0;    
00385 }
00386 
00387 void EMData::cuda_cleanup() 
00388 {
00389         do_cuda_fft_cache_destroy();
00390         //Cleanup any object mess.... CUDA has OCD
00391         while(lastinlist){
00392                 if(lastinlist->cudarwdata) lastinlist->rw_free();
00393                 if(lastinlist && lastinlist->cudarodata) lastinlist->ro_free();
00394         }
00395         //Exit CUDA threads
00396         cudaThreadExit();
00397         //Free the CUDA device lock
00398         if(EMData::cudadevicenum >= 0)
00399         {
00400                 char filename[64];
00401                 snprintf(filename,sizeof(filename),"%s%d",cudalockfile,EMData::cudadevicenum); //Only works for Linux
00402                 remove(filename);
00403         }
00404 
00405 }
00406 
00407 bool EMData::cuda_initialize()
00408 {
00409         if(EMData::nocudainit) return 0;
00410         int device = device_init();
00411 
00412         if(device != -1)
00413         {
00414                 EMData::cudadevicenum = device;
00415                 switchoncuda();
00416                 return 1;
00417         } else {
00418                 switchoffcuda();
00419                 return 0;
00420         }
00421 }
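
Taken together, process-level usage of the two entry points above would look roughly like the sketch below (not part of this file). It assumes cuda_initialize() and cuda_cleanup() are static members callable from application code, as their definitions here suggest; everything between the two calls is the normal EMAN2 workload.

#include "emdata.h"

using namespace EMAN;

// Sketch only: per-process CUDA lifecycle around the machinery in this file.
int main()
{
        // Honours the NOCUDAINIT environment variable checked in cuda_initialize().
        if (!EMData::cuda_initialize()) {
                // No usable device (or NOCUDAINIT set): EMData::usecuda stays false and the CPU path is used.
                return 0;
        }

        // ... normal EMAN2 processing; images migrate to and from the device on demand ...

        // Frees any remaining device copies, destroys cached CUDA FFT plans,
        // shuts down the CUDA context and removes the per-device lock file.
        EMData::cuda_cleanup();
        return 0;
}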
00422 
00423 const char* EMData::getcudalock()
00424 {
00425         return cudalockfile;
00426 }
00427 
00428 #endif //EMAN2_USING_CUDA
