Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members

emdata_cuda.cpp

Go to the documentation of this file.
00001 /*
00002  * Author: Steven Ludtke, 04/10/2003 (sludtke@bcm.edu)
00003  * Copyright (c) 2000-2006 Baylor College of Medicine
00004  *
00005  * This software is issued under a joint BSD/GNU license. You may use the
00006  * source code in this file under either license. However, note that the
00007  * complete EMAN2 and SPARX software packages have some GPL dependencies,
00008  * so you are responsible for compliance with the licenses of these packages
00009  * if you opt to use BSD licensing. The warranty disclaimer below holds
00010  * in either instance.
00011  *
00012  * This complete copyright notice must be included in any revised version of the
00013  * source code. Additional authorship citations may be added, but existing
00014  * author citations must be preserved.
00015  *
00016  * This program is free software; you can redistribute it and/or modify
00017  * it under the terms of the GNU General Public License as published by
00018  * the Free Software Foundation; either version 2 of the License, or
00019  * (at your option) any later version.
00020  *
00021  * This program is distributed in the hope that it will be useful,
00022  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00023  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00024  * GNU General Public License for more details.
00025  *
00026  * You should have received a copy of the GNU General Public License
00027  * along with this program; if not, write to the Free Software
00028  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00029  *
00030  * */
00031 
00032 
00033 
00034 #ifdef EMAN2_USING_CUDA
00035 
00036 #include "emdata.h"
00037 #include "exception.h"
00038 #include <cuda_runtime_api.h>
00039 #include <driver_functions.h>
00040 #include <cuda.h>
00041 #include <cuda/cuda_util.h>
00042 #include <cuda/cuda_emfft.h>
00043 
00044 using namespace EMAN;
00045 
00046 // Static init
00047 const EMData* EMData::firstinlist = 0;
00048 const EMData* EMData::lastinlist = 0;
00049 int EMData::memused = 0;
00050 int EMData::fudgemem = 1.024E8; //let's leave 10 MB of 'fudge' memory on the device
00051 int EMData::cudadevicenum = -1;
00052 bool EMData::usecuda = 0;
00053 
00054 bool EMData::copy_to_cuda_keepcpu() const
00055 {
00056         //cout << "copying from host to device RW" << " " << num_bytes << endl;
00057         if(rw_alloc()) {
00058                 memused += num_bytes;   
00059                 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00060                 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00061         }else{return false;}
00062         
00063         return true;
00064 }
00065 
00066 bool EMData::copy_to_cuda()
00067 {
00068         //cout << "copying from host to device RW" << " " << num_bytes << endl;
00069         if(rw_alloc()) {
00070                 memused += num_bytes;
00071                 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00072                 if ( error != cudaSuccess) {
00073                         //cout << rdata << " " << cudarwdata << endl;
00074                         throw UnexpectedBehaviorException( "CudaMemcpy (host to device) failed:" + string(cudaGetErrorString(error)));
00075                 }
00076         }else{return false;}
00077         //Temporaly disabled, causes LOTS of bugs......
00078         //free_rdata(); //we have the data on either the host or device, not both (prevents concurrency issues)
00079         
00080         return true;
00081 }
00082 
00083 bool EMData::copy_to_cudaro() const
00084 {
00085         
00086         //cout << "copying from host to device RO" << " " << num_bytes << endl;
00087         if(ro_alloc()) {
00088                 memused += num_bytes;
00089                 copy_to_array(rdata, cudarodata, nx, ny, nz, cudaMemcpyHostToDevice);
00090         }else{return false;}
00091         
00092         return true;
00093 }
00094 
00095 bool EMData::rw_alloc() const
00096 {
00097         //cout << "rw_alloc" << endl;
00098         if(cudarwdata){return true;} // already exists
00099         num_bytes = nxyz*sizeof(float);
00100         if(!freeup_devicemem(num_bytes)){return false;}
00101         cudaError_t error = cudaMalloc((void**)&cudarwdata,num_bytes);
00102         if ( error != cudaSuccess){return false;}
00103         if(!cudarodata){addtolist();}
00104         //cout << "rw alloc finish" << endl;
00105         return true;
00106 }
00107 
00108 bool EMData::ro_alloc() const
00109 {
00110         //cout << "ro_alloc" << endl;
00111         if(cudarodata){return true;} // already exists
00112         num_bytes = nxyz*sizeof(float);
00113         if(!freeup_devicemem(num_bytes)){return false;}
00114         cudarodata = get_cuda_array(nx, ny, nz);
00115         if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00116         if(!cudarwdata){addtolist();}
00117         //cout << "ro alloc finish " << " " <<  cudarodata << endl;
00118         return true;
00119         
00120 }
00121 
00122 void EMData::bindcudaarrayA(const bool intp_mode) const
00123 {
00124         if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00125         if(nz > 1){
00126                 //cout << "3d bind" << endl;
00127                 bind_cuda_array_to_textureA(cudarodata, 3, intp_mode);
00128         }else{
00129                 //cout << "2d bind" << endl;
00130                 bind_cuda_array_to_textureA(cudarodata, 2, intp_mode);
00131         }
00132         
00133 }
00134 
00135 void EMData::unbindcudaarryA() const
00136 {
00137         
00138         if(nz > 1){
00139                 unbind_cuda_textureA(3);
00140         }else{
00141                 unbind_cuda_textureA(2);
00142         }
00143         
00144 }
00145 
00146 void EMData::bindcudaarrayB(const bool intp_mode) const
00147 {
00148         if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00149         if(nz > 1){
00150                 //cout << "3d bind" << endl;
00151                 bind_cuda_array_to_textureB(cudarodata, 3, intp_mode);
00152         }else{
00153                 //cout << "2d bind" << endl;
00154                 bind_cuda_array_to_textureB(cudarodata, 2, intp_mode);
00155         }
00156         
00157 }
00158 
00159 void EMData::unbindcudaarryB() const
00160 {
00161         
00162         if(nz > 1){
00163                 unbind_cuda_textureB(3);
00164         }else{
00165                 unbind_cuda_textureB(2);
00166         }
00167         
00168 }
00169 
00170 bool EMData::copy_from_device(const bool rocpy)
00171 {
00172         //cout << "copy from device to host " << cudarwdata << " " << rocpy << endl;
00173         //maybe we should check to see if rdata is still allocated? If not we would need to do either a malloc or new (also assumes that the size of rdata has not changed)
00174         if(cudarwdata && !rocpy){
00175                 //cout << "rw copy back " << rdata << " numbytes " << num_bytes << endl;
00176                 if(rdata == 0){rdata = (float*)malloc(num_bytes);} //allocate space if needed, assumes size hasn't changed(Which is hasn't so far)
00177                 cudaError_t error = cudaMemcpy(rdata,cudarwdata,num_bytes,cudaMemcpyDeviceToHost);
00178                 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00179                 rw_free(); //we have the data on either the host or device, not both (prevents concurrency issues)
00180                 if(cudarodata) ro_free(); // clear any RO data, for call safety
00181         } else if (cudarodata && rocpy) {
00182                 if (nz > 1){
00183                         //cout << "ro copy back 3D" << endl;
00184                         cudaExtent extent;
00185                         extent.width  = nx;
00186                         extent.height = ny;
00187                         extent.depth  = nz;
00188                         cudaMemcpy3DParms copyParams = {0};
00189                         copyParams.srcArray = cudarodata;
00190                         copyParams.dstPtr = make_cudaPitchedPtr((void*)rdata, extent.width*sizeof(float), extent.width, extent.height);
00191                         copyParams.extent   = extent;
00192                         copyParams.kind     = cudaMemcpyDeviceToHost;
00193                         cudaError_t error = cudaMemcpy3D(&copyParams);
00194                         if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00195                 } else{
00196                         //cout << "ro copy back 2D" << endl;
00197                         cudaError_t error = cudaMemcpyFromArray(rdata,cudarodata,0,0,num_bytes,cudaMemcpyDeviceToHost);
00198                         if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00199                 }       
00200                 ro_free(); //we have the data on either the host or device, not both (prevents concurrency issues)
00201         } else {
00202                 return false;
00203         }
00204         //cout << "finished copying" << endl;
00205         update(); 
00206         return true;
00207 }
00208 
00209 bool EMData::copy_rw_to_ro() const
00210 {
00211         
00212         if(cudarwdata == 0){return false;}
00213         
00214         if(cudarodata == 0){
00215                 if(!freeup_devicemem(num_bytes)){return false;}
00216                 cudarodata = get_cuda_array(nx, ny, nz);
00217                 if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00218                 memused += num_bytes;
00219         }
00220         //this will copy over any prexisting data (saves a malloc)....(but sometimes not a safe call.....)
00221         copy_to_array(cudarwdata, cudarodata, nx, ny, nz, cudaMemcpyDeviceToDevice);
00222         roneedsupdate = 0; //just copied, so no longer need an update
00223         elementaccessed(); //To move the image to the top of the stack, prevents deletion before useage(If the image is at the stack bottom, and then anoth image is moved on....)
00224         
00225         return true;
00226         
00227 }
00228 
00229 // The policy here is that when an EMData object is created, cudarwdata is set to 0. and no mem is allocated. It is
00230 //only when cudarwdata points to allocated data does the EMData object go on the list. cudarwdata should NEVER be set 
00231 void EMData::runcuda(float * results) const
00232 {
00233         
00234         if(results == 0){throw UnexpectedBehaviorException( "Cuda failed!!!");}
00235         if(cudarwdata != 0){
00236                 //rw_free();} //delete the old data, why not jus overwrite!! (save a cudaFree)
00237         } else {
00238                 addtolist(); // now that we are using memory add to the list
00239         }
00240         cudarwdata = results;
00241         
00242 }
00243 
00244 void EMData::rw_free() const
00245 {
00246         //cout << "rw_free " << " " << cudarwdata << endl;
00247         cudaError_t error = cudaFree(cudarwdata);
00248         if ( error != cudaSuccess){
00249                 cout << rdata << " " << cudarwdata << endl;
00250                 throw UnexpectedBehaviorException( "CudaFree failed:" + string(cudaGetErrorString(error)));
00251         }
00252         cudarwdata = 0;
00253         memused -= num_bytes;
00254         if(!cudarodata){removefromlist();}
00255         
00256 }
00257 
00258 void EMData::ro_free() const
00259 {
00260         //cout << "ro_free " << " " << cudarodata << endl;
00261         cudaError_t error = cudaFreeArray(cudarodata);
00262         if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaFreeArray failed:" + string(cudaGetErrorString(error)));
00263         cudarodata = 0;
00264         memused -= num_bytes;
00265         if(!cudarwdata){removefromlist();}
00266         
00267 }
00268 
00269 bool EMData::isrodataongpu() const
00270 {
00271         if(cudarodata != 0 && !roneedsupdate){return true;}
00272         if(cudarwdata != 0){
00273                 if(copy_rw_to_ro()){;
00274                         return true;
00275                 } else {
00276                         return false;
00277                 }
00278         }else{
00279                 return false;
00280         }
00281         
00282 }
00283 bool EMData::freeup_devicemem(const int& num_bytes) const
00284 {
00285         size_t freemem=0, totalmem=0; //initialize to prevent undefined behaviour
00286         cudaMemGetInfo(&freemem, &totalmem);
00287         //cout  << "memusage" << " " << freemem << " " << totalmem << endl;
00288         if ((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){
00289                 return true;
00290         }else{
00291                 //if(num_bytes > memused){return false;} //it is not possible to free up enough memory!!        
00292                 //keep on removing stuff until enough memory is available
00293                 while(lastinlist != 0){
00294                         if(lastinlist->cudarwdata){
00295                                 cudaFree(lastinlist->cudarwdata);
00296                                 lastinlist->cudarwdata = 0;
00297                                 memused -= lastinlist->nxyz*sizeof(float);
00298                                 cudaMemGetInfo(&freemem, &totalmem); //update free memory
00299                         }
00300                         if(lastinlist->cudarodata){
00301                                 cudaFreeArray(lastinlist->cudarodata);
00302                                 lastinlist->cudarodata = 0;
00303                                 memused -= lastinlist->nxyz*sizeof(float);
00304                                 cudaMemGetInfo(&freemem, &totalmem); //update free memory
00305                         }
00306                         if(lastinlist != firstinlist){ //if there is more than one itme on the list
00307                                 lastinlist->nextlistitem->prevlistitem = 0;     // set the previtem link in the next item to zero
00308                                 lastinlist = lastinlist->nextlistitem;          // chop the last item in the list off and set to next item
00309                         }else{
00310                                 firstinlist = 0;        // we have deleted everything on the list
00311                                 lastinlist = 0;
00312                         }
00313                         if((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){return true;}     //this should break the loop....
00314                 }
00315         }       
00316         
00317         return false;   //if we failed :(
00318 }
00319 
00320 void EMData::addtolist() const
00321 {
00322         if(firstinlist == 0){ //if this is the first item in the list (first object in list)
00323                 firstinlist = this;
00324                 lastinlist = this;
00325                 nextlistitem = 0;
00326                 prevlistitem = 0;
00327         }else{
00328                 //we add to top of list
00329                 firstinlist->nextlistitem = this;
00330                 prevlistitem = firstinlist;
00331                 nextlistitem = 0;
00332                 firstinlist = this;
00333         }       
00334         
00335 }
00336 
00337 void EMData::elementaccessed() const
00338 {
00339         removefromlist();
00340         //now insert at top (there is no case where we call this function, but there is nothing in the list)
00341         firstinlist->nextlistitem = this;
00342         prevlistitem = firstinlist;
00343         firstinlist = this;
00344 }
00345 
00346 void EMData::removefromlist() const
00347 {
00348         //remove from list
00349         if(firstinlist == lastinlist){ //last item in list....
00350                 firstinlist = 0;
00351                 lastinlist = 0;
00352                 return;
00353         }
00354         if(nextlistitem !=0){
00355                 nextlistitem->prevlistitem = prevlistitem;      //this object is not first in the list
00356         }else{
00357                 firstinlist = prevlistitem;
00358         }
00359         if(prevlistitem !=0){
00360                 prevlistitem->nextlistitem = nextlistitem;      //this item is not last in the list
00361         }else{
00362                 lastinlist = nextlistitem;
00363         }
00364         
00365 }
00366 
00367 void EMData::switchoncuda()
00368 {
00369         EMData::usecuda = 1;    
00370 }
00371 
00372 void EMData::switchoffcuda()
00373 {
00374         EMData::usecuda = 0;    
00375 }
00376 
00377 void EMData::cuda_cleanup() 
00378 {
00379         do_cuda_fft_cache_destroy();
00380         //Cleanup any object mess.... CUDA has OCD
00381         while(lastinlist){
00382                 if(lastinlist->cudarwdata) lastinlist->rw_free();
00383                 if(lastinlist && lastinlist->cudarodata) lastinlist->ro_free();
00384         }
00385         //Exit CUDA threads
00386         cudaThreadExit();
00387         //Free the CUDA device lock
00388         if(EMData::cudadevicenum >= 0)
00389         {
00390                 char filename[16];
00391                 sprintf(filename,"%s%d",cudalockfile,EMData::cudadevicenum); //Only works for Linux
00392                 remove(filename);
00393         }
00394 
00395 }
00396 
00397 bool EMData::cuda_initialize()
00398 {
00399         int device = device_init();
00400 
00401         if(device != -1)
00402         {
00403                 EMData::cudadevicenum = device;
00404                 switchoncuda();
00405                 return 1;
00406         } else {
00407                 switchoffcuda();
00408                 return 0;
00409         }
00410 }
00411 
00412 const char* EMData::getcudalock()
00413 {
00414         return cudalockfile;
00415 }
00416 
00417 #endif //EMAN2_USING_CUDA

Generated on Thu Nov 17 12:43:44 2011 for EMAN2 by  doxygen 1.3.9.1