Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members

emdata_cuda.cpp

Go to the documentation of this file.
00001 /*
00002  * Author: Steven Ludtke, 04/10/2003 (sludtke@bcm.edu)
00003  * Copyright (c) 2000-2006 Baylor College of Medicine
00004  *
00005  * This software is issued under a joint BSD/GNU license. You may use the
00006  * source code in this file under either license. However, note that the
00007  * complete EMAN2 and SPARX software packages have some GPL dependencies,
00008  * so you are responsible for compliance with the licenses of these packages
00009  * if you opt to use BSD licensing. The warranty disclaimer below holds
00010  * in either instance.
00011  *
00012  * This complete copyright notice must be included in any revised version of the
00013  * source code. Additional authorship citations may be added, but existing
00014  * author citations must be preserved.
00015  *
00016  * This program is free software; you can redistribute it and/or modify
00017  * it under the terms of the GNU General Public License as published by
00018  * the Free Software Foundation; either version 2 of the License, or
00019  * (at your option) any later version.
00020  *
00021  * This program is distributed in the hope that it will be useful,
00022  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00023  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00024  * GNU General Public License for more details.
00025  *
00026  * You should have received a copy of the GNU General Public License
00027  * along with this program; if not, write to the Free Software
00028  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00029  *
00030  * */
00031 
00032 
00033 
00034 #ifdef EMAN2_USING_CUDA
00035 
00036 #include "emdata.h"
00037 #include "exception.h"
00038 #include <cuda_runtime_api.h>
00039 #include <driver_functions.h>
00040 #include <cuda.h>
00041 #include <cuda/cuda_util.h>
00042 #include <cuda/cuda_emfft.h>
00043 
00044 using namespace EMAN;
00045 
00046 // Static init
00047 const EMData* EMData::firstinlist = 0;
00048 const EMData* EMData::lastinlist = 0;
00049 int EMData::memused = 0;
00050 int EMData::fudgemem = 1.024E7; //let's leave 10 MB of 'fudge' memory on the device
00051 int EMData::mempoolused = -1;
00052 int EMData::mempoolarraysize = 0;
00053 bool EMData::usemempoolswitch = false;
00054 bool EMData::usecuda = (getenv("EMANUSECUDA") == NULL || atoi(getenv("EMANUSECUDA")) ) ? 1 : 0;
00055 float* EMData::mempool[] = {0};
00056 
00057 bool EMData::copy_to_cuda() const
00058 {
00059         //cout << "copying from host to device RW" << " " << num_bytes << endl;
00060         if(rw_alloc()) {
00061                 memused += num_bytes;   
00062                 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00063                 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00064         }else{return false;}
00065         
00066         return true;
00067 }
00068 
00069 bool EMData::copy_to_cuda()
00070 {
00071         //cout << "copying from host to device RW" << " " << num_bytes << endl;
00072         if(rw_alloc()) {
00073                 memused += num_bytes;
00074                 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00075                 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00076         }else{return false;}
00077         free_rdata(); //we have the data on either the host or device, not both (prevents concurrency issues)
00078         
00079         return true;
00080 }
00081 
00082 bool EMData::copy_to_cudaro() const
00083 {
00084         
00085         //cout << "copying from host to device RO" << " " << num_bytes << endl;
00086         if(ro_alloc()) {
00087                 memused += num_bytes;
00088                 copy_to_array(rdata, cudarodata, nx, ny, nz, cudaMemcpyHostToDevice);
00089         }else{return false;}
00090         
00091         return true;
00092 }
00093 
00094 bool EMData::rw_alloc() const
00095 {
00096         //cout << "rw_alloc" << endl;
00097         num_bytes = nxyz*sizeof(float);
00098         if(cudarwdata){return true;} // already exists
00099         //use the mempool if available
00100         if(usemempoolswitch && mempoolused > -1 && mempoolarraysize >= int(num_bytes))
00101         {
00102                 cudarwdata = mempool[mempoolused];
00103                 mempool[mempoolused] = 0;
00104                 mempoolused--;
00105                 return true;
00106         }       
00107         if(!freeup_devicemem(num_bytes)){return false;}
00108         cudaError_t error = cudaMalloc((void**)&cudarwdata,num_bytes);
00109         if ( error != cudaSuccess){return false;}
00110         if(!cudarodata){addtolist();}
00111         //cout << "rw alloc finish" << endl;
00112         return true;
00113 }
00114 
00115 bool EMData::ro_alloc() const
00116 {
00117         //cout << "ro_alloc" << endl;
00118         num_bytes = nxyz*sizeof(float);
00119         if(cudarodata){return true;} // already exists
00120         if(!freeup_devicemem(num_bytes)){return false;}
00121         cudarodata = get_cuda_array(nx, ny, nz);
00122         if(!cudarwdata){addtolist();}
00123         //cout << "ro alloc finish " << " " <<  cudarodata << endl;
00124         return true;
00125         
00126 }
00127 
00128 void EMData::bindcudaarrayA(const bool intp_mode) const
00129 {
00130         if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00131         if(nz > 1){
00132                 //cout << "3d bind" << endl;
00133                 bind_cuda_array_to_textureA(cudarodata, 3, intp_mode);
00134         }else{
00135                 //cout << "2d bind" << endl;
00136                 bind_cuda_array_to_textureA(cudarodata, 2, intp_mode);
00137         }
00138         
00139 }
00140 
00141 void EMData::unbindcudaarryA() const
00142 {
00143         
00144         if(nz > 1){
00145                 unbind_cuda_textureA(3);
00146         }else{
00147                 unbind_cuda_textureA(2);
00148         }
00149         
00150 }
00151 
00152 void EMData::bindcudaarrayB(const bool intp_mode) const
00153 {
00154         if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00155         if(nz > 1){
00156                 //cout << "3d bind" << endl;
00157                 bind_cuda_array_to_textureB(cudarodata, 3, intp_mode);
00158         }else{
00159                 //cout << "2d bind" << endl;
00160                 bind_cuda_array_to_textureB(cudarodata, 2, intp_mode);
00161         }
00162         
00163 }
00164 
00165 void EMData::unbindcudaarryB() const
00166 {
00167         
00168         if(nz > 1){
00169                 unbind_cuda_textureB(3);
00170         }else{
00171                 unbind_cuda_textureB(2);
00172         }
00173         
00174 }
00175 
00176 bool EMData::copy_from_device(const bool rocpy)
00177 {
00178         //cout << "copy from device to host " << cudarwdata << " " << rocpy << endl;
00179         //maybe we should check to see if rdata is still allocated? If not we would need to do either a malloc or new (also assumes that the size of rdata has not changed)
00180         if(cudarwdata && !rocpy){
00181                 //cout << "rw copy back " << rdata << " numbytes " << num_bytes << endl;
00182                 if(rdata == 0){rdata = (float*)malloc(num_bytes);} //allocate space if needed
00183                 cudaError_t error = cudaMemcpy(rdata,cudarwdata,num_bytes,cudaMemcpyDeviceToHost);
00184                 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00185                 rw_free(); //we have the data on either the host or device, not both (prevents concurrency issues)
00186         } else if (cudarodata && rocpy) {
00187                 if (nz > 1){
00188                         //cout << "ro copy back 3D" << endl;
00189                         cudaExtent extent;
00190                         extent.width  = nx;
00191                         extent.height = ny;
00192                         extent.depth  = nz;
00193                         cudaMemcpy3DParms copyParams = {0};
00194                         copyParams.srcArray = cudarodata;
00195                         copyParams.dstPtr = make_cudaPitchedPtr((void*)rdata, extent.width*sizeof(float), extent.width, extent.height);
00196                         copyParams.extent   = extent;
00197                         copyParams.kind     = cudaMemcpyDeviceToHost;
00198                         cudaError_t error = cudaMemcpy3D(&copyParams);
00199                         if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00200                 } else{
00201                         //cout << "ro copy back 2D" << endl;
00202                         cudaError_t error = cudaMemcpyFromArray(rdata,cudarodata,0,0,num_bytes,cudaMemcpyDeviceToHost);
00203                         if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00204                 }       
00205                 ro_free(); //we have the data on either the host or device, not both (prevents concurrency issues)
00206         } else {
00207                  return false;
00208         }
00209         //cout << "finished copying" << endl;
00210         update(); 
00211         return true;
00212 }
00213 
00214 bool EMData::copy_rw_to_ro() const
00215 {
00216         
00217         if(cudarwdata == 0){return false;}
00218         
00219         if(cudarodata == 0){
00220                 if(!freeup_devicemem(num_bytes)){return false;}
00221                 cudarodata = get_cuda_array(nx, ny, nz);
00222                 memused += num_bytes;
00223         }
00224         //this will copy over any prexisting data (saves a malloc)....
00225         copy_to_array(cudarwdata, cudarodata, nx, ny, nz, cudaMemcpyDeviceToDevice);
00226         roneedsupdate = 0; //just copied, so no longer need an update
00227         
00228         return true;
00229         
00230 }
00231 
00232 // The policy here is that when an EMData object is created, cudarwdata is set to 0. and no mem is allocated. It is
00233 //only when cudarwdata points to allocated data does the EMData object go on the list. cudarwdata should NEVER be set 
00234 void EMData::runcuda(float * results) const
00235 {
00236         
00237         if(results == 0){throw UnexpectedBehaviorException( "Cuda failed!!!");}
00238         if(cudarwdata != 0){
00239                 //rw_free();} //delete the old data, why not jus overwrite!! (save a cudaFree)
00240         } else {
00241                 addtolist(); // now that we are using memory add to the list
00242         }
00243         cudarwdata = results;
00244         
00245 }
00246 
00247 void EMData::rw_free() const
00248 {
00249         //cout << "rw_free " << " " << cudarwdata << endl;
00250         if(usemempoolswitch && mempoolused < (MEMPOOL_SIZE-1) && mempoolarraysize >= int(num_bytes))
00251         {
00252                 mempoolused++;
00253                 mempool[mempoolused] = cudarwdata;
00254                 return;
00255         }
00256         cudaError_t error = cudaFree(cudarwdata);
00257         if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaFree failed:" + string(cudaGetErrorString(error)));
00258         cudarwdata = 0;
00259         memused -= num_bytes;
00260         if(!cudarodata){removefromlist();}
00261         
00262 }
00263 
00264 void EMData::ro_free() const
00265 {
00266         //cout << "ro_free " << " " << cudarodata << endl;
00267         cudaError_t error = cudaFreeArray(cudarodata);
00268         if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaFreeArray failed:" + string(cudaGetErrorString(error)));
00269         cudarodata = 0;
00270         memused -= num_bytes;
00271         if(!cudarwdata){removefromlist();}
00272         
00273 }
00274 
00275 bool EMData::isrodataongpu() const
00276 {
00277         if(cudarodata != 0 && !roneedsupdate){return true;}
00278         if(cudarwdata != 0){
00279                 if(copy_rw_to_ro()){;
00280                         return true;
00281                 } else {
00282                         return false;
00283                 }
00284         }else{
00285                 return false;
00286         }
00287         
00288 }
00289 bool EMData::freeup_devicemem(const int& num_bytes) const
00290 {
00291         size_t freemem, totalmem;
00292         cudaMemGetInfo(&freemem, &totalmem);
00293         //cout  << "memusage" << " " << freemem << " " << totalmem << endl;
00294         if ((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){
00295                 return true;
00296         }else{
00297                 //if(num_bytes > memused){return false;} //it is not possible to free up enough memory!!        
00298                 //keep on removing stuff until enough memory is available
00299                 while(lastinlist != 0){
00300                         if(lastinlist->cudarwdata){
00301                                 cudaFree(lastinlist->cudarwdata);
00302                                 lastinlist->cudarwdata = 0;
00303                                 memused -= lastinlist->nxyz*sizeof(float);
00304                                 cudaMemGetInfo(&freemem, &totalmem); //update free memory
00305                         }
00306                         if(lastinlist->cudarodata){
00307                                 cudaFreeArray(lastinlist->cudarodata);
00308                                 lastinlist->cudarodata = 0;
00309                                 memused -= lastinlist->nxyz*sizeof(float);
00310                                 cudaMemGetInfo(&freemem, &totalmem); //update free memory
00311                         }
00312                         if(lastinlist != firstinlist){ //if there is more than one itme on the list
00313                                 lastinlist->nextlistitem->prevlistitem = 0;     // set the previtem link in the next item to zero
00314                                 lastinlist = lastinlist->nextlistitem;          // chop the last item in the list off and set to next item
00315                         }else{
00316                                 firstinlist = 0;        // we have deleted everything on the list
00317                                 lastinlist = 0;
00318                         }
00319                         if((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){return true;}     //this should break the loop....
00320                 }
00321         }       
00322         
00323         return false;   //if we failed :(
00324 }
00325 
00326 void EMData::addtolist() const
00327 {
00328         if(firstinlist == 0){ //if this is the first item in the list (first object in list)
00329                 firstinlist = this;
00330                 lastinlist = this;
00331                 nextlistitem = 0;
00332                 prevlistitem = 0;
00333         }else{
00334                 //we add to top of list
00335                 firstinlist->nextlistitem = this;
00336                 prevlistitem = firstinlist;
00337                 nextlistitem = 0;
00338                 firstinlist = this;
00339         }       
00340         
00341 }
00342 
00343 void EMData::elementaccessed() const
00344 {
00345         removefromlist();
00346         //now insert at top (there is no case where we call this function, but there is nothing in the list)
00347         firstinlist->nextlistitem = this;
00348         prevlistitem = firstinlist;
00349         firstinlist = this;
00350 }
00351 
00352 void EMData::removefromlist() const
00353 {
00354         //remove from list
00355         if(firstinlist == lastinlist){ //last item in list....
00356                 firstinlist = 0;
00357                 lastinlist = 0;
00358                 return;
00359         }
00360         if(nextlistitem !=0){
00361                 nextlistitem->prevlistitem = prevlistitem;      //this object is not first in the list
00362         }else{
00363                 firstinlist = prevlistitem;
00364         }
00365         if(prevlistitem !=0){
00366                 prevlistitem->nextlistitem = nextlistitem;      //this item is not last in the list
00367         }else{
00368                 lastinlist = nextlistitem;
00369         }
00370         
00371 }
00372 
00373 void EMData::usemempool(int size)
00374 {
00375         
00376         usemempoolswitch = true;
00377         mempoolarraysize = size; // this allow for complex arrays to be stores, but this does waste a little space
00378 
00379 }
00380 
00381 void EMData::freemempool()
00382 {
00383         for(int i = 0; i < MEMPOOL_SIZE; i ++)
00384         {
00385                 if(mempool[i] == 0)
00386                 {
00387                         break;
00388                 }else{
00389                         cudaFree(mempool[i]);
00390                 }
00391         }
00392 }
00393 
00394 void EMData::switchoncuda()
00395 {
00396         EMData::usecuda = 1;    
00397 }
00398 
00399 void EMData::switchoffcuda()
00400 {
00401         EMData::usecuda = 0;    
00402 }
00403 
00404 void EMData::cuda_cleanup() 
00405 {
00406         do_cuda_fft_cache_destroy();
00407         //Cleanup any object mess.... CUDA has OCD
00408         while(lastinlist){
00409                 if(lastinlist->cudarwdata) lastinlist->rw_free();
00410                 if(lastinlist && lastinlist->cudarodata) lastinlist->ro_free();
00411         }
00412         
00413         cudaThreadExit();
00414 
00415 }
00416 
00417 void EMData::cuda_initialize()
00418 {
00419         device_init();
00420 }
00421 #endif //EMAN2_USING_CUDA

Generated on Mon Mar 7 18:18:30 2011 for EMAN2 by  doxygen 1.3.9.1