Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members

emdata_cuda.cpp

Go to the documentation of this file.
00001 /*
00002  * Author: Steven Ludtke, 04/10/2003 (sludtke@bcm.edu)
00003  * Copyright (c) 2000-2006 Baylor College of Medicine
00004  *
00005  * This software is issued under a joint BSD/GNU license. You may use the
00006  * source code in this file under either license. However, note that the
00007  * complete EMAN2 and SPARX software packages have some GPL dependencies,
00008  * so you are responsible for compliance with the licenses of these packages
00009  * if you opt to use BSD licensing. The warranty disclaimer below holds
00010  * in either instance.
00011  *
00012  * This complete copyright notice must be included in any revised version of the
00013  * source code. Additional authorship citations may be added, but existing
00014  * author citations must be preserved.
00015  *
00016  * This program is free software; you can redistribute it and/or modify
00017  * it under the terms of the GNU General Public License as published by
00018  * the Free Software Foundation; either version 2 of the License, or
00019  * (at your option) any later version.
00020  *
00021  * This program is distributed in the hope that it will be useful,
00022  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00023  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00024  * GNU General Public License for more details.
00025  *
00026  * You should have received a copy of the GNU General Public License
00027  * along with this program; if not, write to the Free Software
00028  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00029  *
00030  * */
00031 
00032 
00033 
00034 #ifdef EMAN2_USING_CUDA
00035 
00036 #include "emdata.h"
00037 #include "exception.h"
00038 #include <cuda_runtime_api.h>
00039 #include <driver_functions.h>
00040 #include <cuda.h>
00041 #include <cuda/cuda_util.h>
00042 #include <cuda/cuda_emfft.h>
00043 
00044 using namespace EMAN;
00045 
00046 // Static init
00047 const EMData* EMData::firstinlist = 0;
00048 const EMData* EMData::lastinlist = 0;
00049 int EMData::memused = 0;
00050 int EMData::fudgemem = 1.024E8; //let's leave 10 MB of 'fudge' memory on the device
00051 int EMData::mempoolused = -1;
00052 int EMData::mempoolarraysize = 0;
00053 bool EMData::usemempoolswitch = false;
00054 bool EMData::usecuda = (getenv("EMANUSECUDA") == NULL) ? 0 : bool(atoi(getenv("EMANUSECUDA")));
00055 float* EMData::mempool[] = {0};
00056 
00057 bool EMData::copy_to_cuda_keepcpu() const
00058 {
00059         //cout << "copying from host to device RW" << " " << num_bytes << endl;
00060         if(rw_alloc()) {
00061                 memused += num_bytes;   
00062                 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00063                 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00064         }else{return false;}
00065         
00066         return true;
00067 }
00068 
00069 bool EMData::copy_to_cuda()
00070 {
00071         //cout << "copying from host to device RW" << " " << num_bytes << endl;
00072         if(rw_alloc()) {
00073                 memused += num_bytes;
00074                 cudaError_t error = cudaMemcpy(cudarwdata,rdata,num_bytes,cudaMemcpyHostToDevice);
00075                 if ( error != cudaSuccess) {
00076                         //cout << rdata << " " << cudarwdata << endl;
00077                         throw UnexpectedBehaviorException( "CudaMemcpy (host to device) failed:" + string(cudaGetErrorString(error)));
00078                 }
00079         }else{return false;}
00080         //Temporaly disabled, causes LOTS of bugs......
00081         //free_rdata(); //we have the data on either the host or device, not both (prevents concurrency issues)
00082         
00083         return true;
00084 }
00085 
00086 bool EMData::copy_to_cudaro() const
00087 {
00088         
00089         //cout << "copying from host to device RO" << " " << num_bytes << endl;
00090         if(ro_alloc()) {
00091                 memused += num_bytes;
00092                 copy_to_array(rdata, cudarodata, nx, ny, nz, cudaMemcpyHostToDevice);
00093         }else{return false;}
00094         
00095         return true;
00096 }
00097 
00098 bool EMData::rw_alloc() const
00099 {
00100         //cout << "rw_alloc" << endl;
00101         num_bytes = nxyz*sizeof(float);
00102         if(cudarwdata){return true;} // already exists
00103         //use the mempool if available
00104         if(usemempoolswitch && mempoolused > -1 && mempoolarraysize >= int(num_bytes))
00105         {
00106                 cudarwdata = mempool[mempoolused];
00107                 mempool[mempoolused] = 0;
00108                 mempoolused--;
00109                 return true;
00110         }
00111         if(!freeup_devicemem(num_bytes)){return false;}
00112         cudaError_t error = cudaMalloc((void**)&cudarwdata,num_bytes);
00113         if ( error != cudaSuccess){return false;}
00114         if(!cudarodata){addtolist();}
00115         //cout << "rw alloc finish" << endl;
00116         return true;
00117 }
00118 
00119 bool EMData::ro_alloc() const
00120 {
00121         //cout << "ro_alloc" << endl;
00122         num_bytes = nxyz*sizeof(float);
00123         if(cudarodata){return true;} // already exists
00124         if(!freeup_devicemem(num_bytes)){return false;}
00125         cudarodata = get_cuda_array(nx, ny, nz);
00126         if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00127         if(!cudarwdata){addtolist();}
00128         //cout << "ro alloc finish " << " " <<  cudarodata << endl;
00129         return true;
00130         
00131 }
00132 
00133 void EMData::bindcudaarrayA(const bool intp_mode) const
00134 {
00135         if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00136         if(nz > 1){
00137                 //cout << "3d bind" << endl;
00138                 bind_cuda_array_to_textureA(cudarodata, 3, intp_mode);
00139         }else{
00140                 //cout << "2d bind" << endl;
00141                 bind_cuda_array_to_textureA(cudarodata, 2, intp_mode);
00142         }
00143         
00144 }
00145 
00146 void EMData::unbindcudaarryA() const
00147 {
00148         
00149         if(nz > 1){
00150                 unbind_cuda_textureA(3);
00151         }else{
00152                 unbind_cuda_textureA(2);
00153         }
00154         
00155 }
00156 
00157 void EMData::bindcudaarrayB(const bool intp_mode) const
00158 {
00159         if(cudarodata == 0){throw UnexpectedBehaviorException( "Cuda Array not allocated!!");}
00160         if(nz > 1){
00161                 //cout << "3d bind" << endl;
00162                 bind_cuda_array_to_textureB(cudarodata, 3, intp_mode);
00163         }else{
00164                 //cout << "2d bind" << endl;
00165                 bind_cuda_array_to_textureB(cudarodata, 2, intp_mode);
00166         }
00167         
00168 }
00169 
00170 void EMData::unbindcudaarryB() const
00171 {
00172         
00173         if(nz > 1){
00174                 unbind_cuda_textureB(3);
00175         }else{
00176                 unbind_cuda_textureB(2);
00177         }
00178         
00179 }
00180 
00181 bool EMData::copy_from_device(const bool rocpy)
00182 {
00183         //cout << "copy from device to host " << cudarwdata << " " << rocpy << endl;
00184         //maybe we should check to see if rdata is still allocated? If not we would need to do either a malloc or new (also assumes that the size of rdata has not changed)
00185         if(cudarwdata && !rocpy){
00186                 //cout << "rw copy back " << rdata << " numbytes " << num_bytes << endl;
00187                 if(rdata == 0){rdata = (float*)malloc(num_bytes);} //allocate space if needed, assumes size hasn't changed(Which is hasn't so far)
00188                 cudaError_t error = cudaMemcpy(rdata,cudarwdata,num_bytes,cudaMemcpyDeviceToHost);
00189                 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00190                 rw_free(); //we have the data on either the host or device, not both (prevents concurrency issues)
00191                 if(cudarodata) ro_free(); // clear any RO data, for call safety
00192         } else if (cudarodata && rocpy) {
00193                 if (nz > 1){
00194                         //cout << "ro copy back 3D" << endl;
00195                         cudaExtent extent;
00196                         extent.width  = nx;
00197                         extent.height = ny;
00198                         extent.depth  = nz;
00199                         cudaMemcpy3DParms copyParams = {0};
00200                         copyParams.srcArray = cudarodata;
00201                         copyParams.dstPtr = make_cudaPitchedPtr((void*)rdata, extent.width*sizeof(float), extent.width, extent.height);
00202                         copyParams.extent   = extent;
00203                         copyParams.kind     = cudaMemcpyDeviceToHost;
00204                         cudaError_t error = cudaMemcpy3D(&copyParams);
00205                         if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00206                 } else{
00207                         //cout << "ro copy back 2D" << endl;
00208                         cudaError_t error = cudaMemcpyFromArray(rdata,cudarodata,0,0,num_bytes,cudaMemcpyDeviceToHost);
00209                         if ( error != cudaSuccess) throw UnexpectedBehaviorException( "RO CudaMemcpy (device to host) failed:" + string(cudaGetErrorString(error)));
00210                 }       
00211                 ro_free(); //we have the data on either the host or device, not both (prevents concurrency issues)
00212         } else {
00213                  return false;
00214         }
00215         //cout << "finished copying" << endl;
00216         update(); 
00217         return true;
00218 }
00219 
00220 bool EMData::copy_rw_to_ro() const
00221 {
00222         
00223         if(cudarwdata == 0){return false;}
00224         
00225         if(cudarodata == 0){
00226                 if(!freeup_devicemem(num_bytes)){return false;}
00227                 cudarodata = get_cuda_array(nx, ny, nz);
00228                 if(cudarodata == 0) throw UnexpectedBehaviorException("Bad Array alloc");
00229                 memused += num_bytes;
00230         }
00231         //this will copy over any prexisting data (saves a malloc)....(but sometimes not a safe call.....)
00232         copy_to_array(cudarwdata, cudarodata, nx, ny, nz, cudaMemcpyDeviceToDevice);
00233         roneedsupdate = 0; //just copied, so no longer need an update
00234         elementaccessed(); //To move the image to the top of the stack, prevents deletion before useage(If the image is at the stack bottom, and then anoth image is moved on....)
00235         
00236         return true;
00237         
00238 }
00239 
00240 // The policy here is that when an EMData object is created, cudarwdata is set to 0. and no mem is allocated. It is
00241 //only when cudarwdata points to allocated data does the EMData object go on the list. cudarwdata should NEVER be set 
00242 void EMData::runcuda(float * results) const
00243 {
00244         
00245         if(results == 0){throw UnexpectedBehaviorException( "Cuda failed!!!");}
00246         if(cudarwdata != 0){
00247                 //rw_free();} //delete the old data, why not jus overwrite!! (save a cudaFree)
00248         } else {
00249                 addtolist(); // now that we are using memory add to the list
00250         }
00251         cudarwdata = results;
00252         
00253 }
00254 
00255 void EMData::rw_free() const
00256 {
00257         //cout << "rw_free " << " " << cudarwdata << endl;
00258         if(usemempoolswitch && mempoolused < (MEMPOOL_SIZE-1) && mempoolarraysize >= int(num_bytes))
00259         {
00260                 mempoolused++;
00261                 mempool[mempoolused] = cudarwdata;
00262                 return;
00263         }
00264         cudaError_t error = cudaFree(cudarwdata);
00265         if ( error != cudaSuccess){
00266                 cout << rdata << " " << cudarwdata << endl;
00267                 throw UnexpectedBehaviorException( "CudaFree failed:" + string(cudaGetErrorString(error)));
00268         }
00269         cudarwdata = 0;
00270         memused -= num_bytes;
00271         if(!cudarodata){removefromlist();}
00272         
00273 }
00274 
00275 void EMData::ro_free() const
00276 {
00277         //cout << "ro_free " << " " << cudarodata << endl;
00278         cudaError_t error = cudaFreeArray(cudarodata);
00279         if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaFreeArray failed:" + string(cudaGetErrorString(error)));
00280         cudarodata = 0;
00281         memused -= num_bytes;
00282         if(!cudarwdata){removefromlist();}
00283         
00284 }
00285 
00286 bool EMData::isrodataongpu() const
00287 {
00288         if(cudarodata != 0 && !roneedsupdate){return true;}
00289         if(cudarwdata != 0){
00290                 if(copy_rw_to_ro()){;
00291                         return true;
00292                 } else {
00293                         return false;
00294                 }
00295         }else{
00296                 return false;
00297         }
00298         
00299 }
00300 bool EMData::freeup_devicemem(const int& num_bytes) const
00301 {
00302         size_t freemem=0, totalmem=0; //initialize to prevent undefined behaviour
00303         cudaMemGetInfo(&freemem, &totalmem);
00304         //cout  << "memusage" << " " << freemem << " " << totalmem << endl;
00305         if ((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){
00306                 return true;
00307         }else{
00308                 //if(num_bytes > memused){return false;} //it is not possible to free up enough memory!!        
00309                 //keep on removing stuff until enough memory is available
00310                 while(lastinlist != 0){
00311                         if(lastinlist->cudarwdata){
00312                                 cudaFree(lastinlist->cudarwdata);
00313                                 lastinlist->cudarwdata = 0;
00314                                 memused -= lastinlist->nxyz*sizeof(float);
00315                                 cudaMemGetInfo(&freemem, &totalmem); //update free memory
00316                         }
00317                         if(lastinlist->cudarodata){
00318                                 cudaFreeArray(lastinlist->cudarodata);
00319                                 lastinlist->cudarodata = 0;
00320                                 memused -= lastinlist->nxyz*sizeof(float);
00321                                 cudaMemGetInfo(&freemem, &totalmem); //update free memory
00322                         }
00323                         if(lastinlist != firstinlist){ //if there is more than one itme on the list
00324                                 lastinlist->nextlistitem->prevlistitem = 0;     // set the previtem link in the next item to zero
00325                                 lastinlist = lastinlist->nextlistitem;          // chop the last item in the list off and set to next item
00326                         }else{
00327                                 firstinlist = 0;        // we have deleted everything on the list
00328                                 lastinlist = 0;
00329                         }
00330                         if((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){return true;}     //this should break the loop....
00331                 }
00332         }       
00333         
00334         return false;   //if we failed :(
00335 }
00336 
00337 void EMData::addtolist() const
00338 {
00339         if(firstinlist == 0){ //if this is the first item in the list (first object in list)
00340                 firstinlist = this;
00341                 lastinlist = this;
00342                 nextlistitem = 0;
00343                 prevlistitem = 0;
00344         }else{
00345                 //we add to top of list
00346                 firstinlist->nextlistitem = this;
00347                 prevlistitem = firstinlist;
00348                 nextlistitem = 0;
00349                 firstinlist = this;
00350         }       
00351         
00352 }
00353 
00354 void EMData::elementaccessed() const
00355 {
00356         removefromlist();
00357         //now insert at top (there is no case where we call this function, but there is nothing in the list)
00358         firstinlist->nextlistitem = this;
00359         prevlistitem = firstinlist;
00360         firstinlist = this;
00361 }
00362 
00363 void EMData::removefromlist() const
00364 {
00365         //remove from list
00366         if(firstinlist == lastinlist){ //last item in list....
00367                 firstinlist = 0;
00368                 lastinlist = 0;
00369                 return;
00370         }
00371         if(nextlistitem !=0){
00372                 nextlistitem->prevlistitem = prevlistitem;      //this object is not first in the list
00373         }else{
00374                 firstinlist = prevlistitem;
00375         }
00376         if(prevlistitem !=0){
00377                 prevlistitem->nextlistitem = nextlistitem;      //this item is not last in the list
00378         }else{
00379                 lastinlist = nextlistitem;
00380         }
00381         
00382 }
00383 
00384 void EMData::usemempool(int size)
00385 {
00386         
00387         usemempoolswitch = true;
00388         mempoolarraysize = size; // this allow for complex arrays to be stores, but this does waste a little space
00389 
00390 }
00391 
00392 void EMData::freemempool()
00393 {
00394         for(int i = 0; i < MEMPOOL_SIZE; i ++)
00395         {
00396                 if(mempool[i] == 0)
00397                 {
00398                         break;
00399                 }else{
00400                         cudaFree(mempool[i]);
00401                 }
00402         }
00403 }
00404 
00405 void EMData::switchoncuda()
00406 {
00407         EMData::usecuda = 1;    
00408 }
00409 
00410 void EMData::switchoffcuda()
00411 {
00412         EMData::usecuda = 0;    
00413 }
00414 
00415 void EMData::cuda_cleanup() 
00416 {
00417         do_cuda_fft_cache_destroy();
00418         //Cleanup any object mess.... CUDA has OCD
00419         while(lastinlist){
00420                 if(lastinlist->cudarwdata) lastinlist->rw_free();
00421                 if(lastinlist && lastinlist->cudarodata) lastinlist->ro_free();
00422         }
00423         
00424         cudaThreadExit();
00425 
00426 }
00427 
00428 bool EMData::cuda_initialize()
00429 {
00430         if(device_init())
00431         {
00432                 return 1;
00433         } else {
00434                 switchoffcuda();
00435                 return 0;
00436         }
00437 }
00438 #endif //EMAN2_USING_CUDA

Generated on Tue Jul 12 13:48:57 2011 for EMAN2 by  doxygen 1.3.9.1