emdata_cuda.h

Go to the documentation of this file.
00001 /*
00002  * Author: Steven Ludtke, 04/10/2003 (sludtke@bcm.edu)
00003  * Copyright (c) 2000-2006 Baylor College of Medicine
00004  *
00005  * This software is issued under a joint BSD/GNU license. You may use the
00006  * source code in this file under either license. However, note that the
00007  * complete EMAN2 and SPARX software packages have some GPL dependencies,
00008  * so you are responsible for compliance with the licenses of these packages
00009  * if you opt to use BSD licensing. The warranty disclaimer below holds
00010  * in either instance.
00011  *
00012  * This complete copyright notice must be included in any revised version of the
00013  * source code. Additional authorship citations may be added, but existing
00014  * author citations must be preserved.
00015  *
00016  * This program is free software; you can redistribute it and/or modify
00017  * it under the terms of the GNU General Public License as published by
00018  * the Free Software Foundation; either version 2 of the License, or
00019  * (at your option) any later version.
00020  *
00021  * This program is distributed in the hope that it will be useful,
00022  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00023  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00024  * GNU General Public License for more details.
00025  *
00026  * You should have received a copy of the GNU General Public License
00027  * along with this program; if not, write to the Free Software
00028  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00029  *
00030  * */
00031 
00032 #ifndef eman__emdatacuda_h__
00033 #define eman__emdatacuda_h__ 1
00034 
00035 #ifdef EMAN2_USING_CUDA
00036 public:
00037 
        /** RAII-style helper tied to an EMData object's CUDA cache slot.
         * Presumably locks the slot on construction and unlocks it on
         * destruction (the constructor/destructor bodies are defined in the
         * implementation file, not visible here — confirm against
         * EMData::cuda_lock()/cuda_unlock()).
         * Copying and default construction are deliberately disallowed.
         */
        class CudaDataLock {
        public:
                /** Acquire the lock for the given image's CUDA data.
                 * @param that the EMData whose CUDA cache slot is to be locked
                 */
                CudaDataLock(const EMData* const that);

                /** Release the lock acquired at construction. */
                ~CudaDataLock();

        private:
                // Disallow assignment (declared, never defined — C++03 idiom)
                CudaDataLock& operator=(const CudaDataLock&);
                // Disallow copy construction (declared, never defined)
                CudaDataLock(const CudaDataLock& );
                // Disallow default construction — a lock must reference an image
                CudaDataLock();

                // Cache handle (slot index) of the locked data
                int data_cuda_handle;
        };
00096 
public:

        /** Bind this image's GPU read-only (cudaArray) copy to the CUDA texture.
         * @param interp_mode presumably selects interpolated texture fetches
         * when true — the kernel-side meaning is in the implementation file,
         * confirm there
         */
        void bind_cuda_texture(const bool interp_mode =true) const;

        /** Undo a previous bind_cuda_texture() call. */
        void unbind_cuda_texture() const;

        /** Get the device pointer to this image's GPU read-write data,
         * establishing/refreshing it in the CUDA cache as needed.
         * @return device pointer to the (float) image data on the GPU
         */
        float* get_cuda_data() const;

        /** Package this image's GPU data pointer and dimensions into an
         * EMDataForCuda struct suitable for passing to CUDA kernels.
         */
        EMDataForCuda get_data_struct_for_cuda() const;

        /** Cross correlation between this image and another, computed on the GPU.
         * @param image the image to correlate with this one
         * @param use_texturing whether the kernel should read via texture memory
         * @param center presumably centers the origin of the result when true —
         * confirm against the CPU calc_ccf
         * @return a new EMData holding the correlation result
         */
        EMData* calc_ccf_cuda(EMData* image, bool use_texturing,bool center=false ) const;

        /** Row-wise cross correlation on the GPU; GPU analogue of calc_ccfx.
         * @param with the image to correlate with
         * @param y0 first row to include
         * @param y1 last row to include (-1 presumably means the final row — confirm)
         * @param no_sum if true, do not sum the per-row correlations
         */
        EMData* calc_ccfx_cuda( EMData * const with, int y0=0, int y1=-1, bool no_sum=false);

        /** GPU analogue of make_rotational_footprint.
         * @param unwrap whether to return the polar-unwrapped footprint
         */
        EMData * make_rotational_footprint_cuda( bool unwrap=true);
00138 
00145         inline void gpu_update() {
00146                 flags |= EMDATA_NEEDUPD | EMDATA_CPU_NEEDS_UPDATE | EMDATA_GPU_RO_NEEDS_UPDATE;
00147         }
00148 
        /** Sum each column of this image on the GPU.
         * @return a new EMData holding the column sums
         */
        EMData* column_sum_cuda() const;


        /** Copy the GPU read-write buffer back into the CPU-side data array. */
        void copy_gpu_rw_to_cpu();

        /** Copy the CPU-side data array into the GPU read-write buffer. */
        void copy_cpu_to_gpu_rw();
00163 
00164         // A long term solution?
00165         inline void set_gpu_rw_current() const {
00166                 get_cuda_data();
00167         }
00168 
        /** Whether operating on the GPU is preferable to the CPU for this
         * image — the policy is defined in the implementation file.
         */
        bool gpu_operation_preferred() const;

        /** Copy the CPU-side data array into the GPU read-only (cudaArray) copy. */
        void copy_cpu_to_gpu_ro();

        /** Copy the GPU read-write buffer into the GPU read-only copy. */
        void copy_gpu_rw_to_gpu_ro();

        /** Copy the GPU read-only copy into the GPU read-write buffer. */
        void copy_gpu_ro_to_gpu_rw() const;

        /** Copy the GPU read-only copy back into the CPU-side data array. */
        void copy_gpu_ro_to_cpu() const;
00178 
00179         void print_this() const { cout << "this " << this << " " << cuda_cache_handle << endl; }
00180 
00181 
00182 
00183 
        /** Lock this image's slot in the CUDA cache so it cannot be evicted. */
        void cuda_lock() const;

        /** Release a lock previously acquired with cuda_lock(). */
        void cuda_unlock() const;

        /** Whether the GPU read-write copy of the data is up to date. */
        bool gpu_rw_is_current() const;

        /** Whether the CPU-side copy of the data is up to date. */
        bool cpu_rw_is_current() const;


        /** Adopt a device buffer as this image's GPU read-write data.
         * @param data device pointer to the buffer
         * @param x,y,z the dimensions of the data in the buffer
         * NOTE(review): ownership/freeing semantics are in the implementation
         * file — confirm before freeing the buffer externally.
         */
        void set_gpu_rw_data(float* data, const int x, const int y, const int z);
00214 private:
00219         int get_cuda_handle() const { return cuda_cache_handle; };
00220 
00221 
        /** Whether the GPU read-only (cudaArray) copy is up to date. */
        bool gpu_ro_is_current() const;

        /** Ensure the cudaArray copy reflects the latest data, refreshing it
         * if it is stale.
         */
        void check_cuda_array_update() const;
        /** Get the cudaArray (texture-bindable, read-only) copy of the data. */
        cudaArray* get_cuda_array() const;

        /** Release any device memory this image holds in the CUDA cache. */
        void free_cuda_memory() const;

        // Handle (slot index) of this image's entry in the static CudaCache;
        // mutable so const methods can lazily (re)establish cached copies.
        mutable int cuda_cache_handle;
00238 
00239 
00252         class CudaCache {
00260                 friend class EMData;
00261                 friend class CudaDataLock;
00262         public:
00266                 CudaCache(const int size);
00267 
00270                 ~CudaCache();
00271         protected:
00275 
00284                 int cache_rw_data(const EMData* const emdata, const float* const data,const int nx, const int ny, const int nz);
00285 
00286 
00293                 int store_rw_data(const EMData* const emdata, float* cuda_rw_data);
00294 
00300                 void replace_gpu_rw(const int handle, float* cuda_rw_data);
00301 
00310                 int cache_ro_data(const EMData* const emdata, const float* const data,const int nx, const int ny, const int nz);
00311 
00316                 inline float* get_rw_data(const int idx) const { return rw_cache[idx]; }
00317 
00322                 inline cudaArray* get_ro_data(const int idx) const { return ro_cache[idx]; }
00323 
00328                 inline bool has_rw_data(const int idx) const {
00329                         if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx,"The idx is beyond the cache size");
00330                         return (rw_cache[idx] != 0);
00331                 }
00332 
00337                 inline bool has_ro_data(const int idx) const {
00338                         if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx,"The idx is beyond the cache size");
00339                         return (ro_cache[idx] != 0);
00340                 }
00341 
00346                 void clear_item(const int idx);
00347 
00353                 void copy_rw_to_ro(const int idx);
00354 
00360                 void copy_ro_to_rw(const int idx);
00361 
00367                 void copy_ro_to_cpu(const int idx,float* data);
00368 
00373                 inline int get_ndim(const int idx) {
00374                         if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx,"The idx is beyond the cache size");
00375                         return caller_cache[idx]->get_ndim();
00376                 }
00377 
00384                 void lock(const int idx);
00385 
00392                 void unlock(const int idx);
00393 
00397                 void debug_print() const;
00398         private:
00400                 CudaCache(const CudaCache&);
00402                 CudaCache& operator=(const CudaCache&);
00404                 CudaCache();
00405 
00413                 float* alloc_rw_data(const int nx, const int ny, const int nz);
00414 
00415 
00420                 inline size_t get_emdata_bytes(const int idx) {
00421                         if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx,"The idx is beyond the cache size");
00422 
00423                         const EMData* e = caller_cache[idx];
00424                         return e->get_size()*sizeof(float);
00425                 }
00426 
00431                 void ensure_slot_space();
00432 
00442                 int blind_store_rw_data(const EMData* const emdata, float*  cuda_rw_data);
00443 
00445                 int cache_size;
00447                 int current_insert_idx;
00449                 size_t mem_allocated;
00451                 float** rw_cache;
00453                 const EMData** caller_cache;
00455                 cudaArray ** ro_cache;
00457                 vector<int> locked;
00458         };
00459 
        // The cache manipulates EMData internals directly
        friend class CudaCache;

        // The RAII lock helper needs access to the cache handle
        friend class CudaDataLock;

        /** Called when this image's cache slot is about to be reclaimed,
         * giving the image a chance to react — see the implementation file
         * for exactly what is preserved.
         */
        void cuda_cache_lost_imminently() const;

        // The single CUDA cache shared by every EMData instance
        static CudaCache cuda_cache;

public:
        /** Print the shared CUDA cache's debug state to standard out. */
        void cuda_cache_debug_print() const { cuda_cache.debug_print(); }
00477 
00478 #endif // EMAN2_USING_CUDA
00479 
00480 #endif //eman__emdatacuda_h__ 1
00481 

Generated on Mon Jul 19 12:40:09 2010 for EMAN2 by  doxygen 1.4.7