#ifndef eman__emdatacuda_h__
#define eman__emdatacuda_h__ 1

#ifdef EMAN2_USING_CUDA
public:
/** RAII object that locks an EMData's slot in the static CUDA cache for the
 * lifetime of the lock, preventing the cached device data from being evicted
 * while it is in use.
 */
class CudaDataLock {
public:
	CudaDataLock(const EMData* const that);

	~CudaDataLock();

private:
	// Disallow assignment, copy construction and default construction
	CudaDataLock& operator=(const CudaDataLock&);
	CudaDataLock(const CudaDataLock&);
	CudaDataLock();

	/// Handle (slot index) of the locked data in the CUDA cache
	int data_cuda_handle;
};
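// Usage sketch (illustrative only; "image" is an assumed EMData* and the
// surrounding scope is hypothetical): the lock pins the image's slot in the
// static CUDA cache so its device data cannot be evicted mid-computation.
//
//	{
//		CudaDataLock lock(image);                // pin the cache slot (RAII)
//		float* d_data = image->get_cuda_data();  // device pointer stays valid
//		// ... launch CUDA kernels that use d_data ...
//	}                                            // ~CudaDataLock releases the pin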
public:

// Bind this image's read-only CUDA array for texture fetches;
// interp_mode selects interpolated (as opposed to nearest-neighbor) sampling.
void bind_cuda_texture(const bool interp_mode = true) const;

// Release the texture binding.
void unbind_cuda_texture() const;

// Get the device pointer to this image's read-write GPU data, allocating
// and uploading it via the CUDA cache if necessary.
float* get_cuda_data() const;

// Get a plain struct (device pointer plus dimensions) suitable for passing
// directly to CUDA kernels.
EMDataForCuda get_data_struct_for_cuda() const;

// GPU cross-correlation with another image; use_texturing selects the
// texture-sampling path, center shifts the origin of the result to the
// image center.
EMData* calc_ccf_cuda(EMData* image, bool use_texturing, bool center = false) const;

// GPU row-wise (1D) cross-correlation over rows y0..y1; if no_sum is true
// the per-row results are returned unsummed.
EMData* calc_ccfx_cuda(EMData* const with, int y0 = 0, int y1 = -1, bool no_sum = false);

// GPU version of make_rotational_footprint.
EMData* make_rotational_footprint_cuda(bool unwrap = true);
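// Usage sketch (illustrative; "a" and "b" are assumed to be two EMData
// images of matching dimensions): compute their cross-correlation on the
// GPU, using the texture-sampling path and centering the result.
//
//	EMData* ccf = a->calc_ccf_cuda(b, /*use_texturing=*/true, /*center=*/true);
//	// ... locate the correlation peak ...
//	delete ccf; // the returned image is assumed to be caller-owned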

// Mark the cached statistics, the CPU copy and the GPU read-only copy as
// needing an update after the GPU read-write data has been changed; the
// stale copies are refreshed lazily on next access.
inline void gpu_update() {
	flags |= EMDATA_NEEDUPD | EMDATA_CPU_NEEDS_UPDATE | EMDATA_GPU_RO_NEEDS_UPDATE;
}

// Sum the columns of this image on the GPU, returning a new image.
EMData* column_sum_cuda() const;

// Explicit transfers between the CPU data and the GPU read-write buffer.
void copy_gpu_rw_to_cpu();

void copy_cpu_to_gpu_rw();

// Force the GPU read-write copy to be current (allocating/uploading as needed).
inline void set_gpu_rw_current() const {
	get_cuda_data();
}

// Heuristic used to decide whether the next operation on this image should
// run on the GPU.
bool gpu_operation_preferred() const;

// Transfers involving the GPU read-only (texturable) copy.
void copy_cpu_to_gpu_ro();

void copy_gpu_rw_to_gpu_ro();

void copy_gpu_ro_to_gpu_rw() const;

void copy_gpu_ro_to_cpu() const;
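// Data-flow sketch (illustrative): an image can exist in up to three
// representations: CPU memory, a GPU read-write buffer (float*), and a GPU
// read-only cudaArray used for texturing. The copy_* members above move
// data between them, e.g.:
//
//	img->copy_cpu_to_gpu_rw();    // upload host data for device-side writes
//	// ... mutate the rw buffer in a kernel, then img->gpu_update() ...
//	img->copy_gpu_rw_to_gpu_ro(); // refresh the texturable read-only copy
//	img->copy_gpu_rw_to_cpu();    // bring the results back to host memory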
// Debugging aid: print this object's address and its CUDA cache handle.
void print_this() const { cout << "this " << this << " " << cuda_cache_handle << endl; }

// Lock/unlock this image's slot in the CUDA cache so it cannot be evicted
// (see also the RAII wrapper CudaDataLock).
void cuda_lock() const;

void cuda_unlock() const;

// Query which representation of the data is currently up to date.
bool gpu_rw_is_current() const;

bool cpu_rw_is_current() const;

// Adopt an existing device allocation, with the given dimensions, as this
// image's GPU read-write data.
void set_gpu_rw_data(float* data, const int x, const int y, const int z);
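// Sketch of the currency/locking queries (illustrative; the exact calling
// pattern is an assumption): a caller can test which representation is up to
// date before choosing a code path, locking the slot while the GPU copy is
// in use.
//
//	if (img->gpu_rw_is_current()) {
//		img->cuda_lock();
//		// ... operate on img->get_cuda_data() ...
//		img->cuda_unlock();
//	} else {
//		// fall back to the CPU data path
//	}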
private:
// Get this image's handle (slot index) in the CUDA cache.
int get_cuda_handle() const { return cuda_cache_handle; }

bool gpu_ro_is_current() const;

// Ensure the read-only CUDA array reflects the latest data before use.
void check_cuda_array_update() const;
cudaArray* get_cuda_array() const;

// Release this image's device memory back to the CUDA cache.
void free_cuda_memory() const;

// Handle (slot index) of this image's entry in the static CUDA cache;
// mutable because caching is transparent to const operations.
mutable int cuda_cache_handle;
/** A cache of per-image device allocations: each slot can hold a read-write
 * float buffer (rw_cache) and/or a read-only cudaArray (ro_cache) belonging
 * to one EMData object.
 */
class CudaCache {
	friend class EMData;
	friend class CudaDataLock;
public:
	/// Construct a cache with the given number of slots.
	CudaCache(const int size);

	~CudaCache();
protected:
	/// Copy host data into device read-write memory and cache it, returning the slot handle.
	int cache_rw_data(const EMData* const emdata, const float* const data, const int nx, const int ny, const int nz);

	/// Cache an already-allocated device read-write buffer, returning the slot handle.
	int store_rw_data(const EMData* const emdata, float* cuda_rw_data);

	/// Replace the device read-write buffer held in the given slot.
	void replace_gpu_rw(const int handle, float* cuda_rw_data);

	/// Copy host data into a read-only cudaArray and cache it, returning the slot handle.
	int cache_ro_data(const EMData* const emdata, const float* const data, const int nx, const int ny, const int nz);

	inline float* get_rw_data(const int idx) const { return rw_cache[idx]; }

	inline cudaArray* get_ro_data(const int idx) const { return ro_cache[idx]; }

	inline bool has_rw_data(const int idx) const {
		if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx, "The idx is beyond the cache size");
		return (rw_cache[idx] != 0);
	}

	inline bool has_ro_data(const int idx) const {
		if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx, "The idx is beyond the cache size");
		return (ro_cache[idx] != 0);
	}
	/// Free both device allocations in the given slot.
	void clear_item(const int idx);

	/// Copy a slot's read-write buffer into its read-only cudaArray, and vice versa.
	void copy_rw_to_ro(const int idx);

	void copy_ro_to_rw(const int idx);

	/// Copy a slot's read-only device data into the supplied host buffer.
	void copy_ro_to_cpu(const int idx, float* data);

	/// Dimensionality of the image cached in the given slot.
	inline int get_ndim(const int idx) {
		if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx, "The idx is beyond the cache size");
		return caller_cache[idx]->get_ndim();
	}

	/// Prevent (lock) or permit (unlock) eviction of the given slot.
	void lock(const int idx);

	void unlock(const int idx);

	void debug_print() const;
private:
	// Disallow copying, assignment and default construction
	CudaCache(const CudaCache&);
	CudaCache& operator=(const CudaCache&);
	CudaCache();

	/// Allocate device read-write memory for an image of the given dimensions.
	float* alloc_rw_data(const int nx, const int ny, const int nz);

	/// Size, in bytes, of the image cached in the given slot.
	inline size_t get_emdata_bytes(const int idx) {
		if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx, "The idx is beyond the cache size");

		const EMData* e = caller_cache[idx];
		return e->get_size() * sizeof(float);
	}

	/// Make sure the slot at current_insert_idx is free, evicting its occupant if necessary.
	void ensure_slot_space();

	/// Store a device read-write buffer in the current slot without safety checks, returning the slot handle.
	int blind_store_rw_data(const EMData* const emdata, float* cuda_rw_data);

	/// Number of slots in the cache
	int cache_size;
	/// Slot that will be used for the next insertion
	int current_insert_idx;
	/// Total device memory currently allocated, in bytes
	size_t mem_allocated;
	/// Per-slot device read-write buffers
	float** rw_cache;
	/// Per-slot pointers back to the owning EMData objects
	const EMData** caller_cache;
	/// Per-slot read-only CUDA arrays, used for texturing
	cudaArray** ro_cache;
	/// Per-slot lock counts; locked slots are never evicted
	vector<int> locked;
};
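// Note (inferred from the interface above): CudaCache is a slot-based cache.
// Each cached EMData is identified by an integer handle, its slot index,
// which the image stores in cuda_cache_handle. cache_rw_data/cache_ro_data
// claim the slot at current_insert_idx, with ensure_slot_space evicting an
// unlocked occupant when required; lock/unlock mark slots as non-evictable.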

friend class CudaCache;

friend class CudaDataLock;

// Called by the cache just before this image's slot is evicted, giving the
// object a chance to preserve its data and invalidate its handle.
void cuda_cache_lost_imminently() const;

// The single CUDA cache shared by all EMData objects.
static CudaCache cuda_cache;

public:
void cuda_cache_debug_print() const { cuda_cache.debug_print(); }

#endif // EMAN2_USING_CUDA

#endif // eman__emdatacuda_h__