8 #include <cuda_runtime.h> 13 #define checkcublas(X) assert( ( X ) == CUBLAS_STATUS_SUCCESS ) 15 FIR::FIR(cublasHandle_t handle, cudaStream_t stream, float2 *hcoeff,
int M,
int f,
int nt) :
16 _handle(handle),_stream(stream),_M(M),_f(f)
25 assert(
_dout != NULL);
30 cudaMemcpy(
_dcoeff,hcoeff,
_ntap*
sizeof(float2),cudaMemcpyHostToDevice);
41 memset(
this,0,
sizeof(*
this));
46 float2 alpha = {1.0f,0.0f};
47 float2 beta = {0.0f,0.0f};
56 for(
int i=0;i<
_f;i++) {
68 cudaMemsetAsync(&
_dout[rem],0,_nb*
sizeof(float2),
_stream);
73 cudaMemcpyAsync(hout,
_dout,
_nb*
sizeof(float2),cudaMemcpyDeviceToHost,
_stream);
81 cudaMemcpyAsync(dout,
_dout,
_nb*
sizeof(float2),cudaMemcpyDeviceToDevice,
_stream);
FIR(cublasHandle_t handle, cudaStream_t stream, float2 *hcoeff, int M, int f, int nt)
void fir_apply(const float2 *din)
void fir_to_dev(float2 *dout)
void fir_to_host(float2 *hout)
void run_fir(const float2 *din, float2 *hout)