11 #ifndef GPU_KERNELS_INCUDED_h 12 #define GPU_KERNELS_INCUDED_h 14 #include <cuda_runtime.h> 15 #include "cublas_v2.h" 17 #include <curand_kernel.h> 20 #include <thrust/device_vector.h> 21 #include <thrust/device_ptr.h> 34 #define pi_f 3.14159265358979f 35 #define Q_PHASE_alt -1.570796327f 36 #define _31_BIT_VALUE 2147483647.5 41 #define PFB_DECIM_TPB 64. //Threads per block 76 double* __restrict tone_frquencies,
78 uint single_tone_length,
80 float2* __restrict intput,
81 float2* __restrict output
86 int* __restrict tone_frequencies,
87 int* __restrict tone_phases,
90 size_t single_tone_length,
92 float2* __restrict input,
93 float2* __restrict output
100 int* __restrict tone_frequencies,
101 int* __restrict tone_phases,
103 size_t index_counter,
104 size_t single_tone_length,
106 float2* __restrict input,
107 float2* __restrict output,
108 cudaStream_t internal_stream
112 float2* __restrict__ output,
113 unsigned int output_size,
115 unsigned long int last_index,
116 cudaStream_t internal_stream,
121 float2* __restrict__ input,
122 float2* __restrict__ output,
123 unsigned int output_size,
124 unsigned long int last_index,
126 cudaStream_t internal_stream
130 float2* __restrict__ from,
131 float2* __restrict__ to,
135 cudaStream_t internal_stream
138 float2* __restrict__ input,
139 float2* __restrict__ output,
141 cudaStream_t internal_stream
145 float2* __restrict__ input,
146 float2* __restrict__ output,
148 int effective_batching,
149 cudaStream_t internal_stream
153 template <
typename T>
176 float2* __restrict__ input,
177 float2* __restrict__ output,
191 void _cudaGetErrorEnum(cublasStatus_t error);
194 float2* __restrict__ input,
195 float2* __restrict__ output,
196 float2* __restrict__ profile,
197 cuComplex* __restrict__ zero,
198 cuComplex* __restrict__ one,
201 cublasHandle_t* __restrict__ handle
208 float2* __restrict__ input,
209 float2* __restrict__ output,
217 double2* __restrict__ input,
218 double2* __restrict__ output,
219 double2* __restrict__ profile,
220 cuDoubleComplex* __restrict__ zero,
221 cuDoubleComplex* __restrict__ one,
224 cublasHandle_t* __restrict__ handle
void D_cublas_decim(double2 *__restrict__ input, double2 *__restrict__ output, double2 *__restrict__ profile, cuDoubleComplex *__restrict__ zero, cuDoubleComplex *__restrict__ one, int ppt, int n_freqs, cublasHandle_t *__restrict__ handle)
void cublas_decim(float2 *__restrict__ input, float2 *__restrict__ output, float2 *__restrict__ profile, cuComplex *__restrict__ zero, cuComplex *__restrict__ one, int ppt, int n_freqs, cublasHandle_t *__restrict__ handle)
Descriptor of the multitone generation.
void tone_select_wrapper(float2 *__restrict__ input, float2 *__restrict__ output, filter_param *__restrict__ filter_info, int effective_batching, cudaStream_t internal_stream)
void decimate_spectra(float2 *__restrict__ input, float2 *__restrict__ output, int decim, int nfft, int input_len, int output_len, cudaStream_t stram_f)
unsigned long int num_steps
void decimate_pfb(float2 *__restrict__ input, float2 *__restrict__ output, int decim, int nfft, int output_length, cudaStream_t stram_f)
int * tones
How many samples per tone are present in the device buffer.
float2 * window
Polyphase filter parameter wrapper and utility variables for buffer remainder.
int n_tones
Total length of the device buffer.
__global__ void direct_demodulator_fp64(double *__restrict tone_frquencies, size_t index_counter, uint single_tone_length, size_t total_length, float2 *__restrict intput, float2 *__restrict output)
Direct demodulation kernel. This kernel takes the raw input from the SDR and separates channels...
void move_buffer_wrapper(float2 *__restrict__ from, float2 *__restrict__ to, int size, int from_offset, int to_offset, cudaStream_t internal_stream)
__global__ void direct_demodulator_integer(int *__restrict tone_frequencies, int *__restrict tone_phases, int wavetablelen, size_t index_counter, size_t single_tone_length, size_t total_length, float2 *__restrict input, float2 *__restrict output)
Integer version of the direct demodulation kernel (numerically more stable). This kernel takes the ra...
int length
Pointer to an already initialized window.
T * make_hamming_window(int length, int side, bool diagnostic, bool host_ret)
float2 * tone_gen(tone_parameters *info, int sampling_rate, float scale=1., bool device=false)
void direct_demodulator_wrapper(int *__restrict tone_frequencies, int *__restrict tone_phases, int wavetablelen, size_t index_counter, size_t single_tone_length, size_t total_length, float2 *__restrict input, float2 *__restrict output, cudaStream_t internal_stream)
void print_chirp_params(std::string comment, chirp_parameter cp)
float2 * make_sinc_window(int length, float fc, bool diagnostic, bool host_ret)
int eff_n_tones
Must be an array containing the fft bin number corresponding to the tone frequency.
void chirp_demodulator_wrapper(float2 *__restrict__ input, float2 *__restrict__ output, unsigned int output_size, unsigned long int last_index, chirp_parameter *__restrict__ info, cudaStream_t internal_stream)
int average_buffer
How many points to calculate in the FFT.
float2 * make_flat_window(int length, int side, bool diagnostic)
Creates a flattop window in the GPU memory.
void polyphase_filter_wrapper(float2 *__restrict__ input, float2 *__restrict__ output, filter_param *__restrict__ filter_info, cudaStream_t internal_stream)
int batching
How many buffers are averaged (length of the window has to be average_buffer * n_tones) ...
void chirp_gen_wrapper(float2 *__restrict__ output, unsigned int output_size, chirp_parameter *__restrict__ info, unsigned long int last_index, cudaStream_t internal_stream, float scale)