3 #define checkcublas(X) assert( ( X ) == CUBLAS_STATUS_SUCCESS ) 10 diagnostic = init_diagnostic;
16 bool mixed_buffer_type =
false;
17 int chirp_counter = 0;
19 w_type last_w_type = NODSP;
22 try{last_w_type =
parameters->wave_type.at(0);}
catch(
const std::out_of_range& e){
23 if(diagnostic)
print_warning(
"No signal processing options found. Transmitting full buffer.");
27 for(
size_t i = 0; i <
parameters->wave_type.size(); i++){
28 if(
parameters->wave_type[i]!=last_w_type)mixed_buffer_type =
true;
29 if(
parameters->wave_type[i]==CHIRP)chirp_counter++;
32 print_error(
"Multiple chirp RX buffer demodulation has been requested. This feature is not implemented yet.");
36 if(mixed_buffer_type){
37 print_error(
"Mixed RX buffer demodulation has been requested. This feature is not implemented yet.");
43 cudaDeviceGetStreamPriorityRange ( &low_p, &high_p );
44 cudaStreamCreateWithPriority(&internal_stream,cudaStreamNonBlocking, low_p);
50 decimator_active = (
parameters->decim > 0)?
true:
false;
60 if(diagnostic)
print_warning(
"Demodulator diagnostic enabled.");
64 cudaMalloc((
void **)&DIRECT_tone_frquencies,
parameters->wave_type.size()*
sizeof(int));
67 cudaMalloc((
void **)&DIRECT_tone_phases,
parameters->wave_type.size()*
sizeof(int));
68 cudaMemset(DIRECT_tone_phases, 0,
parameters->wave_type.size()*
sizeof(int));
70 DIRECT_tones = (
int*)malloc(
parameters->wave_type.size()*
sizeof(int));
71 for(uint k=0; k<
parameters->wave_type.size(); k++ ){
76 cudaMemcpy(DIRECT_tone_frquencies, DIRECT_tones,
parameters->freq.size() *
sizeof(int),cudaMemcpyHostToDevice);
79 cudaMalloc((
void **)&direct_input,
parameters->buffer_len*
sizeof(float2));
85 cudaMalloc((
void **)&direct_output, DIRECT_output_size*
sizeof(float2));
88 DIRECT_current_index = 0;
91 cublasCreate(&handle);
92 cublasSetStream(handle,internal_stream);
94 DIRECT_FIR_output_size = DIRECT_output_size;
104 cudaMalloc((
void **)&FIR_output, DIRECT_FIR_output_size*
sizeof(float2));
113 cudaMalloc((
void **)&transposed, DIRECT_FIR_output_size*
sizeof(float2));
116 process_ptr = &RX_buffer_demodulator::process_direct;
117 clr_ptr = &RX_buffer_demodulator::close_direct;
124 if(diagnostic)
print_warning(
"Demodulator diagnostic enabled.");
127 process_ptr = &RX_buffer_demodulator::process_pfb;
128 clr_ptr = &RX_buffer_demodulator::close_pfb;
137 upload_multitone_parameters();
140 in_out_len =
parameters->fft_tones * batching;
143 cudaMalloc((
void **)&raw_input,in_out_len*
sizeof(float2));
144 cudaMalloc((
void **)&input,in_out_len*
sizeof(float2));
146 cudaMalloc((
void **)&output,(decimator_active?3:1)*in_out_len*
sizeof(float2));
147 cudaMalloc((
void **)&reduced_output,
parameters->wave_type.size()*batching*
sizeof(float2));
153 CUFFT_C2C, batching);
156 cufftSetStream(plan, internal_stream);
162 if(decimator_active){
167 cudaMalloc((
void **)&decim_output,
sizeof(float2)*(2.*
parameters->buffer_len)/(
parameters->decim));
172 print_warning(
"When using TONES demodulation type, the decimation should be achieved increasing the number of pfb channels");
180 if(diagnostic)
print_warning(
"Demodulator diagnostic enabled.");
183 process_ptr = &RX_buffer_demodulator::process_chirp;
184 clr_ptr = &RX_buffer_demodulator::close_chirp;
188 cudaDeviceGetStreamPriorityRange ( &low_p, &high_p );
189 cudaStreamCreateWithPriority(&internal_stream,cudaStreamNonBlocking, high_p);
194 print_warning(
"Number of frequency steps of the chirp demodulator is not set. Setting it to maximum (chirp time * sampling rate).");
198 print_warning(
"Number of frequency steps of the chirp demodulator is less than 2. This may result in single tone demodulation.");
204 print_warning(
"Duration of each frequency in chirp signal cannot be less than one sample. Setting duration of each tone to 1.");
221 cudaMemcpy(d_parameter, &h_parameter,
sizeof(
chirp_parameter),cudaMemcpyHostToDevice);
224 cudaMalloc((
void **)&input,
sizeof(float2)*
parameters->buffer_len);
225 cudaMalloc((
void **)&output,(decimator_active?3:1)*
sizeof(float2)*
parameters->buffer_len);
228 if(decimator_active){
233 if(
parameters->decim>1)
print_warning(
"A decimation factor >1 requested in chirp demodulation. There is interpreted as ppt*decim");
238 cublasCreate(&handle);
239 cublasSetStream(handle,internal_stream);
242 zero = make_cuComplex (0.0f, 0.0f);
243 one = make_cuComplex (1.0f, 0.0f);
253 cudaMalloc((
void **)&decim_output,
sizeof(float2)*std::ceil((
float)
parameters->buffer_len/(
float)ppt)*8);
267 if(diagnostic)
print_warning(
"Demodulator diagnostic enabled.");
270 process_ptr = &RX_buffer_demodulator::process_pfb_spec;
271 clr_ptr = &RX_buffer_demodulator::close_pfb_spec;
280 upload_multitone_parameters();
283 in_out_len =
parameters->fft_tones * batching;
286 cudaMalloc((
void **)&raw_input,in_out_len*
sizeof(float2));
287 cudaMalloc((
void **)&input,(decimator_active?3:1)*in_out_len*
sizeof(float2));
288 cudaMalloc((
void **)&output,in_out_len*
sizeof(float2));
295 CUFFT_C2C, batching);
298 cufftSetStream(plan, internal_stream);
303 if(decimator_active){
307 cudaMalloc((
void **)&decim_output,
sizeof(float2)*(2*
parameters->buffer_len));
318 process_ptr = &RX_buffer_demodulator::process_nodsp;
319 clr_ptr = &RX_buffer_demodulator::close_nodsp;
323 print_error(
"Void demodulation operation has not been implemented yet!");
335 int RX_buffer_demodulator::process_nodsp(float2** __restrict__ input_buffer, float2** __restrict__ output_buffer){
337 std::memcpy(*output_buffer, *input_buffer,
parameters->buffer_len*
sizeof(float2));
342 int RX_buffer_demodulator::process_chirp(float2** __restrict__ input_buffer, float2** __restrict__ output_buffer){
347 cudaMemcpyAsync(input, *input_buffer,
parameters->buffer_len*
sizeof(float2),cudaMemcpyHostToDevice, internal_stream);
359 if(decimator_active){
363 cublas_decim(output,decim_output,profile,&zero,&one,valid_size,ppt, &handle);
367 cudaMemcpyAsync(*output_buffer, decim_output,
sizeof(float2)*valid_size, cudaMemcpyDeviceToHost, internal_stream);
369 spare_size = vna_helper->
new0;
387 cudaMemcpyAsync(*output_buffer, output,
sizeof(float2)*
parameters->buffer_len, cudaMemcpyDeviceToHost, internal_stream);
393 cudaStreamSynchronize(internal_stream);
400 int RX_buffer_demodulator::process_direct(float2** __restrict__ input_buffer, float2** __restrict__ output_buffer){
404 cudaMemcpyAsync(direct_input, *input_buffer,
parameters->buffer_len*
sizeof(float2),cudaMemcpyHostToDevice, internal_stream);
408 DIRECT_tone_frquencies,
411 DIRECT_current_index,
421 for(
size_t i = 0; i<
parameters->wave_type.size(); i++) DIRECT_FIR[i]->run_fir((direct_output)+(i*
parameters->buffer_len), (FIR_output)+i*output_channel_len);
429 FIR_output,output_channel_len,
431 direct_input,output_channel_len,
440 DIRECT_current_index = DIRECT_current_index %
parameters->rate;
456 cudaMemcpyAsync(*output_buffer, transposed,
sizeof(float2)*DIRECT_output_size, cudaMemcpyDeviceToHost, internal_stream);
457 ret = DIRECT_output_size;
460 cudaMemcpyAsync(*output_buffer, transposed,
sizeof(float2)*ret, cudaMemcpyDeviceToHost, internal_stream);
462 cudaStreamSynchronize(internal_stream);
466 void RX_buffer_demodulator::close_direct(){
467 cudaStreamDestroy(internal_stream);
468 cudaFree(DIRECT_tone_frquencies);
469 cudaFree(DIRECT_tone_phases);
470 cudaFree(direct_input);
471 cudaFree(direct_output);
472 cudaFree(transposed);
475 for(
size_t k=0; k<
parameters->wave_type.size(); k++)
delete(DIRECT_FIR[k]);
478 cudaFree(FIR_output);
485 int RX_buffer_demodulator::process_pfb(float2** __restrict__ input_buffer, float2** __restrict__ output_buffer){
487 int output_buffer_valid_len;
491 raw_input+buf_setting->
new_0,
494 cudaMemcpyHostToDevice,internal_stream);
500 cufftExecC2C(plan, input, output+spare_size, CUFFT_FORWARD);
510 if(decimator_active){
513 spare_size = pfb_decim_helper->
new_0;
521 pfb_decim_helper->
new_0,
529 cudaMemcpyAsync(*output_buffer,reduced_output,
530 parameters->wave_type.size()*batching*
sizeof(float2),
531 cudaMemcpyDeviceToHost,internal_stream);
541 cudaMemcpyAsync(*output_buffer,reduced_output,
542 parameters->wave_type.size()*batching*
sizeof(float2),
543 cudaMemcpyDeviceToHost,internal_stream);
554 cudaStreamSynchronize(internal_stream);
561 return output_buffer_valid_len;
567 int RX_buffer_demodulator::process_pfb_spec(float2** __restrict__ input_buffer, float2** __restrict__ output_buffer){
569 int output_buffer_valid_len;
573 raw_input+buf_setting->
new_0,
576 cudaMemcpyHostToDevice,internal_stream);
582 cufftExecC2C(plan, input, output+pfb_out, CUFFT_FORWARD);
592 if(decimator_active){
603 int input_len = output_len *
parameters->decim;
617 cudaMemcpyAsync(*output_buffer,decim_output,
619 cudaMemcpyDeviceToHost,internal_stream);
623 output_buffer_valid_len = output_len;
629 cudaMemcpyAsync(*output_buffer,output,
631 cudaMemcpyDeviceToHost,internal_stream);
637 output_buffer_valid_len = buf_setting->
copy_size;
644 cudaStreamSynchronize(internal_stream);
647 return output_buffer_valid_len;
650 void RX_buffer_demodulator::close_pfb(){
652 cudaStreamDestroy(internal_stream);
656 cudaFree(reduced_output);
659 if(decimator_active){
660 delete(pfb_decim_helper);
661 cudaFree(decim_output);
666 void RX_buffer_demodulator::close_pfb_spec(){
668 cudaStreamDestroy(internal_stream);
674 if(decimator_active){
675 delete(pfb_decim_helper);
676 cudaFree(decim_output);
681 void RX_buffer_demodulator::close_chirp(){
682 cudaStreamDestroy(internal_stream);
683 cudaFree(d_parameter);
686 if(decimator_active){
687 cudaFree(decim_output);
692 void RX_buffer_demodulator::close_nodsp(){cudaStreamDestroy(internal_stream);}
696 void RX_buffer_demodulator::upload_multitone_parameters(){
714 tone_bins = (
int*)malloc(h_param.
eff_n_tones*
sizeof(
int));
716 std::vector<double> bin_axis(
parameters->fft_tones);
720 for(
size_t i = 0; i<bin_axis.size(); i++){
721 bin_axis[i] = i*bin_size - bin_size * (
parameters->fft_tones/2);
723 if((
parameters->freq[u] < bin_axis[i] + bin_size) && (
parameters->freq[u] > bin_axis[i] - bin_size) ){
743 cudaMemcpy(h_param.
tones, tone_bins, h_param.
eff_n_tones*
sizeof(
int),cudaMemcpyHostToDevice);
747 std::stringstream ss;
748 ss<<
"Polyphase filter bank diagnostic:"<<std::endl<<
"frequ\tbin"<<std::endl;
750 std::cout<<ss.str()<<std::endl;
760 cudaMemcpy(d_params, &h_param,
sizeof(
filter_param),cudaMemcpyHostToDevice);
void chirp_demodulator_wrapper(float2 *__restrict__ input, float2 *__restrict__ output, unsigned int output_size, unsigned long int last_index, chirp_parameter *__restrict__ info, cudaStream_t internal_stream)
void decimate_pfb(float2 *__restrict__ input, float2 *__restrict__ output, int decim, int nfft, int output_length, cudaStream_t stram_f)
void tone_select_wrapper(float2 *__restrict__ input, float2 *__restrict__ output, filter_param *__restrict__ filter_info, int effective_batching, cudaStream_t internal_stream)
void update(int current_batch)
unsigned long int num_steps
int * tones
How many samples per each tone are present in the device buffer.
float2 * make_flat_window(int length, int side, bool diagnostic)
Creates a flattop window in the GPU memory.
float2 * window
Polyphase filter parameter wrapper and utility variables for buffer reminder.
int n_tones
Total length of the device buffer.
void print_error(std::string text)
float2 * make_sinc_window(int length, float fc, bool diagnostic=false, bool host_ret=false)
RX_buffer_demodulator(param *init_parameters, bool init_diagnostic=false)
Initialization method for the class called when a new command is received. iagnostic allows to print ...
void move_buffer_wrapper(float2 *__restrict__ from, float2 *__restrict__ to, int size, int from_offset, int to_offset, cudaStream_t internal_stream)
int length
Pointer to an already initialized window.
float fcut
PFB cut-off frequency fo the window. 1.f is Nyquist at the higher sampling frequency. this parameter will be movoed to the param struct soon.
int process(float2 **__restrict__ in, float2 **__restrict__ out)
PAcket handler for DSP class. This method process information pointed by the in parameter and write t...
void polyphase_filter_wrapper(float2 *__restrict__ input, float2 *__restrict__ output, filter_param *__restrict__ filter_info, cudaStream_t internal_stream)
int eff_n_tones
Must be an array containing the fft bin number corresponding to the tone frequency.
void print_warning(std::string text)
int average_buffer
How many points to calculate in the FFT.
void direct_demodulator_wrapper(int *__restrict tone_frequencies, int *__restrict tone_phases, int wavetablelen, size_t index_counter, size_t single_tone_length, size_t total_length, float2 *__restrict input, float2 *__restrict output, cudaStream_t internal_stream)
int batching
How many buffer are averaged (length of the window has to be average_buffer * n_tones) ...
void close()
Wrapper to the correct cleaning function.
void decimate_spectra(float2 *__restrict__ input, float2 *__restrict__ output, int decim, int nfft, int input_len, int output_len, cudaStream_t stram_f)
void cublas_decim(float2 *__restrict__ input, float2 *__restrict__ output, float2 *__restrict__ profile, cuComplex *__restrict__ zero, cuComplex *__restrict__ one, int ppt, int n_freqs, cublasHandle_t *__restrict__ handle)