GNSS-SDR 0.0.21
An Open Source GNSS Software Defined Receiver
Loading...
Searching...
No Matches
cuda_multicorrelator.h
Go to the documentation of this file.
1/*!
2 * \file cuda_multicorrelator.h
3 * \brief Highly optimized CUDA GPU vector multiTAP correlator class
4 * \authors <ul>
5 * <li> Javier Arribas, 2015. jarribas(at)cttc.es
6 * </ul>
7 *
8 * Class that implements a highly optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
9 *
10 * -----------------------------------------------------------------------------
11 *
12 * GNSS-SDR is a Global Navigation Satellite System software-defined receiver.
13 * This file is part of GNSS-SDR.
14 *
15 * Copyright (C) 2010-2020 (see AUTHORS file for a list of contributors)
16 * SPDX-License-Identifier: GPL-3.0-or-later
17 *
18 * -----------------------------------------------------------------------------
19 */
20
21#ifndef GNSS_SDR_CUDA_MULTICORRELATOR_H
22#define GNSS_SDR_CUDA_MULTICORRELATOR_H
23
24
25#ifdef __CUDACC__
26#define CUDA_CALLABLE_MEMBER_GLOBAL __global__
27#define CUDA_CALLABLE_MEMBER_DEVICE __device__
28#else
29#define CUDA_CALLABLE_MEMBER_GLOBAL
30#define CUDA_CALLABLE_MEMBER_DEVICE
31#endif
32
33#include <complex>
34#include <cuda.h>
35#include <cuda_runtime.h>
36
37/** \addtogroup Tracking
38 * \{ */
39/** \addtogroup Tracking_libs
40 * \{ */
41
42
43// GPU new internal data types for complex numbers
44
45struct GPU_Complex
46{
47 float r;
48 float i;
49 CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex() {};
50 CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex(float a, float b) : r(a), i(b) {}
51 CUDA_CALLABLE_MEMBER_DEVICE float magnitude2(void) { return r * r + i * i; }
52 CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator*(const GPU_Complex& a)
53 {
54#ifdef __CUDACC__
55 return GPU_Complex(__fmul_rn(r, a.r) - __fmul_rn(i, a.i), __fmul_rn(i, a.r) + __fmul_rn(r, a.i));
56#else
57 return GPU_Complex(r * a.r - i * a.i, i * a.r + r * a.i);
58#endif
59 }
60 CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator+(const GPU_Complex& a)
61 {
62 return GPU_Complex(r + a.r, i + a.i);
63 }
64 CUDA_CALLABLE_MEMBER_DEVICE void operator+=(const GPU_Complex& a)
65 {
66 r += a.r;
67 i += a.i;
68 }
69 CUDA_CALLABLE_MEMBER_DEVICE void multiply_acc(const GPU_Complex& a, const GPU_Complex& b)
70 {
71 // c=a*b+c
72 // real part
73 // c.r=(a.r*b.r - a.i*b.i)+c.r
74#ifdef __CUDACC__
75 r = __fmaf_rn(a.r, b.r, r);
76 r = __fmaf_rn(-a.i, b.i, r);
77 // imag part
78 i = __fmaf_rn(a.i, b.r, i);
79 i = __fmaf_rn(a.r, b.i, i);
80#else
81 r = (a.r * b.r - a.i * b.i) + r;
82 i = (a.i * b.r - a.r * b.i) + i;
83#endif
84 }
85};
86
87
88struct GPU_Complex_Short
89{
90 float r;
91 float i;
92 CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short(short int a, short int b) : r(a), i(b) {}
93 CUDA_CALLABLE_MEMBER_DEVICE float magnitude2(void)
94 {
95 return r * r + i * i;
96 }
97 CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator*(const GPU_Complex_Short& a)
98 {
99 return GPU_Complex_Short(r * a.r - i * a.i, i * a.r + r * a.i);
100 }
101 CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator+(const GPU_Complex_Short& a)
102 {
103 return GPU_Complex_Short(r + a.r, i + a.i);
104 }
105};
106
107
108/*!
109 * \brief Class that implements carrier wipe-off and correlators using NVIDIA CUDA GPU accelerators.
110 */
111class cuda_multicorrelator
112{
113public:
114 cuda_multicorrelator();
115 bool init_cuda_integrated_resampler(
116 int signal_length_samples,
117 int code_length_chips,
118 int n_correlators);
119 bool set_local_code_and_taps(
120 int code_length_chips,
121 const std::complex<float>* local_codes_in,
122 float* shifts_chips,
123 int n_correlators);
124 bool set_input_output_vectors(
125 std::complex<float>* corr_out,
126 std::complex<float>* sig_in);
127
128 bool free_cuda();
129 bool Carrier_wipeoff_multicorrelator_resampler_cuda(
130 float rem_carrier_phase_in_rad,
131 float phase_step_rad,
132 float code_phase_step_chips,
133 float rem_code_phase_chips,
134 int signal_length_samples,
135 int n_correlators);
136
137private:
138 cudaStream_t stream1;
139 // cudaStream_t stream2;
140
141 // Allocate the device input vectors
142 GPU_Complex* d_sig_in;
143 GPU_Complex* d_nco_in;
144 GPU_Complex* d_sig_doppler_wiped;
145 GPU_Complex* d_local_codes_in;
146 GPU_Complex* d_corr_out;
147
148 std::complex<float>* d_sig_in_cpu;
149 std::complex<float>* d_corr_out_cpu;
150
151 float* d_shifts_chips;
152 int* d_shifts_samples;
153 int d_code_length_chips;
154
155 int selected_gps_device;
156 int threadsPerBlock;
157 int blocksPerGrid;
158
159 int num_gpu_devices;
160 int selected_device;
161};
162
163
164/** \} */
165/** \} */
166#endif // GNSS_SDR_CUDA_MULTICORRELATOR_H