GNSS-SDR  0.0.19
An Open Source GNSS Software Defined Receiver
cuda_multicorrelator.h
Go to the documentation of this file.
1 /*!
2  * \file cuda_multicorrelator.h
3  * \brief Highly optimized CUDA GPU vector multiTAP correlator class
4  * \authors <ul>
5  * <li> Javier Arribas, 2015. jarribas(at)cttc.es
6  * </ul>
7  *
8  * Class that implements a highly optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
9  *
10  * -----------------------------------------------------------------------------
11  *
12  * GNSS-SDR is a Global Navigation Satellite System software-defined receiver.
13  * This file is part of GNSS-SDR.
14  *
15  * Copyright (C) 2010-2020 (see AUTHORS file for a list of contributors)
16  * SPDX-License-Identifier: GPL-3.0-or-later
17  *
18  * -----------------------------------------------------------------------------
19  */
20 
21 #ifndef GNSS_SDR_CUDA_MULTICORRELATOR_H
22 #define GNSS_SDR_CUDA_MULTICORRELATOR_H
23 
24 
25 #ifdef __CUDACC__
26 #define CUDA_CALLABLE_MEMBER_GLOBAL __global__
27 #define CUDA_CALLABLE_MEMBER_DEVICE __device__
28 #else
29 #define CUDA_CALLABLE_MEMBER_GLOBAL
30 #define CUDA_CALLABLE_MEMBER_DEVICE
31 #endif
32 
33 #include <complex>
34 #include <cuda.h>
35 #include <cuda_runtime.h>
36 
37 /** \addtogroup Tracking
38  * \{ */
39 /** \addtogroup Tracking_libs
40  * \{ */
41 
42 
43 // GPU new internal data types for complex numbers
44 
46 {
47  float r;
48  float i;
49  CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex(){};
50  CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex(float a, float b) : r(a), i(b) {}
51  CUDA_CALLABLE_MEMBER_DEVICE float magnitude2(void) { return r * r + i * i; }
52  CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator*(const GPU_Complex& a)
53  {
54 #ifdef __CUDACC__
55  return GPU_Complex(__fmul_rn(r, a.r) - __fmul_rn(i, a.i), __fmul_rn(i, a.r) + __fmul_rn(r, a.i));
56 #else
57  return GPU_Complex(r * a.r - i * a.i, i * a.r + r * a.i);
58 #endif
59  }
60  CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator+(const GPU_Complex& a)
61  {
62  return GPU_Complex(r + a.r, i + a.i);
63  }
64  CUDA_CALLABLE_MEMBER_DEVICE void operator+=(const GPU_Complex& a)
65  {
66  r += a.r;
67  i += a.i;
68  }
69  CUDA_CALLABLE_MEMBER_DEVICE void multiply_acc(const GPU_Complex& a, const GPU_Complex& b)
70  {
71  // c=a*b+c
72  // real part
73  // c.r=(a.r*b.r - a.i*b.i)+c.r
74 #ifdef __CUDACC__
75  r = __fmaf_rn(a.r, b.r, r);
76  r = __fmaf_rn(-a.i, b.i, r);
77  // imag part
78  i = __fmaf_rn(a.i, b.r, i);
79  i = __fmaf_rn(a.r, b.i, i);
80 #else
81  r = (a.r * b.r - a.i * b.i) + r;
82  i = (a.i * b.r - a.r * b.i) + i;
83 #endif
84  }
85 };
86 
87 
89 {
90  float r;
91  float i;
92  CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short(short int a, short int b) : r(a), i(b) {}
93  CUDA_CALLABLE_MEMBER_DEVICE float magnitude2(void)
94  {
95  return r * r + i * i;
96  }
97  CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator*(const GPU_Complex_Short& a)
98  {
99  return GPU_Complex_Short(r * a.r - i * a.i, i * a.r + r * a.i);
100  }
101  CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator+(const GPU_Complex_Short& a)
102  {
103  return GPU_Complex_Short(r + a.r, i + a.i);
104  }
105 };
106 
107 
108 /*!
109  * \brief Class that implements carrier wipe-off and correlators using NVIDIA CUDA GPU accelerators.
110  */
112 {
113 public:
115  bool init_cuda_integrated_resampler(
116  int signal_length_samples,
117  int code_length_chips,
118  int n_correlators);
119  bool set_local_code_and_taps(
120  int code_length_chips,
121  const std::complex<float>* local_codes_in,
122  float* shifts_chips,
123  int n_correlators);
124  bool set_input_output_vectors(
125  std::complex<float>* corr_out,
126  std::complex<float>* sig_in);
127 
128  bool free_cuda();
129  bool Carrier_wipeoff_multicorrelator_resampler_cuda(
130  float rem_carrier_phase_in_rad,
131  float phase_step_rad,
132  float code_phase_step_chips,
133  float rem_code_phase_chips,
134  int signal_length_samples,
135  int n_correlators);
136 
137 private:
138  cudaStream_t stream1;
139  // cudaStream_t stream2;
140 
141  // Allocate the device input vectors
142  GPU_Complex* d_sig_in;
143  GPU_Complex* d_nco_in;
144  GPU_Complex* d_sig_doppler_wiped;
145  GPU_Complex* d_local_codes_in;
146  GPU_Complex* d_corr_out;
147 
148  std::complex<float>* d_sig_in_cpu;
149  std::complex<float>* d_corr_out_cpu;
150 
151  float* d_shifts_chips;
152  int* d_shifts_samples;
153  int d_code_length_chips;
154 
155  int selected_gps_device;
156  int threadsPerBlock;
157  int blocksPerGrid;
158 
159  int num_gpu_devices;
160  int selected_device;
161 };
162 
163 
164 /** \} */
165 /** \} */
166 #endif // GNSS_SDR_CUDA_MULTICORRELATOR_H
Class that implements carrier wipe-off and correlators using NVIDIA CUDA GPU accelerators.