GNSS-SDR  0.0.13
An Open Source GNSS Software Defined Receiver
cuda_multicorrelator.h
Go to the documentation of this file.
1 /*!
2  * \file cuda_multicorrelator.h
3  * \brief Highly optimized CUDA GPU vector multiTAP correlator class
4  * \authors <ul>
5  * <li> Javier Arribas, 2015. jarribas(at)cttc.es
6  * </ul>
7  *
8  * Implements a highly optimized vector multiTAP correlator for NVIDIA CUDA GPUs
9  *
10  * -----------------------------------------------------------------------------
11  *
12  * Copyright (C) 2010-2020 (see AUTHORS file for a list of contributors)
13  *
14  * GNSS-SDR is a software defined Global Navigation
15  * Satellite Systems receiver
16  *
17  * This file is part of GNSS-SDR.
18  *
19  * SPDX-License-Identifier: GPL-3.0-or-later
20  *
21  * -----------------------------------------------------------------------------
22  */
23 
24 #ifndef GNSS_SDR_CUDA_MULTICORRELATOR_H
25 #define GNSS_SDR_CUDA_MULTICORRELATOR_H
26 
27 
28 #ifdef __CUDACC__
29 #define CUDA_CALLABLE_MEMBER_GLOBAL __global__
30 #define CUDA_CALLABLE_MEMBER_DEVICE __device__
31 #else
32 #define CUDA_CALLABLE_MEMBER_GLOBAL
33 #define CUDA_CALLABLE_MEMBER_DEVICE
34 #endif
35 
36 #include <complex>
37 #include <cuda.h>
38 #include <cuda_runtime.h>
39 
40 // New internal GPU data types for complex numbers
41 
43 {
// Real (r) and imaginary (i) parts, stored in single precision.
float r;
float i;

/*!
 * \brief Default constructor. Performs no initialization: r and i hold
 * indeterminate values until they are assigned.
 *
 * NOTE(review): presumably left empty on purpose so the type can be used in
 * __shared__ arrays, which do not accept initializing constructors -- confirm
 * against the kernels before adding member initializers here.
 */
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex() {}

/*!
 * \brief Builds the complex number a + j*b.
 * \param a Real part.
 * \param b Imaginary part.
 */
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex(float a, float b)
    : r(a),
      i(b)
{
}
/*!
 * \brief Squared magnitude of the complex number, r*r + i*i.
 *
 * No square root is taken. The function does not modify the object and is
 * const-qualified so it can also be called through const references.
 *
 * \return r*r + i*i
 */
CUDA_CALLABLE_MEMBER_DEVICE float magnitude2() const
{
    return r * r + i * i;
}
/*!
 * \brief Complex product (*this) * a.
 *
 * real = r*a.r - i*a.i
 * imag = i*a.r + r*a.i
 *
 * When compiled by nvcc (__CUDACC__ defined) the four partial products are
 * formed with the __fmul_rn intrinsic; otherwise plain float multiplication
 * is used. The operands are not modified, hence the const qualification.
 *
 * \param a Right-hand operand.
 * \return The product as a new GPU_Complex.
 */
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator*(const GPU_Complex& a) const
{
#ifdef __CUDACC__
    const float re = __fmul_rn(r, a.r) - __fmul_rn(i, a.i);
    const float im = __fmul_rn(i, a.r) + __fmul_rn(r, a.i);
#else
    const float re = r * a.r - i * a.i;
    const float im = i * a.r + r * a.i;
#endif
    return GPU_Complex(re, im);
}
/*!
 * \brief Component-wise complex sum (*this) + a.
 *
 * Neither operand is modified, hence the const qualification.
 *
 * \param a Right-hand operand.
 * \return The sum as a new GPU_Complex.
 */
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator+(const GPU_Complex& a) const
{
    return GPU_Complex(r + a.r, i + a.i);
}
/*!
 * \brief In-place accumulation: adds a to this number, part by part.
 *
 * Returns void, so the result of the expression cannot be chained.
 *
 * \param a Value to add.
 */
CUDA_CALLABLE_MEMBER_DEVICE void operator+=(const GPU_Complex& a)
{
    r = r + a.r;
    i = i + a.i;
}
/*!
 * \brief Complex multiply-accumulate: *this = a * b + *this.
 *
 * real: r += a.r*b.r - a.i*b.i
 * imag: i += a.i*b.r + a.r*b.i
 *
 * Under nvcc (__CUDACC__ defined) each part is accumulated with two
 * __fmaf_rn fused multiply-adds; otherwise plain float arithmetic is used.
 * Both branches now implement the same formula: the host branch previously
 * subtracted a.r*b.i in the imaginary part, which disagreed with the CUDA
 * branch and with the definition of the complex product.
 *
 * The operand components are read before anything is written, so the result
 * is also correct when a or b refers to this same object.
 *
 * \param a First factor.
 * \param b Second factor.
 */
CUDA_CALLABLE_MEMBER_DEVICE void multiply_acc(const GPU_Complex& a, const GPU_Complex& b)
{
    // Snapshot the inputs first: r is updated before i is computed, which
    // would otherwise corrupt the imaginary part if a or b aliases *this.
    const float a_re = a.r;
    const float a_im = a.i;
    const float b_re = b.r;
    const float b_im = b.i;
#ifdef __CUDACC__
    // real part
    r = __fmaf_rn(a_re, b_re, r);
    r = __fmaf_rn(-a_im, b_im, r);
    // imag part
    i = __fmaf_rn(a_im, b_re, i);
    i = __fmaf_rn(a_re, b_im, i);
#else
    r = (a_re * b_re - a_im * b_im) + r;
    i = (a_im * b_re + a_re * b_im) + i;
#endif
}
82 };
83 
84 
86 {
// Real (r) and imaginary (i) parts.
// NOTE(review): the members are float although the only constructor takes
// short int arguments -- confirm whether short int storage was intended.
float r;
float i;

/*!
 * \brief Builds the complex number a + j*b from two short integers, which
 * are converted to float for storage. There is no default constructor.
 * \param a Real part.
 * \param b Imaginary part.
 */
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short(short int a, short int b)
    : r(a),
      i(b)
{
}
/*!
 * \brief Squared magnitude of the complex number, r*r + i*i.
 *
 * No square root is taken. The function does not modify the object and is
 * const-qualified so it can also be called through const references.
 *
 * \return r*r + i*i as a float.
 */
CUDA_CALLABLE_MEMBER_DEVICE float magnitude2() const
{
    return r * r + i * i;
}
/*!
 * \brief Complex product (*this) * a.
 *
 * real = r*a.r - i*a.i
 * imag = i*a.r + r*a.i
 *
 * The only constructor takes short int arguments, so both float results are
 * converted to short int before being stored again. The conversion was
 * implicit before and is spelled out here; the behaviour is unchanged.
 * NOTE(review): products that do not fit in a short int cannot be represented
 * by this conversion -- confirm the expected input range with the callers.
 *
 * \param a Right-hand operand.
 * \return The product as a new GPU_Complex_Short.
 */
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator*(const GPU_Complex_Short& a) const
{
    const float re = r * a.r - i * a.i;
    const float im = i * a.r + r * a.i;
    return GPU_Complex_Short(static_cast<short int>(re), static_cast<short int>(im));
}
/*!
 * \brief Component-wise complex sum (*this) + a.
 *
 * The float sums are converted to short int because that is what the only
 * constructor accepts. The conversion was implicit before and is spelled out
 * here; the behaviour is unchanged. Neither operand is modified, hence the
 * const qualification.
 *
 * \param a Right-hand operand.
 * \return The sum as a new GPU_Complex_Short.
 */
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator+(const GPU_Complex_Short& a) const
{
    return GPU_Complex_Short(static_cast<short int>(r + a.r), static_cast<short int>(i + a.i));
}
102 };
103 
104 
105 /*!
106  * \brief Class that implements carrier wipe-off and correlators using NVIDIA CUDA GPU accelerators.
107  */
109 {
110 public:
// Public interface. Only declarations are visible in this header; every call
// reports its outcome as a bool.
// NOTE(review): presumably true means success -- confirm against the
// implementation file.

// Takes the signal length in samples, the code length in chips and the number
// of correlators.
// NOTE(review): presumably selects the device and allocates the GPU buffers
// sized from these values -- confirm.
112  bool init_cuda_integrated_resampler(
113  int signal_length_samples,
114  int code_length_chips,
115  int n_correlators);
// Takes the local code as std::complex<float> samples, its length in chips,
// an array of per-correlator shifts in chips and the number of correlators.
// NOTE(review): shifts_chips presumably holds n_correlators entries -- confirm.
116  bool set_local_code_and_taps(
117  int code_length_chips,
118  const std::complex<float>* local_codes_in,
119  float* shifts_chips,
120  int n_correlators);
// Takes the correlator output buffer and the input signal buffer.
// NOTE(review): presumably both are host pointers that are stored for later
// calls; required sizes and lifetime are not visible here -- confirm.
121  bool set_input_output_vectors(
122  std::complex<float>* corr_out,
123  std::complex<float>* sig_in);
124 
// NOTE(review): presumably releases the resources acquired by
// init_cuda_integrated_resampler -- confirm.
125  bool free_cuda();
// Takes the remaining carrier phase and the phase step (radians), the code
// phase step and remaining code phase (chips), the signal length in samples
// and the number of correlators.
// NOTE(review): presumably performs carrier wipe-off, code resampling and the
// multi-tap correlation in one GPU pass -- confirm.
126  bool Carrier_wipeoff_multicorrelator_resampler_cuda(
127  float rem_carrier_phase_in_rad,
128  float phase_step_rad,
129  float code_phase_step_chips,
130  float rem_code_phase_chips,
131  int signal_length_samples,
132  int n_correlators);
133 
134 private:
// CUDA stream handle owned by this object. A second stream is commented out.
135  cudaStream_t stream1;
136  // cudaStream_t stream2;
137 
// Buffers of GPU_Complex elements.
// NOTE(review): presumably device-memory pointers, going by the d_ prefix and
// the comment below -- confirm.
138  // Allocate the device input vectors
139  GPU_Complex* d_sig_in;
140  GPU_Complex* d_nco_in;
141  GPU_Complex* d_sig_doppler_wiped;
142  GPU_Complex* d_local_codes_in;
143  GPU_Complex* d_corr_out;
144 
// std::complex<float> counterparts of the input and output buffers.
// NOTE(review): presumably the host-side pointers passed to
// set_input_output_vectors -- confirm.
145  std::complex<float>* d_sig_in_cpu;
146  std::complex<float>* d_corr_out_cpu;
147 
// Per-correlator shifts (in chips and in samples) and the code length in chips.
// NOTE(review): memory space and element count are not visible here -- confirm.
148  float* d_shifts_chips;
149  int* d_shifts_samples;
150  int d_code_length_chips;
151 
// Device selection and kernel launch configuration.
// NOTE(review): selected_gps_device and selected_device look like duplicates
// ("gps" may be a typo for "gpu") -- confirm which one is actually used.
152  int selected_gps_device;
153  int threadsPerBlock;
154  int blocksPerGrid;
155 
156  int num_gpu_devices;
157  int selected_device;
158 };
159 
160 
161 #endif // GNSS_SDR_CUDA_MULTICORRELATOR_H
Class that implements carrier wipe-off and correlators using NVIDIA CUDA GPU accelerators.