17 #ifndef KOKKOS_BITSET_HPP 18 #define KOKKOS_BITSET_HPP 19 #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE 20 #define KOKKOS_IMPL_PUBLIC_INCLUDE 21 #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET 24 #include <Kokkos_Core.hpp> 25 #include <Kokkos_Functional.hpp> 27 #include <impl/Kokkos_Bitset_impl.hpp> 31 template <
typename Device = Kokkos::DefaultExecutionSpace>
34 template <
typename Device = Kokkos::DefaultExecutionSpace>
37 template <
typename DstDevice,
typename SrcDevice>
40 template <
typename DstDevice,
typename SrcDevice>
43 template <
typename DstDevice,
typename SrcDevice>
47 template <
typename Device>
50 using execution_space =
typename Device::execution_space;
51 using size_type =
unsigned int;
53 static constexpr
unsigned BIT_SCAN_REVERSE = 1u;
54 static constexpr
unsigned MOVE_HINT_BACKWARD = 2u;
56 static constexpr
unsigned BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u;
57 static constexpr
unsigned BIT_SCAN_REVERSE_MOVE_HINT_FORWARD =
59 static constexpr
unsigned BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD =
61 static constexpr
unsigned BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD =
62 BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD;
66 block_size =
static_cast<unsigned>(
sizeof(unsigned) * CHAR_BIT)
68 enum :
unsigned { block_mask = block_size - 1u };
70 block_shift = Kokkos::Impl::integral_power_of_two(block_size)
78 m_last_block_mask(0u),
79 m_blocks(
"Bitset", ((m_size + block_mask) >> block_shift)) {
80 for (
int i = 0, end = static_cast<int>(m_size & block_mask); i < end; ++i) {
81 m_last_block_mask |= 1u << i;
85 KOKKOS_DEFAULTED_FUNCTION
88 KOKKOS_DEFAULTED_FUNCTION
91 KOKKOS_DEFAULTED_FUNCTION
94 KOKKOS_DEFAULTED_FUNCTION
97 KOKKOS_DEFAULTED_FUNCTION
102 KOKKOS_FORCEINLINE_FUNCTION
103 unsigned size()
const {
return m_size; }
108 Impl::BitsetCount<Bitset<Device> > f(*
this);
115 Kokkos::deep_copy(m_blocks, ~0u);
117 if (m_last_block_mask) {
119 Kokkos::Impl::DeepCopy<typename Device::memory_space, Kokkos::HostSpace>(
120 m_blocks.data() + (m_blocks.
extent(0) - 1u), &m_last_block_mask,
123 "Bitset::set: fence after clearing unused bits copying from " 130 void reset() { Kokkos::deep_copy(m_blocks, 0u); }
134 void clear() { Kokkos::deep_copy(m_blocks, 0u); }
138 KOKKOS_FORCEINLINE_FUNCTION
139 bool set(
unsigned i)
const {
141 unsigned* block_ptr = &m_blocks[i >> block_shift];
142 const unsigned mask = 1u << static_cast<int>(i & block_mask);
144 return !(atomic_fetch_or(block_ptr, mask) & mask);
151 KOKKOS_FORCEINLINE_FUNCTION
154 unsigned* block_ptr = &m_blocks[i >> block_shift];
155 const unsigned mask = 1u << static_cast<int>(i & block_mask);
157 return atomic_fetch_and(block_ptr, ~mask) & mask;
164 KOKKOS_FORCEINLINE_FUNCTION
167 #ifdef KOKKOS_ENABLE_SYCL 168 const unsigned block = Kokkos::atomic_load(&m_blocks[i >> block_shift]);
170 const unsigned block = volatile_load(&m_blocks[i >> block_shift]);
172 const unsigned mask = 1u << static_cast<int>(i & block_mask);
181 KOKKOS_FORCEINLINE_FUNCTION
188 KOKKOS_INLINE_FUNCTION
191 unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD)
const {
192 const unsigned block_idx =
193 (hint >> block_shift) < m_blocks.
extent(0) ? (hint >> block_shift) : 0;
194 const unsigned offset = hint & block_mask;
195 #ifdef KOKKOS_ENABLE_SYCL 196 unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
198 unsigned block = volatile_load(&m_blocks[block_idx]);
200 block = !m_last_block_mask || (block_idx < (m_blocks.
extent(0) - 1))
202 : block & m_last_block_mask;
204 return find_any_helper(block_idx, offset, block, scan_direction);
211 KOKKOS_INLINE_FUNCTION
214 unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD)
const {
215 const unsigned block_idx = hint >> block_shift;
216 const unsigned offset = hint & block_mask;
217 #ifdef KOKKOS_ENABLE_SYCL 218 unsigned block = Kokkos::atomic_load(&m_blocks[block_idx]);
220 unsigned block = volatile_load(&m_blocks[block_idx]);
222 block = !m_last_block_mask || (block_idx < (m_blocks.
extent(0) - 1))
224 : ~block & m_last_block_mask;
226 return find_any_helper(block_idx, offset, block, scan_direction);
229 KOKKOS_INLINE_FUNCTION constexpr
bool is_allocated()
const {
230 return m_blocks.is_allocated();
234 KOKKOS_FORCEINLINE_FUNCTION
236 unsigned offset,
unsigned block,
237 unsigned scan_direction)
const {
241 result.second = update_hint(block_idx, offset, scan_direction);
244 scan_block((block_idx << block_shift), offset, block, scan_direction);
249 KOKKOS_FORCEINLINE_FUNCTION
250 unsigned scan_block(
unsigned block_start,
int offset,
unsigned block,
251 unsigned scan_direction)
const {
252 offset = !(scan_direction & BIT_SCAN_REVERSE)
254 : (offset + block_mask) & block_mask;
255 block = Impl::rotate_right(block, offset);
256 return (((!(scan_direction & BIT_SCAN_REVERSE)
257 ? Impl::bit_scan_forward(block)
258 : Impl::int_log2(block)) +
264 KOKKOS_FORCEINLINE_FUNCTION
265 unsigned update_hint(
long long block_idx,
unsigned offset,
266 unsigned scan_direction)
const {
267 block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
268 block_idx = block_idx >= 0 ? block_idx : m_blocks.
extent(0) - 1;
270 block_idx < static_cast<long long>(m_blocks.
extent(0)) ? block_idx : 0;
272 return static_cast<unsigned>(block_idx) * block_size + offset;
277 unsigned m_last_block_mask;
278 View<unsigned*, Device, MemoryTraits<RandomAccess> > m_blocks;
281 template <
typename DDevice>
284 template <
typename DDevice>
285 friend class ConstBitset;
287 template <
typename Bitset>
288 friend struct Impl::BitsetCount;
290 template <
typename DstDevice,
typename SrcDevice>
291 friend void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice>
const& src);
293 template <
typename DstDevice,
typename SrcDevice>
294 friend void deep_copy(Bitset<DstDevice>& dst,
295 ConstBitset<SrcDevice>
const& src);
300 template <
typename Device>
303 using execution_space =
typename Device::execution_space;
304 using size_type =
unsigned int;
307 enum { block_size =
static_cast<unsigned>(
sizeof(unsigned) * CHAR_BIT) };
308 enum { block_mask = block_size - 1u };
309 enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };
313 ConstBitset() : m_size(0) {}
316 ConstBitset(Bitset<Device>
const& rhs)
317 : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {}
320 ConstBitset(ConstBitset<Device>
const& rhs)
321 : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {}
324 ConstBitset<Device>& operator=(Bitset<Device>
const& rhs) {
325 this->m_size = rhs.m_size;
326 this->m_blocks = rhs.m_blocks;
332 ConstBitset<Device>& operator=(ConstBitset<Device>
const& rhs) {
333 this->m_size = rhs.m_size;
334 this->m_blocks = rhs.m_blocks;
339 KOKKOS_FORCEINLINE_FUNCTION
340 unsigned size()
const {
return m_size; }
342 unsigned count()
const {
343 Impl::BitsetCount<ConstBitset<Device> > f(*
this);
347 KOKKOS_FORCEINLINE_FUNCTION
348 bool test(
unsigned i)
const {
350 const unsigned block = m_blocks[i >> block_shift];
351 const unsigned mask = 1u << static_cast<int>(i & block_mask);
359 View<const unsigned*, Device, MemoryTraits<RandomAccess> > m_blocks;
362 template <
typename DDevice>
363 friend class ConstBitset;
365 template <
typename Bitset>
366 friend struct Impl::BitsetCount;
368 template <
typename DstDevice,
typename SrcDevice>
369 friend void deep_copy(Bitset<DstDevice>& dst,
370 ConstBitset<SrcDevice>
const& src);
372 template <
typename DstDevice,
typename SrcDevice>
373 friend void deep_copy(ConstBitset<DstDevice>& dst,
374 ConstBitset<SrcDevice>
const& src);
377 template <
typename DstDevice,
typename SrcDevice>
378 void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice>
const& src) {
379 if (dst.size() != src.size()) {
380 Kokkos::Impl::throw_runtime_exception(
381 "Error: Cannot deep_copy bitsets of different sizes!");
384 Kokkos::fence(
"Bitset::deep_copy: fence before copy operation");
385 Kokkos::Impl::DeepCopy<
typename DstDevice::memory_space,
386 typename SrcDevice::memory_space>(
387 dst.m_blocks.data(), src.m_blocks.data(),
388 sizeof(unsigned) * src.m_blocks.extent(0));
389 Kokkos::fence(
"Bitset::deep_copy: fence after copy operation");
392 template <
typename DstDevice,
typename SrcDevice>
393 void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice>
const& src) {
394 if (dst.size() != src.size()) {
395 Kokkos::Impl::throw_runtime_exception(
396 "Error: Cannot deep_copy bitsets of different sizes!");
399 Kokkos::fence(
"Bitset::deep_copy: fence before copy operation");
400 Kokkos::Impl::DeepCopy<
typename DstDevice::memory_space,
401 typename SrcDevice::memory_space>(
402 dst.m_blocks.data(), src.m_blocks.data(),
403 sizeof(unsigned) * src.m_blocks.extent(0));
404 Kokkos::fence(
"Bitset::deep_copy: fence after copy operation");
407 template <
typename DstDevice,
typename SrcDevice>
408 void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice>
const& src) {
409 if (dst.size() != src.size()) {
410 Kokkos::Impl::throw_runtime_exception(
411 "Error: Cannot deep_copy bitsets of different sizes!");
414 Kokkos::fence(
"Bitset::deep_copy: fence before copy operation");
415 Kokkos::Impl::DeepCopy<
typename DstDevice::memory_space,
416 typename SrcDevice::memory_space>(
417 dst.m_blocks.data(), src.m_blocks.data(),
418 sizeof(unsigned) * src.m_blocks.extent(0));
419 Kokkos::fence(
"Bitset::deep_copy: fence after copy operation");
424 #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET 425 #undef KOKKOS_IMPL_PUBLIC_INCLUDE 426 #undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_BITSET 428 #endif // KOKKOS_BITSET_HPP A thread safe view to a bitset.
KOKKOS_FORCEINLINE_FUNCTION unsigned max_hint() const
Bitset(unsigned arg_size=0u)
Replacement for std::pair that works on CUDA devices.
KOKKOS_FORCEINLINE_FUNCTION bool test(unsigned i) const
KOKKOS_FORCEINLINE_FUNCTION unsigned size() const
KOKKOS_INLINE_FUNCTION Kokkos::pair< bool, unsigned > find_any_unset_near(unsigned hint, unsigned scan_direction=BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const
KOKKOS_INLINE_FUNCTION Kokkos::pair< bool, unsigned > find_any_set_near(unsigned hint, unsigned scan_direction=BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const
KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t< std::is_integral< iType >::value, size_t > extent(const iType &r) const noexcept
rank() to be implemented
KOKKOS_FORCEINLINE_FUNCTION bool reset(unsigned i) const