1 //===------- Utils.cpp - OpenMP device runtime utility functions -- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
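//
// This file contains utility helpers for the OpenMP device runtime: 64-bit
// pack/unpack, warp shuffle primitives for the AMDGCN and NVPTX targets, and
// the __kmpc_shuffle_* entry points built on top of them.
//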
10 //===----------------------------------------------------------------------===//
11 
12 #include "Utils.h"
13 
14 #include "Interface.h"
15 #include "Mapping.h"
16 
17 #pragma omp declare target
18 
19 using namespace _OMP;
20 
namespace _OMP {
/// Helper to keep code alive without introducing a performance penalty.
///
/// Nothing in this file calls this function. Its body only references three
/// runtime entry points (the hardware thread id query, the hardware thread
/// count query and the simple SPMD barrier) and discards their results.
/// The attributes do the real work: `used` forces the definition to be
/// emitted although it is unreferenced, `weak` gives it weak linkage, and
/// `optnone` stops the optimizer from rewriting or removing the calls.
/// NOTE(review): presumably the goal is to keep those three entry points from
/// being dropped before later pipeline stages need them -- confirm.
__attribute__((used, weak, optnone)) void keepAlive() {
  __kmpc_get_hardware_thread_id_in_block();
  __kmpc_get_hardware_num_threads_in_block();
  __kmpc_barrier_simple_spmd(nullptr, 0);
}
} // namespace _OMP
29 
30 namespace impl {
31 
/// AMDGCN implementation of Pack/Unpack
///
///{
35 #pragma omp begin declare variant match(device = {arch(amdgcn)})
36 
/// Split the 64-bit value \p Val into its two 32-bit halves.
///
/// \param Val      Value to split.
/// \param LowBits  [out] Receives bits 0-31 of \p Val.
/// \param HighBits [out] Receives bits 32-63 of \p Val.
///
/// Both output pointers are dereferenced unconditionally and must be valid.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
  // Work directly on the fixed-width uint64_t: truncation keeps the low word
  // and a 32-bit right shift exposes the high word. This avoids masks whose
  // type depends on `unsigned long` being 64 bits wide.
  *LowBits = static_cast<uint32_t>(Val);
  *HighBits = static_cast<uint32_t>(Val >> 32);
}
42 
/// Combine two 32-bit words into one 64-bit value.
///
/// \param LowBits  Word placed in bits 0-31 of the result.
/// \param HighBits Word placed in bits 32-63 of the result.
/// \returns The 64-bit value formed from both words.
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
  // Widen before shifting so the high word is not shifted out of a 32-bit type.
  const uint64_t Upper = static_cast<uint64_t>(HighBits) << 32;
  const uint64_t Lower = static_cast<uint64_t>(LowBits);
  return Upper | Lower;
}
46 
47 #pragma omp end declare variant
48 
///}

/// NVPTX implementation of Pack/Unpack
///
///{
52 #pragma omp begin declare variant match(                                       \
53     device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
54 
/// Split the 64-bit value \p Val into two 32-bit words using a PTX `mov.b64`
/// with a two-element destination vector.
///
/// \param Val      Value to split; passed to the asm as a 64-bit register
///                 operand (constraint "l").
/// \param LowBits  [out] Receives the first element of the destination pair.
/// \param HighBits [out] Receives the second element of the destination pair.
///
/// Both output pointers are dereferenced unconditionally.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
  // The asm writes into locals (32-bit register outputs, constraint "=r")
  // which are then stored through the caller's pointers.
  uint32_t LowBitsLocal, HighBitsLocal;
  asm("mov.b64 {%0,%1}, %2;"
      : "=r"(LowBitsLocal), "=r"(HighBitsLocal)
      : "l"(Val));
  *LowBits = LowBitsLocal;
  *HighBits = HighBitsLocal;
}
63 
/// Combine two 32-bit words into one 64-bit value using a PTX `mov.b64` with
/// a two-element source vector.
///
/// \param LowBits  First element of the source pair (32-bit register, "r").
/// \param HighBits Second element of the source pair (32-bit register, "r").
/// \returns The 64-bit register result of the move (constraint "=l").
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
  // Val is written only by the asm statement below.
  uint64_t Val;
  asm("mov.b64 %0, {%1,%2};" : "=l"(Val) : "r"(LowBits), "r"(HighBits));
  return Val;
}
69 
70 #pragma omp end declare variant
71 
///}

/// AMDGCN implementation of shuffle/shuffleDown
///
///{
75 #pragma omp begin declare variant match(device = {arch(amdgcn)})
76 
77 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
78   int Width = mapping::getWarpSize();
79   int Self = mapping::getThreadIdInWarp();
80   int Index = SrcLane + (Self & ~(Width - 1));
81   return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
82 }
83 
84 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
85                     int32_t Width) {
86   int Self = mapping::getThreadIdInWarp();
87   int Index = Self + LaneDelta;
88   Index = (int)(LaneDelta + (Self & (Width - 1))) >= Width ? Self : Index;
89   return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
90 }
91 
92 #pragma omp end declare variant
93 ///}
94 
/// NVPTX implementation of shuffle/shuffleDown
///
///{
98 #pragma omp begin declare variant match(                                       \
99     device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
100 
101 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
102   return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, 0x1f);
103 }
104 
105 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) {
106   int32_t T = ((mapping::getWarpSize() - Width) << 8) | 0x1f;
107   return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
108 }
109 
110 #pragma omp end declare variant
111 } // namespace impl
112 
113 uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) {
114   return impl::Pack(LowBits, HighBits);
115 }
116 
117 void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) {
118   impl::Unpack(Val, &LowBits, &HighBits);
119 }
120 
121 int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
122   return impl::shuffle(Mask, Var, SrcLane);
123 }
124 
125 int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,
126                            int32_t Width) {
127   return impl::shuffleDown(Mask, Var, Delta, Width);
128 }
129 
130 extern "C" {
131 int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) {
132   return impl::shuffleDown(lanes::All, Val, Delta, SrcLane);
133 }
134 
135 int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) {
136   uint32_t lo, hi;
137   utils::unpack(Val, lo, hi);
138   hi = impl::shuffleDown(lanes::All, hi, Delta, Width);
139   lo = impl::shuffleDown(lanes::All, lo, Delta, Width);
140   return utils::pack(lo, hi);
141 }
142 }
143 
144 #pragma omp end declare target
145