1 //===------- Utils.cpp - OpenMP device runtime utility functions -- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
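// Utility helpers for the OpenMP device runtime: 64-bit pack/unpack and warp
// shuffle primitives, each with an AMDGCN and an NVPTX variant.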
10 //===----------------------------------------------------------------------===//
11 
12 #include "Utils.h"
13 
14 #include "Interface.h"
15 #include "Mapping.h"
16 
17 #pragma omp declare target
18 
19 using namespace _OMP;
20 
21 namespace _OMP {
22 /// Helper to keep code alive without introducing a performance penalty.
23 __attribute__((used, weak, optnone)) void keepAlive() {
24   __kmpc_barrier_simple_spmd(nullptr, 0);
25 }
26 } // namespace _OMP
27 
28 namespace impl {
29 
30 /// AMDGCN Implementation
31 ///
32 ///{
33 #pragma omp begin declare variant match(device = {arch(amdgcn)})
34 
/// Split the 64-bit value \p Val into two 32-bit halves.
///
/// \param Val      Value to split.
/// \param LowBits  Receives bits [0, 32) of \p Val.
/// \param HighBits Receives bits [32, 64) of \p Val.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
  const uint64_t Lower = Val & UINT64_C(0xFFFFFFFF);
  const uint64_t Upper = Val >> 32;
  *LowBits = static_cast<uint32_t>(Lower);
  *HighBits = static_cast<uint32_t>(Upper);
}
39 
/// Combine two 32-bit halves into one 64-bit value.
///
/// \param LowBits  Becomes bits [0, 32) of the result.
/// \param HighBits Becomes bits [32, 64) of the result.
/// \returns The combined 64-bit value.
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
  uint64_t Packed = HighBits;
  Packed <<= 32;
  Packed |= LowBits;
  return Packed;
}
43 
44 #pragma omp end declare variant
45 
///}

/// NVPTX Implementation
///
///{
49 #pragma omp begin declare variant match(                                       \
50     device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
51 
/// Split the 64-bit value \p Val into two 32-bit halves with a single PTX
/// `mov.b64` into a register pair.
///
/// \param Val      Value to split.
/// \param LowBits  Receives the first element of the unpacked pair.
/// \param HighBits Receives the second element of the unpacked pair.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
  uint32_t Lo;
  uint32_t Hi;
  asm("mov.b64 {%0,%1}, %2;" : "=r"(Lo), "=r"(Hi) : "l"(Val));
  *LowBits = Lo;
  *HighBits = Hi;
}
60 
/// Combine two 32-bit halves into one 64-bit value with a single PTX
/// `mov.b64` from a register pair.
///
/// \param LowBits  First element of the packed pair.
/// \param HighBits Second element of the packed pair.
/// \returns The combined 64-bit value.
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
  uint64_t Packed;
  asm("mov.b64 %0, {%1,%2};" : "=l"(Packed) : "r"(LowBits), "r"(HighBits));
  return Packed;
}
66 
67 #pragma omp end declare variant
68 
///}

/// AMDGCN Implementation
///
///{
72 #pragma omp begin declare variant match(device = {arch(amdgcn)})
73 
74 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
75   int Width = mapping::getWarpSize();
76   int Self = mapping::getgetThreadIdInWarp();
77   int Index = SrcLane + (Self & ~(Width - 1));
78   return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
79 }
80 
81 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
82                     int32_t Width) {
83   int Self = mapping::getThreadIdInWarp();
84   int Index = Self + LaneDelta;
85   Index = (int)(LaneDelta + (Self & (Width - 1))) >= Width ? Self : Index;
86   return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
87 }
88 
89 #pragma omp end declare variant
90 ///}
91 
92 /// NVPTX Implementation
93 ///
94 ///{
95 #pragma omp begin declare variant match(                                       \
96     device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
97 
98 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
99   return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, 0x1f);
100 }
101 
102 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) {
103   int32_t T = ((mapping::getWarpSize() - Width) << 8) | 0x1f;
104   return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
105 }
106 
107 #pragma omp end declare variant
108 } // namespace impl
109 
110 uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) {
111   return impl::Pack(LowBits, HighBits);
112 }
113 
114 void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) {
115   impl::Unpack(Val, &LowBits, &HighBits);
116 }
117 
118 int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
119   return impl::shuffle(Mask, Var, SrcLane);
120 }
121 
122 int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,
123                            int32_t Width) {
124   return impl::shuffleDown(Mask, Var, Delta, Width);
125 }
126 
127 extern "C" {
128 int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) {
129   return impl::shuffleDown(lanes::All, Val, Delta, SrcLane);
130 }
131 
132 int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) {
133   uint32_t lo, hi;
134   utils::unpack(Val, lo, hi);
135   hi = impl::shuffleDown(lanes::All, hi, Delta, Width);
136   lo = impl::shuffleDown(lanes::All, lo, Delta, Width);
137   return utils::pack(lo, hi);
138 }
139 }
140 
141 #pragma omp end declare target
142