1 //===------- Utils.cpp - OpenMP device runtime utility functions -- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // 10 //===----------------------------------------------------------------------===// 11 12 #include "Utils.h" 13 14 #include "Interface.h" 15 #include "Mapping.h" 16 17 #pragma omp declare target 18 19 using namespace _OMP; 20 21 namespace _OMP { 22 /// Helper to keep code alive without introducing a performance penalty. 23 __attribute__((used, weak, optnone)) void keepAlive() { 24 __kmpc_barrier_simple_spmd(nullptr, 0); 25 } 26 } // namespace _OMP 27 28 namespace impl { 29 30 /// AMDGCN Implementation 31 /// 32 ///{ 33 #pragma omp begin declare variant match(device = {arch(amdgcn)}) 34 35 void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) { 36 *LowBits = (uint32_t)(Val & UINT64_C(0x00000000FFFFFFFF)); 37 *HighBits = (uint32_t)((Val & UINT64_C(0xFFFFFFFF00000000)) >> 32); 38 } 39 40 uint64_t Pack(uint32_t LowBits, uint32_t HighBits) { 41 return (((uint64_t)HighBits) << 32) | (uint64_t)LowBits; 42 } 43 44 #pragma omp end declare variant 45 46 /// NVPTX Implementation 47 /// 48 ///{ 49 #pragma omp begin declare variant match( \ 50 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) 51 52 void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) { 53 uint32_t LowBitsLocal, HighBitsLocal; 54 asm("mov.b64 {%0,%1}, %2;" 55 : "=r"(LowBitsLocal), "=r"(HighBitsLocal) 56 : "l"(Val)); 57 *LowBits = LowBitsLocal; 58 *HighBits = HighBitsLocal; 59 } 60 61 uint64_t Pack(uint32_t LowBits, uint32_t HighBits) { 62 uint64_t Val; 63 asm("mov.b64 %0, {%1,%2};" : "=l"(Val) : "r"(LowBits), "r"(HighBits)); 64 return Val; 65 } 66 67 #pragma omp end declare variant 68 69 /// AMDGCN Implementation 70 /// 71 ///{ 72 #pragma omp begin declare variant match(device = {arch(amdgcn)}) 73 74 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) { 75 int Width = mapping::getWarpSize(); 76 int Self = mapping::getgetThreadIdInWarp(); 77 int Index = SrcLane + (Self & ~(Width - 1)); 78 return __builtin_amdgcn_ds_bpermute(Index << 2, Var); 79 } 80 81 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta, 82 int32_t Width) { 83 int Self = mapping::getThreadIdInWarp(); 84 int Index = Self + LaneDelta; 85 Index = (int)(LaneDelta + (Self & (Width - 1))) >= Width ? Self : Index; 86 return __builtin_amdgcn_ds_bpermute(Index << 2, Var); 87 } 88 89 #pragma omp end declare variant 90 ///} 91 92 /// NVPTX Implementation 93 /// 94 ///{ 95 #pragma omp begin declare variant match( \ 96 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) 97 98 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) { 99 return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, 0x1f); 100 } 101 102 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) { 103 int32_t T = ((mapping::getWarpSize() - Width) << 8) | 0x1f; 104 return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T); 105 } 106 107 #pragma omp end declare variant 108 } // namespace impl 109 110 uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) { 111 return impl::Pack(LowBits, HighBits); 112 } 113 114 void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) { 115 impl::Unpack(Val, &LowBits, &HighBits); 116 } 117 118 int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) { 119 return impl::shuffle(Mask, Var, SrcLane); 120 } 121 122 int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, 123 int32_t Width) { 124 return impl::shuffleDown(Mask, Var, Delta, Width); 125 } 126 127 extern "C" { 128 int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) { 129 return impl::shuffleDown(lanes::All, Val, Delta, SrcLane); 130 } 131 132 int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) { 133 uint32_t lo, hi; 134 utils::unpack(Val, lo, hi); 135 hi = impl::shuffleDown(lanes::All, hi, Delta, Width); 136 lo = impl::shuffleDown(lanes::All, lo, Delta, Width); 137 return utils::pack(lo, hi); 138 } 139 } 140 141 #pragma omp end declare target 142