1 //===------- Utils.cpp - OpenMP device runtime utility functions -- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // 10 //===----------------------------------------------------------------------===// 11 12 #include "Utils.h" 13 14 #include "Interface.h" 15 #include "Mapping.h" 16 17 #pragma omp declare target 18 19 using namespace _OMP; 20 21 namespace _OMP { 22 /// Helper to keep code alive without introducing a performance penalty. 23 __attribute__((used, weak, optnone)) void keepAlive() { 24 __kmpc_get_hardware_thread_id_in_block(); 25 __kmpc_barrier_simple_spmd(nullptr, 0); 26 } 27 } // namespace _OMP 28 29 namespace impl { 30 31 /// AMDGCN Implementation 32 /// 33 ///{ 34 #pragma omp begin declare variant match(device = {arch(amdgcn)}) 35 36 void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) { 37 *LowBits = (uint32_t)(Val & UINT64_C(0x00000000FFFFFFFF)); 38 *HighBits = (uint32_t)((Val & UINT64_C(0xFFFFFFFF00000000)) >> 32); 39 } 40 41 uint64_t Pack(uint32_t LowBits, uint32_t HighBits) { 42 return (((uint64_t)HighBits) << 32) | (uint64_t)LowBits; 43 } 44 45 #pragma omp end declare variant 46 47 /// NVPTX Implementation 48 /// 49 ///{ 50 #pragma omp begin declare variant match( \ 51 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) 52 53 void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) { 54 uint32_t LowBitsLocal, HighBitsLocal; 55 asm("mov.b64 {%0,%1}, %2;" 56 : "=r"(LowBitsLocal), "=r"(HighBitsLocal) 57 : "l"(Val)); 58 *LowBits = LowBitsLocal; 59 *HighBits = HighBitsLocal; 60 } 61 62 uint64_t Pack(uint32_t LowBits, uint32_t HighBits) { 63 uint64_t Val; 64 asm("mov.b64 %0, {%1,%2};" : "=l"(Val) : "r"(LowBits), "r"(HighBits)); 65 return Val; 66 } 67 68 #pragma omp end declare variant 69 70 /// AMDGCN Implementation 71 /// 72 ///{ 73 #pragma omp begin declare variant match(device = {arch(amdgcn)}) 74 75 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) { 76 int Width = mapping::getWarpSize(); 77 int Self = mapping::getgetThreadIdInWarp(); 78 int Index = SrcLane + (Self & ~(Width - 1)); 79 return __builtin_amdgcn_ds_bpermute(Index << 2, Var); 80 } 81 82 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta, 83 int32_t Width) { 84 int Self = mapping::getThreadIdInWarp(); 85 int Index = Self + LaneDelta; 86 Index = (int)(LaneDelta + (Self & (Width - 1))) >= Width ? Self : Index; 87 return __builtin_amdgcn_ds_bpermute(Index << 2, Var); 88 } 89 90 #pragma omp end declare variant 91 ///} 92 93 /// NVPTX Implementation 94 /// 95 ///{ 96 #pragma omp begin declare variant match( \ 97 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) 98 99 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) { 100 return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, 0x1f); 101 } 102 103 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) { 104 int32_t T = ((mapping::getWarpSize() - Width) << 8) | 0x1f; 105 return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T); 106 } 107 108 #pragma omp end declare variant 109 } // namespace impl 110 111 uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) { 112 return impl::Pack(LowBits, HighBits); 113 } 114 115 void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) { 116 impl::Unpack(Val, &LowBits, &HighBits); 117 } 118 119 int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) { 120 return impl::shuffle(Mask, Var, SrcLane); 121 } 122 123 int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, 124 int32_t Width) { 125 return impl::shuffleDown(Mask, Var, Delta, Width); 126 } 127 128 extern "C" { 129 int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) { 130 return impl::shuffleDown(lanes::All, Val, Delta, SrcLane); 131 } 132 133 int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) { 134 uint32_t lo, hi; 135 utils::unpack(Val, lo, hi); 136 hi = impl::shuffleDown(lanes::All, hi, Delta, Width); 137 lo = impl::shuffleDown(lanes::All, lo, Delta, Width); 138 return utils::pack(lo, hi); 139 } 140 } 141 142 #pragma omp end declare target 143