1 //===------- Utils.cpp - OpenMP device runtime utility functions -- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // 10 //===----------------------------------------------------------------------===// 11 12 #include "Utils.h" 13 14 #include "Interface.h" 15 #include "Mapping.h" 16 17 #pragma omp declare target 18 19 using namespace _OMP; 20 21 namespace _OMP { 22 /// Helper to keep code alive without introducing a performance penalty. 23 __attribute__((used, weak, optnone)) void keepAlive() { 24 __kmpc_get_hardware_thread_id_in_block(); 25 __kmpc_get_hardware_num_threads_in_block(); 26 __kmpc_barrier_simple_spmd(nullptr, 0); 27 } 28 } // namespace _OMP 29 30 namespace impl { 31 32 /// AMDGCN Implementation 33 /// 34 ///{ 35 #pragma omp begin declare variant match(device = {arch(amdgcn)}) 36 37 void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) { 38 static_assert(sizeof(unsigned long) == 8, ""); 39 *LowBits = (uint32_t)(Val & 0x00000000FFFFFFFFUL); 40 *HighBits = (uint32_t)((Val & 0xFFFFFFFF00000000UL) >> 32); 41 } 42 43 uint64_t Pack(uint32_t LowBits, uint32_t HighBits) { 44 return (((uint64_t)HighBits) << 32) | (uint64_t)LowBits; 45 } 46 47 #pragma omp end declare variant 48 49 /// NVPTX Implementation 50 /// 51 ///{ 52 #pragma omp begin declare variant match( \ 53 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) 54 55 void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) { 56 uint32_t LowBitsLocal, HighBitsLocal; 57 asm("mov.b64 {%0,%1}, %2;" 58 : "=r"(LowBitsLocal), "=r"(HighBitsLocal) 59 : "l"(Val)); 60 *LowBits = LowBitsLocal; 61 *HighBits = HighBitsLocal; 62 } 63 64 uint64_t Pack(uint32_t LowBits, uint32_t HighBits) { 65 uint64_t Val; 66 asm("mov.b64 %0, {%1,%2};" : "=l"(Val) : "r"(LowBits), "r"(HighBits)); 67 return Val; 68 } 69 70 #pragma omp end declare variant 71 72 /// AMDGCN Implementation 73 /// 74 ///{ 75 #pragma omp begin declare variant match(device = {arch(amdgcn)}) 76 77 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) { 78 int Width = mapping::getWarpSize(); 79 int Self = mapping::getThreadIdInWarp(); 80 int Index = SrcLane + (Self & ~(Width - 1)); 81 return __builtin_amdgcn_ds_bpermute(Index << 2, Var); 82 } 83 84 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta, 85 int32_t Width) { 86 int Self = mapping::getThreadIdInWarp(); 87 int Index = Self + LaneDelta; 88 Index = (int)(LaneDelta + (Self & (Width - 1))) >= Width ? Self : Index; 89 return __builtin_amdgcn_ds_bpermute(Index << 2, Var); 90 } 91 92 #pragma omp end declare variant 93 ///} 94 95 /// NVPTX Implementation 96 /// 97 ///{ 98 #pragma omp begin declare variant match( \ 99 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) 100 101 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) { 102 return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, 0x1f); 103 } 104 105 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) { 106 int32_t T = ((mapping::getWarpSize() - Width) << 8) | 0x1f; 107 return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T); 108 } 109 110 #pragma omp end declare variant 111 } // namespace impl 112 113 uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) { 114 return impl::Pack(LowBits, HighBits); 115 } 116 117 void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) { 118 impl::Unpack(Val, &LowBits, &HighBits); 119 } 120 121 int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) { 122 return impl::shuffle(Mask, Var, SrcLane); 123 } 124 125 int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, 126 int32_t Width) { 127 return impl::shuffleDown(Mask, Var, Delta, Width); 128 } 129 130 extern "C" { 131 int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) { 132 return impl::shuffleDown(lanes::All, Val, Delta, SrcLane); 133 } 134 135 int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) { 136 uint32_t lo, hi; 137 utils::unpack(Val, lo, hi); 138 hi = impl::shuffleDown(lanes::All, hi, Delta, Width); 139 lo = impl::shuffleDown(lanes::All, lo, Delta, Width); 140 return utils::pack(lo, hi); 141 } 142 } 143 144 #pragma omp end declare target 145