# RUN: llc -march=amdgcn -mcpu=fiji -run-pass si-insert-waits %s -o - | FileCheck %s
#
# Exercises the si-insert-waits pass in isolation (see the llc invocation
# above) on three hand-written machine functions and verifies, through the
# FileCheck directives below, where S_WAITCNT instructions are inserted after
# FLAT loads and which immediate each one carries.
#
# NOTE(review): the decoded "s_waitcnt vmcnt(N) lgkmcnt(M)" forms quoted in the
# comments are reproduced as written by the test author; confirm them against
# the target's S_WAITCNT immediate encoding before relying on them.

--- |
  define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
                                 <4 x i32> addrspace(1)* %global16,
                                 i32 addrspace(4)* %flat4,
                                 <4 x i32> addrspace(4)* %flat16) {
    ret void
  }

  define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
    ret void
  }

  define amdgpu_kernel void @single_branch_successor_not_next_block() {
    ret void
  }

...
---

# Three blocks, each issuing a FLAT_LOAD_DWORD followed by a
# FLAT_LOAD_DWORDX4 and then overwriting %vgpr0 (the first load's result).
# The blocks differ only in the memory operands attached to the loads:
#   bb.0: both loads reference the addrspace(1) pointers of the kernel.
#   bb.1: the first load has no memory operand at all.
#   bb.2: both loads reference the addrspace(4) pointers of the kernel.

# CHECK-LABEL: name: flat_zero_waitcnt

# CHECK-LABEL: bb.0:
# CHECK: FLAT_LOAD_DWORD
# CHECK: FLAT_LOAD_DWORDX4
# Global loads will return in order so we should:
# s_waitcnt vmcnt(1) lgkmcnt(0)
# CHECK-NEXT: S_WAITCNT 113

# CHECK-LABEL: bb.1:
# CHECK: FLAT_LOAD_DWORD
# CHECK: S_WAITCNT 3952
# CHECK: FLAT_LOAD_DWORDX4
# The first load has no mem operand, so we should assume it accesses the flat
# address space.
# s_waitcnt vmcnt(0) lgkmcnt(0)
# CHECK-NEXT: S_WAITCNT 127

# CHECK-LABEL: bb.2:
# CHECK: FLAT_LOAD_DWORD
# CHECK: S_WAITCNT 3952
# CHECK: FLAT_LOAD_DWORDX4

# One outstanding load accesses the flat address space.
# s_waitcnt vmcnt(0) lgkmcnt(0)
# CHECK-NEXT: S_WAITCNT 127

name: flat_zero_waitcnt

body: |
  bb.0:
    successors: %bb.1
    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4)
    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
    %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2
    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
    %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
    S_BRANCH %bb.2

  bb.2:
    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4)
    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16)
    %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
    S_ENDPGM
...
---
# There is only a single fallthrough successor block, so there's no
# need to wait immediately.
# The load result (%vgpr0) is first consumed by the store in bb.1, so the
# wait is expected there, directly in front of that store.

# CHECK-LABEL: name: single_fallthrough_successor_no_end_block_wait
# CHECK: %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2
# CHECK-NOT: S_WAITCNT

# CHECK: bb.1:
# CHECK-NEXT: V_LSHLREV_B64
# CHECK-NEXT: S_WAITCNT 112
# CHECK-NEXT: FLAT_STORE_DWORD
name: single_fallthrough_successor_no_end_block_wait

body: |
  bb.0:
    successors: %bb.1
    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr

  bb.1:
    %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
    FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
    S_ENDPGM
...
---
# The block has a single predecessor with a single successor, but it
# is not the next block so it's non-obvious that the wait is not needed.
# The expected output therefore has the wait immediately after the load in
# bb.0, and no wait in either bb.1 or bb.2.


# CHECK-LABEL: name: single_branch_successor_not_next_block
# CHECK: %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2
# CHECK-NEXT: S_WAITCNT 112

# CHECK: bb.1
# CHECK-NEXT: FLAT_STORE_DWORD
# CHECK-NEXT: S_ENDPGM

# CHECK: bb.2:
# CHECK-NEXT: V_LSHLREV_B64
# CHECK-NEXT: FLAT_STORE_DWORD
name: single_branch_successor_not_next_block

body: |
  bb.0:
    successors: %bb.2
    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
    S_BRANCH %bb.2

  bb.1:
    FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, 0, implicit %exec, implicit %flat_scr
    S_ENDPGM

  bb.2:
    %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
    FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
    S_ENDPGM
...