# RUN: llc -march=amdgcn -mcpu=fiji -run-pass si-insert-waits %s -o - | FileCheck %s

# Exercises the si-insert-waits pass on pairs of FLAT loads whose memory
# operands point at different address spaces. Every block issues a
# FLAT_LOAD_DWORD into %vgpr0 and a FLAT_LOAD_DWORDX4, then overwrites %vgpr0
# with V_MOV_B32_e32; the test pins the S_WAITCNT immediate expected right
# after the second load:
#   bb.0 - both loads carry addrspace(1) (global) memory operands.
#   bb.1 - the first load carries no memory operand at all.
#   bb.2 - both loads carry addrspace(4) (flat) memory operands.

--- |
  define void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
                                 <4 x i32> addrspace(1)* %global16,
                                 i32 addrspace(4)* %flat4,
                                 <4 x i32> addrspace(4)* %flat16) {
    ret void
  }
...
---

# CHECK-LABEL: name: flat_zero_waitcnt

# CHECK-LABEL: bb.0:
# CHECK: FLAT_LOAD_DWORD
# CHECK: FLAT_LOAD_DWORDX4
# Global loads will return in order, so we should emit:
# s_waitcnt vmcnt(1) lgkmcnt(0)
# CHECK-NEXT: S_WAITCNT 113

# CHECK-LABEL: bb.1:
# CHECK: FLAT_LOAD_DWORD
# CHECK: FLAT_LOAD_DWORDX4
# The first load has no mem operand, so we should assume it accesses the flat
# address space and emit:
# s_waitcnt vmcnt(0) lgkmcnt(0)
# CHECK-NEXT: S_WAITCNT 112

# CHECK-LABEL: bb.2:
# CHECK: FLAT_LOAD_DWORD
# CHECK: FLAT_LOAD_DWORDX4
# The outstanding loads access the flat address space, so we should emit:
# s_waitcnt vmcnt(0) lgkmcnt(0)
# CHECK-NEXT: S_WAITCNT 112

name: flat_zero_waitcnt

body: |
  bb.0:
    successors: %bb.1
    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4)
    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
    %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2
    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
    %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
    S_BRANCH %bb.2

  bb.2:
    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4)
    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16)
    %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
    S_ENDPGM
...