# RUN: llc -march=amdgcn -mcpu=fiji -run-pass si-insert-waits  %s -o - | FileCheck %s
# Exercises S_WAITCNT placement by the si-insert-waits pass around FLAT loads
# and stores.

--- |
  ; Placeholder IR for the MIR functions that follow. Every body is just
  ; `ret void`; the instructions under test live in the MIR document that
  ; carries the same name.

  ; The pointer arguments exist so the memory operands in the MIR body have
  ; IR values to refer to (%ir.global4, %ir.global16, %ir.flat4, %ir.flat16).
  define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
                                 <4 x i32> addrspace(1)* %global16,
                                 i32 addrspace(4)* %flat4,
                                 <4 x i32> addrspace(4)* %flat16) {
    ret void
  }

  define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
    ret void
  }

  define amdgpu_kernel void @single_branch_successor_not_next_block() {
    ret void
  }

...
---
# Each block issues two FLAT loads and then a V_MOV that overwrites %vgpr0,
# the destination of the first load. The blocks differ only in the memory
# operands attached to the loads.

# CHECK-LABEL: name: flat_zero_waitcnt

# CHECK-LABEL: bb.0:
# CHECK: FLAT_LOAD_DWORD
# CHECK: FLAT_LOAD_DWORDX4
# Both loads have global memory operands. Global loads will return in order,
# so we should emit:
# s_waitcnt vmcnt(1) lgkmcnt(0)
# CHECK-NEXT: S_WAITCNT 113

# CHECK-LABEL: bb.1:
# CHECK: FLAT_LOAD_DWORD
# CHECK: S_WAITCNT 3952
# CHECK: FLAT_LOAD_DWORDX4
# The first load has no mem operand, so we should assume it accesses the flat
# address space.
# s_waitcnt vmcnt(0) lgkmcnt(0)
# NOTE(review): 127 differs from 113 above only in the vmcnt field, so this
# wait on its own looks like lgkmcnt(0) only; the vmcnt(0) part presumably
# comes from the S_WAITCNT 3952 emitted before the second load -- confirm.
# CHECK-NEXT: S_WAITCNT 127

# CHECK-LABEL: bb.2:
# CHECK: FLAT_LOAD_DWORD
# CHECK: S_WAITCNT 3952
# CHECK: FLAT_LOAD_DWORDX4

# An outstanding load accesses the flat address space, so we should emit:
# s_waitcnt vmcnt(0) lgkmcnt(0)
# CHECK-NEXT: S_WAITCNT 127

name: flat_zero_waitcnt

body: |
  bb.0:
    successors: %bb.1
    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4)
    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
    %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2
    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16)
    %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
    S_BRANCH %bb.2

  bb.2:
    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4)
    %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16)
    %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec
    S_ENDPGM
...
---
# There is only a single fallthrough successor block, so there's no
# need to wait immediately.
#
# bb.0 ends right after the load, with no terminator. The loaded %vgpr0 is
# first read by the store in bb.1, so the wait is expected there, directly
# before that store.

# CHECK-LABEL: name: single_fallthrough_successor_no_end_block_wait
# CHECK:   %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2
# CHECK-NOT: S_WAITCNT

# CHECK: bb.1:
# CHECK-NEXT: V_LSHLREV_B64
# CHECK-NEXT: S_WAITCNT 112
# CHECK-NEXT: FLAT_STORE_DWORD
name: single_fallthrough_successor_no_end_block_wait

body: |
  bb.0:
    successors: %bb.1
    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr

  bb.1:
    %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
    FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
    S_ENDPGM
...
---
# The block has a single predecessor with a single successor, but it
# is not the next block so it's non-obvious that the wait is not needed.
#
# bb.0 branches over bb.1 to bb.2. The expected output has the wait at the
# end of bb.0, immediately after the load, and no wait inside bb.2 between
# the shift and the store that reads %vgpr0.


# CHECK-LABEL: name: single_branch_successor_not_next_block
# CHECK:   %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2
# CHECK-NEXT: S_WAITCNT 112

# CHECK: bb.1
# CHECK-NEXT: FLAT_STORE_DWORD
# CHECK-NEXT: S_ENDPGM

# CHECK: bb.2:
# CHECK-NEXT: V_LSHLREV_B64
# CHECK-NEXT: FLAT_STORE_DWORD
name: single_branch_successor_not_next_block

body: |
  bb.0:
    successors: %bb.2
    %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr
    S_BRANCH %bb.2

  bb.1:
    FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, 0, implicit %exec, implicit %flat_scr
    S_ENDPGM

  bb.2:
    %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec
    FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr
    S_ENDPGM
...
129