1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
3# Stub LLVM IR: one empty kernel per MIR function below; only flat_zero_waitcnt has arguments, which its memory operands name via %ir.*.
4--- |
5  define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
6                                 <4 x i32> addrspace(1)* %global16,
7                                 i32* %flat4,
8                                 <4 x i32>* %flat16) {
9    ret void
10  }
11
12  define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
13    ret void
14  }
15
16  define amdgpu_kernel void @single_branch_successor_not_next_block() {
17    ret void
18  }
19
20  define amdgpu_kernel void @preexisting_waitcnt() {
21    ret void
22  }
23
24  define amdgpu_kernel void @bundle_no_waitcnt() {
25    ret void
26  }
27
28  define amdgpu_kernel void @preexisting_waitcnt_in_bundle() {
29    ret void
30  }
31
32  define amdgpu_kernel void @insert_in_bundle() {
33    ret void
34  }
35
36  define amdgpu_kernel void @exit_bundle() {
37    ret void
38  }
39
40  define amdgpu_kernel void @cross_bundle() {
41    ret void
42  }
43
44...
45---
46# flat_zero_waitcnt: each of the three blocks issues two FLAT loads into AGPRs.
47# In bb.0 both memory operands refer to global (addrspace 1) IR pointers.
48# Global loads will return in order, so in bb.0 we should see:
49# s_waitcnt vmcnt(1)
50# In bb.1 the first load carries no memory operand; there we expect:
51# s_waitcnt vmcnt(0)
52# In bb.2 the memory operands refer to the generic pointers %flat4/%flat16:
53# s_waitcnt vmcnt(0)
54# NOTE(review): presumably these are S_WAITCNT 3953 / 3952 / 3952 in the checks below - confirm the encoding.
55name: flat_zero_waitcnt
56
57body: |
58  ; GCN-LABEL: name: flat_zero_waitcnt
59  ; GCN: bb.0:
60  ; GCN:   successors: %bb.1(0x80000000)
61  ; GCN:   S_WAITCNT 0
62  ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4, addrspace 1)
63  ; GCN:   $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
64  ; GCN:   S_WAITCNT 3953
65  ; GCN:   $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
66  ; GCN:   S_BRANCH %bb.1
67  ; GCN: bb.1:
68  ; GCN:   successors: %bb.2(0x80000000)
69  ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
70  ; GCN:   S_WAITCNT 3952
71  ; GCN:   $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
72  ; GCN:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
73  ; GCN:   S_BRANCH %bb.2
74  ; GCN: bb.2:
75  ; GCN:   S_WAITCNT 49279
76  ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
77  ; GCN:   S_WAITCNT 3952
78  ; GCN:   $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
79  ; GCN:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
80  ; GCN:   S_ENDPGM 0
81  bb.0:
82    successors: %bb.1
83    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4)
84    $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
85    $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
86    S_BRANCH %bb.1
87
88  bb.1:
89    successors: %bb.2
90    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
91    $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
92    $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
93    S_BRANCH %bb.2
94
95  bb.2:
96    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
97    $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
98    $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
99    S_ENDPGM 0
100...
101---
102# There is only a single fallthrough successor block, so there is no
103# need to wait at the end of bb.0 immediately after the load.
104# The expected output instead waits in bb.1, directly before the store that reads $agpr0.
105
106name: single_fallthrough_successor_no_end_block_wait
107
108body: |
109  ; GCN-LABEL: name: single_fallthrough_successor_no_end_block_wait
110  ; GCN: bb.0:
111  ; GCN:   successors: %bb.1(0x80000000)
112  ; GCN:   S_WAITCNT 0
113  ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
114  ; GCN: bb.1:
115  ; GCN:   $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
116  ; GCN:   S_WAITCNT 112
117  ; GCN:   FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
118  ; GCN:   S_ENDPGM 0
119  bb.0:
120    successors: %bb.1
121    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
122
123  bb.1:
124    $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
125    FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
126    S_ENDPGM 0
127...
128---
129# The block has a single predecessor with a single successor, but it
130# is not the next block, so it is non-obvious that the wait is not needed.
131# bb.0 branches over bb.1 to bb.2. The expected output has no wait after the
132# load in bb.0 and places the wait in bb.2, directly before the store of $agpr0.
133# bb.1 stores $agpr10 and is expected to get no wait at all.
134
135name: single_branch_successor_not_next_block
136
137body: |
138  ; GCN-LABEL: name: single_branch_successor_not_next_block
139  ; GCN: bb.0:
140  ; GCN:   successors: %bb.2(0x80000000)
141  ; GCN:   S_WAITCNT 0
142  ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
143  ; GCN:   S_BRANCH %bb.2
144  ; GCN: bb.1:
145  ; GCN:   FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
146  ; GCN:   S_ENDPGM 0
147  ; GCN: bb.2:
148  ; GCN:   $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
149  ; GCN:   S_WAITCNT 112
150  ; GCN:   FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
151  ; GCN:   S_ENDPGM 0
152  bb.0:
153    successors: %bb.2
154    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
155   S_BRANCH %bb.2
156
157  bb.1:
158    FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
159    S_ENDPGM 0
160
161  bb.2:
162    $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
163    FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
164    S_ENDPGM 0
165...
166# The input already has an S_WAITCNT 0 between the load and the store, so no further wait may be added after it.
167# GCN-LABEL: name: preexisting_waitcnt{{$}}
168# GCN: FLAT_LOAD_DWORD
169# GCN-NEXT: S_WAITCNT 0
170# GCN-NOT: S_WAITCNT
171name: preexisting_waitcnt
172tracksRegLiveness: true
173machineFunctionInfo:
174  isEntryFunction: true
175body: |
176  bb.0:
177    liveins: $vgpr1_vgpr2
178    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
179    S_WAITCNT 0
180    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
181
182...
183
184---
185# The bundle between the load and the store holds only S_NOPs, so the wait is expected after the bundle, directly before the store.
186name: bundle_no_waitcnt
187tracksRegLiveness: true
188machineFunctionInfo:
189  isEntryFunction: true
190body: |
191  bb.0:
192    liveins: $vgpr1_vgpr2
193    ; GCN-LABEL: name: bundle_no_waitcnt
194    ; GCN: liveins: $vgpr1_vgpr2
195    ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
196    ; GCN: BUNDLE {
197    ; GCN:   S_NOP 0
198    ; GCN:   S_NOP 0
199    ; GCN: }
200    ; GCN: S_WAITCNT 112
201    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
202    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
203    BUNDLE {
204      S_NOP 0
205      S_NOP 0
206    }
207    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
208
209...
210
211---
212# The S_WAITCNT 0 already sits inside the bundle between the load and the store.
213# The pass should see the waitcnt inside the bundle and not insert an extra one before the store.
214name: preexisting_waitcnt_in_bundle
215tracksRegLiveness: true
216machineFunctionInfo:
217  isEntryFunction: true
218body: |
219  bb.0:
220    liveins: $vgpr1_vgpr2
221    ; GCN-LABEL: name: preexisting_waitcnt_in_bundle
222    ; GCN: liveins: $vgpr1_vgpr2
223    ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
224    ; GCN: BUNDLE {
225    ; GCN:   S_NOP 0
226    ; GCN:   S_WAITCNT 0
227    ; GCN: }
228    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
229    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
230    BUNDLE {
231      S_NOP 0
232      S_WAITCNT 0
233    }
234    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
235
236...
237
238---
239
240# Def and use are both inside the same bundle.
241# The wait is expected inside the bundle, between the load and the store that reads $agpr0.
242name: insert_in_bundle
243tracksRegLiveness: true
244machineFunctionInfo:
245  isEntryFunction: true
246body: |
247  bb.0:
248    liveins: $vgpr1_vgpr2
249    ; GCN-LABEL: name: insert_in_bundle
250    ; GCN: liveins: $vgpr1_vgpr2
251    ; GCN: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
252    ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
253    ; GCN:   S_WAITCNT 112
254    ; GCN:   FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
255    ; GCN: }
256    BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
257    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
258    FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
259    }
260...
261
262---
263
264# Def is the last instruction in a bundle; the use is outside the bundle.
265# The wait is expected after the bundle closes, directly before the store.
266
267name: exit_bundle
268tracksRegLiveness: true
269machineFunctionInfo:
270  isEntryFunction: true
271body: |
272  bb.0:
273    liveins: $vgpr1_vgpr2
274    ; GCN-LABEL: name: exit_bundle
275    ; GCN: liveins: $vgpr1_vgpr2
276    ; GCN: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
277    ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
278    ; GCN: }
279    ; GCN: S_WAITCNT 112
280    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
281    BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
282    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
283    }
284
285    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
286
287...
288
289---
290
291# Def is in one bundle; the use is in another bundle.
292# The wait is expected between the two bundles, not inside either of them.
293
294name: cross_bundle
295tracksRegLiveness: true
296machineFunctionInfo:
297  isEntryFunction: true
298body: |
299  bb.0:
300    liveins: $vgpr1_vgpr2
301    ; GCN-LABEL: name: cross_bundle
302    ; GCN: liveins: $vgpr1_vgpr2
303    ; GCN: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
304    ; GCN:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
305    ; GCN: }
306    ; GCN: S_WAITCNT 112
307    ; GCN: BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
308    ; GCN:   FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
309    ; GCN: }
310    BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
311    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
312    }
313    BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
314      FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
315    }
316...
317