1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
3
# LLVM IR stubs: one empty amdgpu_kernel per MIR function in this file. The
# pointer arguments of @flat_zero_waitcnt are what its MIR memory operands
# refer to as %ir.global4, %ir.global16, %ir.flat4 and %ir.flat16.
4--- |
5  define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
6                                 <4 x i32> addrspace(1)* %global16,
7                                 i32* %flat4,
8                                 <4 x i32>* %flat16) {
9    ret void
10  }
11
12  define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
13    ret void
14  }
15
16  define amdgpu_kernel void @single_branch_successor_not_next_block() {
17    ret void
18  }
19
20  define amdgpu_kernel void @preexisting_waitcnt() {
21    ret void
22  }
23
24  define amdgpu_kernel void @bundle_no_waitcnt() {
25    ret void
26  }
27
28  define amdgpu_kernel void @preexisting_waitcnt_in_bundle() {
29    ret void
30  }
31
32  define amdgpu_kernel void @insert_in_bundle() {
33    ret void
34  }
35
36  define amdgpu_kernel void @exit_bundle() {
37    ret void
38  }
39
40  define amdgpu_kernel void @cross_bundle() {
41    ret void
42  }
43
44  define amdgpu_kernel void @high_register_collision() {
45    ret void
46  }
47
48...
49---
# flat_zero_waitcnt: each of the three blocks issues two FLAT loads into AGPRs
# followed by an instruction that reads $agpr1 (in bb.0 it also overwrites
# $agpr0). The loads in bb.0 reference %ir.global4 and %ir.global16
# (addrspace 1 in the expected output), the first load in bb.1 has no memory
# operand, and the loads in bb.2 reference %ir.flat4 and %ir.flat16.
# The three notes below correspond, in order, to the waits expected in bb.0,
# bb.1 and bb.2; in the expected output they appear as S_WAITCNT 3953, 3952
# and 3952 respectively.
50
51
52# Global loads will return in order so we should:
53# s_waitcnt vmcnt(1)
54
55# s_waitcnt vmcnt(0)
56
57# s_waitcnt vmcnt(0)
58
59name: flat_zero_waitcnt
60
61body: |
62  ; GCN-LABEL: name: flat_zero_waitcnt
63  ; GCN: bb.0:
64  ; GCN-NEXT:   successors: %bb.1(0x80000000)
65  ; GCN-NEXT: {{  $}}
66  ; GCN-NEXT:   S_WAITCNT 0
67  ; GCN-NEXT:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4, addrspace 1)
68  ; GCN-NEXT:   $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
69  ; GCN-NEXT:   S_WAITCNT 3953
70  ; GCN-NEXT:   $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
71  ; GCN-NEXT:   S_BRANCH %bb.1
72  ; GCN-NEXT: {{  $}}
73  ; GCN-NEXT: bb.1:
74  ; GCN-NEXT:   successors: %bb.2(0x80000000)
75  ; GCN-NEXT: {{  $}}
76  ; GCN-NEXT:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
77  ; GCN-NEXT:   S_WAITCNT 3952
78  ; GCN-NEXT:   $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1)
79  ; GCN-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
80  ; GCN-NEXT:   S_BRANCH %bb.2
81  ; GCN-NEXT: {{  $}}
82  ; GCN-NEXT: bb.2:
83  ; GCN-NEXT:   S_WAITCNT 49279
84  ; GCN-NEXT:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
85  ; GCN-NEXT:   S_WAITCNT 3952
86  ; GCN-NEXT:   $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
87  ; GCN-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
88  ; GCN-NEXT:   S_ENDPGM 0
89  bb.0:
90    successors: %bb.1
91    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4)
92    $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
93    $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
94    S_BRANCH %bb.1
95
96  bb.1:
97    successors: %bb.2
98    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
99    $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16)
100    $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
101    S_BRANCH %bb.2
102
103  bb.2:
104    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4)
105    $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16)
106    $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
107    S_ENDPGM 0
108...
109---
110# There is only a single fallthrough successor block, so there's no
111# need to wait immediately.
# bb.0 loads $agpr0 and falls through to bb.1. The expected output has no wait
# at the end of bb.0; the only wait after the load is S_WAITCNT 112 in bb.1,
# placed right before the store that reads $agpr0.
112
113
114name: single_fallthrough_successor_no_end_block_wait
115
116body: |
117  ; GCN-LABEL: name: single_fallthrough_successor_no_end_block_wait
118  ; GCN: bb.0:
119  ; GCN-NEXT:   successors: %bb.1(0x80000000)
120  ; GCN-NEXT: {{  $}}
121  ; GCN-NEXT:   S_WAITCNT 0
122  ; GCN-NEXT:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
123  ; GCN-NEXT: {{  $}}
124  ; GCN-NEXT: bb.1:
125  ; GCN-NEXT:   $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
126  ; GCN-NEXT:   S_WAITCNT 112
127  ; GCN-NEXT:   FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
128  ; GCN-NEXT:   S_ENDPGM 0
129  bb.0:
130    successors: %bb.1
131    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
132
133  bb.1:
134    $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
135    FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
136    S_ENDPGM 0
137...
138---
139# The block has a single predecessor with a single successor, but it
140# is not the next block so it's non-obvious that the wait is not needed.
# bb.0 loads $agpr0 and branches over bb.1 to bb.2 (its only successor). The
# expected output has no wait at the end of bb.0 and none in bb.1, which
# stores $agpr10; the wait (S_WAITCNT 112) is expected in bb.2, right before
# the store that reads $agpr0.
141
142
143
144
145name: single_branch_successor_not_next_block
146
147body: |
148  ; GCN-LABEL: name: single_branch_successor_not_next_block
149  ; GCN: bb.0:
150  ; GCN-NEXT:   successors: %bb.2(0x80000000)
151  ; GCN-NEXT: {{  $}}
152  ; GCN-NEXT:   S_WAITCNT 0
153  ; GCN-NEXT:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
154  ; GCN-NEXT:   S_BRANCH %bb.2
155  ; GCN-NEXT: {{  $}}
156  ; GCN-NEXT: bb.1:
157  ; GCN-NEXT:   FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
158  ; GCN-NEXT:   S_ENDPGM 0
159  ; GCN-NEXT: {{  $}}
160  ; GCN-NEXT: bb.2:
161  ; GCN-NEXT:   $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
162  ; GCN-NEXT:   S_WAITCNT 112
163  ; GCN-NEXT:   FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
164  ; GCN-NEXT:   S_ENDPGM 0
165  bb.0:
166    successors: %bb.2
167    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
168    S_BRANCH %bb.2
169
170  bb.1:
171    FLAT_STORE_DWORD $vgpr8_vgpr9, $agpr10, 0, 0, implicit $exec, implicit $flat_scr
172    S_ENDPGM 0
173
174  bb.2:
175    $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
176    FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
177    S_ENDPGM 0
178...
179
# The input already has S_WAITCNT 0 between the load of $agpr0 and the store
# that reads it. The directives below require that wait right after the load
# and no further S_WAITCNT in this function's output.
180# GCN-LABEL: name: preexisting_waitcnt{{$}}
181# GCN: FLAT_LOAD_DWORD
182# GCN-NEXT: S_WAITCNT 0
183# GCN-NOT: S_WAITCNT
184name: preexisting_waitcnt
185tracksRegLiveness: true
186machineFunctionInfo:
187  isEntryFunction: true
188body: |
189  bb.0:
190    liveins: $vgpr1_vgpr2
191    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
192    S_WAITCNT 0
193    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
194
195...
196
197---
# A bundle holding only two S_NOPs sits between the load of $agpr0 and the
# store that reads it. The expected output leaves the bundle unchanged and has
# the wait (S_WAITCNT 112) after the bundle, right before the store.
198
199name: bundle_no_waitcnt
200tracksRegLiveness: true
201machineFunctionInfo:
202  isEntryFunction: true
203body: |
204  bb.0:
205    liveins: $vgpr1_vgpr2
206    ; GCN-LABEL: name: bundle_no_waitcnt
207    ; GCN: liveins: $vgpr1_vgpr2
208    ; GCN-NEXT: {{  $}}
209    ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
210    ; GCN-NEXT: BUNDLE {
211    ; GCN-NEXT:   S_NOP 0
212    ; GCN-NEXT:   S_NOP 0
213    ; GCN-NEXT: }
214    ; GCN-NEXT: S_WAITCNT 112
215    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
216    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
217    BUNDLE {
218      S_NOP 0
219      S_NOP 0
220    }
221    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
222
223...
224
225---
226
227# See the waitcnt inside the bundle and don't insert an extra
# one: the input bundle between the load and the store already ends with
# S_WAITCNT 0, and the expected output is identical to the input.
228name: preexisting_waitcnt_in_bundle
229tracksRegLiveness: true
230machineFunctionInfo:
231  isEntryFunction: true
232body: |
233  bb.0:
234    liveins: $vgpr1_vgpr2
235    ; GCN-LABEL: name: preexisting_waitcnt_in_bundle
236    ; GCN: liveins: $vgpr1_vgpr2
237    ; GCN-NEXT: {{  $}}
238    ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
239    ; GCN-NEXT: BUNDLE {
240    ; GCN-NEXT:   S_NOP 0
241    ; GCN-NEXT:   S_WAITCNT 0
242    ; GCN-NEXT: }
243    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
244    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
245    BUNDLE {
246      S_NOP 0
247      S_WAITCNT 0
248    }
249    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
250
251...
252
253---
254
255# Def and use inside bundle
# The load that defines $agpr0 and the store that reads it are in the same
# bundle; the expected wait (S_WAITCNT 112) is placed between them, inside
# the bundle.
256
257name: insert_in_bundle
258tracksRegLiveness: true
259machineFunctionInfo:
260  isEntryFunction: true
261body: |
262  bb.0:
263    liveins: $vgpr1_vgpr2
264    ; GCN-LABEL: name: insert_in_bundle
265    ; GCN: liveins: $vgpr1_vgpr2
266    ; GCN-NEXT: {{  $}}
267    ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
268    ; GCN-NEXT:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
269    ; GCN-NEXT:   S_WAITCNT 112
270    ; GCN-NEXT:   FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
271    ; GCN-NEXT: }
272    BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
273    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
274    FLAT_STORE_DWORD $vgpr2_vgpr3, internal $agpr0, 0, 0, implicit $exec, implicit $flat_scr
275    }
276...
277
278---
279
280# Def is last instruction in bundle, use is outside bundle
# The bundle contains only the load of $agpr0; the expected wait
# (S_WAITCNT 112) comes after the bundle, right before the store.
281
282
283name: exit_bundle
284tracksRegLiveness: true
285machineFunctionInfo:
286  isEntryFunction: true
287body: |
288  bb.0:
289    liveins: $vgpr1_vgpr2
290    ; GCN-LABEL: name: exit_bundle
291    ; GCN: liveins: $vgpr1_vgpr2
292    ; GCN-NEXT: {{  $}}
293    ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
294    ; GCN-NEXT:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
295    ; GCN-NEXT: }
296    ; GCN-NEXT: S_WAITCNT 112
297    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
298    BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
299    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
300    }
301
302    FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
303
304...
305
306---
307
308# Def is in bundle, use is in another bundle
# The load of $agpr0 is alone in the first bundle and the store that reads it
# is alone in the second; the expected wait (S_WAITCNT 112) sits between the
# two bundles.
309
310
311name: cross_bundle
312tracksRegLiveness: true
313machineFunctionInfo:
314  isEntryFunction: true
315body: |
316  bb.0:
317    liveins: $vgpr1_vgpr2
318    ; GCN-LABEL: name: cross_bundle
319    ; GCN: liveins: $vgpr1_vgpr2
320    ; GCN-NEXT: {{  $}}
321    ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
322    ; GCN-NEXT:   $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
323    ; GCN-NEXT: }
324    ; GCN-NEXT: S_WAITCNT 112
325    ; GCN-NEXT: BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
326    ; GCN-NEXT:   FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
327    ; GCN-NEXT: }
328    BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 {
329    $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
330    }
331    BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 {
332      FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
333    }
334...
335
336---
337# agpr should be disjoint and tracked separately from vgpr
# $agpr0 is written by V_ACCVGPR_MOV and a FLAT load into $vgpr226 is then
# issued. The expected output has S_WAITCNT 0 at the top and no wait before
# either of the stores that read $agpr1 and $agpr0, i.e. the pending load
# into $vgpr226 must not be treated as a pending write to those AGPRs.
338
339name: high_register_collision
340
341body: |
342  bb.0:
343    ; GCN-LABEL: name: high_register_collision
344    ; GCN: S_WAITCNT 0
345    ; GCN-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
346    ; GCN-NEXT: $vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
347    ; GCN-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
348    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr
349    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
350    ; GCN-NEXT: S_ENDPGM 0
351    $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec
352    $vgpr226 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
353    $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr8_vgpr9, implicit $exec
354    FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr1, 0, 0, implicit $exec, implicit $flat_scr
355    FLAT_STORE_DWORD $vgpr4_vgpr5, $agpr0, 0, 0, implicit $exec, implicit $flat_scr
356    S_ENDPGM 0
357...
358