1# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX9 %s
2# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX10 %s
3
4---
5
6# The loop contains a store and a use of a value loaded outside of the loop.
7# We expect the waitcnt for the use to be hoisted on GFX9, but not on GFX10+
8# because stores are tracked by the separate vscnt counter there.
9
10# GFX9-LABEL: waitcnt_vm_loop
11# GFX9-LABEL: bb.0:
12# GFX9: S_WAITCNT 39
13# GFX9-LABEL: bb.1:
14# GFX9-NOT: S_WAITCNT 39
15# GFX9-LABEL: bb.2:
16
17# GFX10-LABEL: waitcnt_vm_loop
18# GFX10-LABEL: bb.0:
19# GFX10-NOT: S_WAITCNT 16
20# GFX10-LABEL: bb.1:
21# GFX10: S_WAITCNT 16
22# GFX10-LABEL: bb.2:
23name:            waitcnt_vm_loop
24body:             |
25  bb.0:
26    successors: %bb.1
27
    ; Preheader: load $vgpr0, which is read inside the loop (bb.1).
28    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
29    S_BRANCH %bb.1
30
31  bb.1:
32    successors: %bb.1, %bb.2
33
    ; Loop body: a store, followed by a use of the $vgpr0 value loaded in bb.0.
34    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
35    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    ; Back edge to bb.1 while SCC is set, otherwise exit to bb.2.
36    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
37    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
38    S_BRANCH %bb.2
39
40  bb.2:
41    S_ENDPGM 0
42
43...
44---
45
46# Same as before, but the loop preheader has no terminator.
47
48# GFX9-LABEL: waitcnt_vm_loop_noterm
49# GFX9-LABEL: bb.0:
50# GFX9: S_WAITCNT 39
51# GFX9-LABEL: bb.1:
52# GFX9-NOT: S_WAITCNT 39
53# GFX9-LABEL: bb.2:
54
55# GFX10-LABEL: waitcnt_vm_loop_noterm
56# GFX10-LABEL: bb.0:
57# GFX10-NOT: S_WAITCNT 16
58# GFX10-LABEL: bb.1:
59# GFX10: S_WAITCNT 16
60# GFX10-LABEL: bb.2:
61name:            waitcnt_vm_loop_noterm
62body:             |
63  bb.0:
64    successors: %bb.1
65
    ; Preheader: load $vgpr0 for use in the loop. There is no terminator after
    ; the load; bb.1 is the only listed successor.
66    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
67
68  bb.1:
69    successors: %bb.1, %bb.2
70
    ; Loop body: a store, followed by a use of the $vgpr0 value loaded in bb.0.
71    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
72    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    ; Back edge to bb.1 while SCC is set, otherwise exit to bb.2.
73    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
74    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
75    S_BRANCH %bb.2
76
77  bb.2:
78    S_ENDPGM 0
79
80...
81---
82
83# Same as before but there is a preexisting waitcnt in the preheader.
84
85# GFX9-LABEL: waitcnt_vm_loop_noterm_wait
86# GFX9-LABEL: bb.0:
87# GFX9: S_WAITCNT 39
88# GFX9-NOT: S_WAITCNT 39
89# GFX9-LABEL: bb.1:
90# GFX9-NOT: S_WAITCNT 39
91# GFX9-LABEL: bb.2:
92name:            waitcnt_vm_loop_noterm_wait
93body:             |
94  bb.0:
95    successors: %bb.1
96
    ; Preheader: load $vgpr0 for use in the loop, followed by a wait that is
    ; already present in the input. No terminator follows it.
97    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
98    S_WAITCNT 3952
99
100  bb.1:
101    successors: %bb.1, %bb.2
102
    ; Loop body: a store, followed by a use of the $vgpr0 value loaded in bb.0.
103    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
104    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    ; Back edge to bb.1 while SCC is set, otherwise exit to bb.2.
105    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
106    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
107    S_BRANCH %bb.2
108
109  bb.2:
110    S_ENDPGM 0
111
112...
113---
114
115# The loop contains a store, a load, and uses values loaded both inside and
116# outside the loop.
117# We do not expect the waitcnt to be hoisted out of the loop.
118
119# GFX9-LABEL: waitcnt_vm_loop_load
120# GFX9-LABEL: bb.0:
121# GFX9-NOT: S_WAITCNT 39
122# GFX9-LABEL: bb.1:
123# GFX9: S_WAITCNT 39
124# GFX9-LABEL: bb.2:
125
126# GFX10-LABEL: waitcnt_vm_loop_load
127# GFX10-LABEL: bb.0:
128# GFX10-NOT: S_WAITCNT 16
129# GFX10-LABEL: bb.1:
130# GFX10: S_WAITCNT 16
131# GFX10-LABEL: bb.2:
132name:            waitcnt_vm_loop_load
133body:             |
134  bb.0:
135    successors: %bb.1
136
    ; Preheader: load $vgpr0, which is read inside the loop (bb.1).
137    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
138    S_BRANCH %bb.1
139
140  bb.1:
141    successors: %bb.1, %bb.2
142
    ; Loop body: a store and a load of $vgpr7, then an add that reads both the
    ; value loaded in bb.0 ($vgpr0) and the value loaded in this block ($vgpr7).
143    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
144    $vgpr7 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
145    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr7, implicit $exec
    ; Back edge to bb.1 while SCC is set, otherwise exit to bb.2.
146    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
147    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
148    S_BRANCH %bb.2
149
150  bb.2:
151    S_ENDPGM 0
152
153...
154---
155
156# The loop contains a use of a value loaded outside of the loop, but neither a
157# store nor a load.
158# We do not expect the waitcnt to be hoisted out of the loop.
159
160# GFX9-LABEL: waitcnt_vm_loop_no_store
161# GFX9-LABEL: bb.0:
162# GFX9-NOT: S_WAITCNT 39
163# GFX9-LABEL: bb.1:
164# GFX9: S_WAITCNT 39
165# GFX9-LABEL: bb.2:
166
167# GFX10-LABEL: waitcnt_vm_loop_no_store
168# GFX10-LABEL: bb.0:
169# GFX10-NOT: S_WAITCNT 16
170# GFX10-LABEL: bb.1:
171# GFX10: S_WAITCNT 16
172# GFX10-LABEL: bb.2:
173name:            waitcnt_vm_loop_no_store
174body:             |
175  bb.0:
176    successors: %bb.1
177
    ; Preheader: load $vgpr0, which is read inside the loop (bb.1).
178    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
179    S_BRANCH %bb.1
180
181  bb.1:
182    successors: %bb.1, %bb.2
183
    ; Loop body: only a use of the $vgpr0 value loaded in bb.0; the block
    ; contains no store and no load.
184    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
    ; Back edge to bb.1 while SCC is set, otherwise exit to bb.2.
185    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
186    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
187    S_BRANCH %bb.2
188
189  bb.2:
190    S_ENDPGM 0
191
192...
193---
194
195# The loop contains a store, no load, and doesn't use any value loaded inside
196# or outside of the loop. There is only one use of the loaded value in the
197# exit block.
198# We don't expect any s_waitcnt vmcnt in the loop body or preheader, but expect
199# one in the exit block.
200
201
202# GFX9-LABEL: waitcnt_vm_loop_no_use
203# GFX9-LABEL: bb.0:
204# GFX9-NOT: S_WAITCNT 39
205# GFX9-LABEL: bb.1:
206# GFX9-NOT: S_WAITCNT 39
207# GFX9-LABEL: bb.2:
208
209# GFX10-LABEL: waitcnt_vm_loop_no_use
210# GFX10-LABEL: bb.0:
211# GFX10-NOT: S_WAITCNT 16
212# GFX10-LABEL: bb.1:
213# GFX10-NOT: S_WAITCNT 16
214# GFX10-LABEL: bb.2:
215name:            waitcnt_vm_loop_no_use
216body:             |
217  bb.0:
218    successors: %bb.1
219
    ; Preheader: load $vgpr0. The loop does not read it; the only read is in
    ; the exit block (bb.2).
220    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
221    S_BRANCH %bb.1
222
223  bb.1:
224    successors: %bb.1, %bb.2
225
    ; Loop body: a store and an add whose operands ($vgpr2) are not defined by
    ; any load in this function.
226    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
227    $vgpr1 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
    ; Back edge to bb.1 while SCC is set, otherwise exit to bb.2.
228    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
229    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
230    S_BRANCH %bb.2
231
232  bb.2:
    ; Exit block: the single use of the $vgpr0 value loaded in bb.0.
233    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
234    S_ENDPGM 0
235
236...
237---
238
239# The loop loads a value that is not used in the loop, and uses a value loaded
240# outside of the loop.
241# We expect the waitcnt to be hoisted out of the loop so that we wait a single
242# time before the loop is executed and avoid waiting for the load to complete
243# on each iteration.
244
245# GFX9-LABEL: waitcnt_vm_loop2
246# GFX9-LABEL: bb.0:
247# GFX9: S_WAITCNT 39
248# GFX9-LABEL: bb.1:
249# GFX9-NOT: S_WAITCNT 39
250# GFX9-LABEL: bb.2:
251
252# GFX10-LABEL: waitcnt_vm_loop2
253# GFX10-LABEL: bb.0:
254# GFX10: S_WAITCNT 16
255# GFX10-LABEL: bb.1:
256# GFX10-NOT: S_WAITCNT 16
257# GFX10-LABEL: bb.2:
258name:            waitcnt_vm_loop2
259body:             |
260  bb.0:
261    successors: %bb.1
262
    ; Preheader: load $vgpr0, which is read inside the loop (bb.1).
263    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
264    S_BRANCH %bb.1
265
266  bb.1:
267    successors: %bb.1, %bb.2
268
    ; Loop body: a use of the $vgpr0 value loaded in bb.0, then a load into
    ; $vgpr1 whose result is not read anywhere in the loop.
269    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
270    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    ; Back edge to bb.1 while SCC is set, otherwise exit to bb.2.
271    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
272    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
273    S_BRANCH %bb.2
274
275  bb.2:
276    S_ENDPGM 0
277
278...
279---
280
281# Same as before with an additional store in the loop. We still expect the
282# waitcnt instructions to be hoisted.
283
284# GFX9-LABEL: waitcnt_vm_loop2_store
285# GFX9-LABEL: bb.0:
286# GFX9: S_WAITCNT 39
287# GFX9-LABEL: bb.1:
288# GFX9-NOT: S_WAITCNT 39
289# GFX9-LABEL: bb.2:
290
291# GFX10-LABEL: waitcnt_vm_loop2_store
292# GFX10-LABEL: bb.0:
293# GFX10: S_WAITCNT 16
294# GFX10-LABEL: bb.1:
295# GFX10-NOT: S_WAITCNT 16
296# GFX10-LABEL: bb.2:
297name:            waitcnt_vm_loop2_store
298body:             |
299  bb.0:
300    successors: %bb.1
301
    ; Preheader: load $vgpr0, which is read inside the loop (bb.1).
302    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
303    S_BRANCH %bb.1
304
305  bb.1:
306    successors: %bb.1, %bb.2
307
    ; Loop body: a use of the $vgpr0 value loaded in bb.0, a load into $vgpr1
    ; whose result is not read in the loop, and an additional store.
308    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
309    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
310    BUFFER_STORE_DWORD_OFFEN_exact $vgpr5, $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    ; Back edge to bb.1 while SCC is set, otherwise exit to bb.2.
311    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
312    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
313    S_BRANCH %bb.2
314
315  bb.2:
316    S_ENDPGM 0
317
318...
319---
320
321# Same as loop2 but the value loaded inside the loop is also used in the loop.
322# We do not expect the waitcnt to be hoisted out of the loop.
323
324# GFX9-LABEL: waitcnt_vm_loop2_use_in_loop
325# GFX9-LABEL: bb.0:
326# GFX9-NOT: S_WAITCNT 39
327# GFX9-LABEL: bb.1:
328# GFX9: S_WAITCNT 39
329# GFX9-LABEL: bb.2:
330
331# GFX10-LABEL: waitcnt_vm_loop2_use_in_loop
332# GFX10-LABEL: bb.0:
333# GFX10-NOT: S_WAITCNT 16
334# GFX10-LABEL: bb.1:
335# GFX10: S_WAITCNT 16
336# GFX10-LABEL: bb.2:
337name:            waitcnt_vm_loop2_use_in_loop
338body:             |
339  bb.0:
340    successors: %bb.1
341
    ; Preheader: load $vgpr0, which is read inside the loop (bb.1).
342    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
343    S_BRANCH %bb.1
344
345  bb.1:
346    successors: %bb.1, %bb.2
347
    ; Loop body: a use of the $vgpr0 value loaded in bb.0, then a load into
    ; $vgpr1 that is read by the add immediately following it.
348    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
349    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
350    $vgpr4 = V_ADD_U32_e32 $vgpr5, $vgpr1, implicit $exec
    ; Back edge to bb.1 while SCC is set, otherwise exit to bb.2.
351    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
352    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
353    S_BRANCH %bb.2
354
355  bb.2:
356    S_ENDPGM 0
357
358...
359---
360
361# The loop contains a use of a value loaded outside of the loop, but we already
362# waited for that load to complete. The loop also loads a value that is not used
363# in the loop. We do not expect any waitcnt in the loop.
364
365# GFX9-LABEL: waitcnt_vm_loop2_nowait
366# GFX9-LABEL: bb.0:
367# GFX9: S_WAITCNT 39
368# GFX9-LABEL: bb.1:
369# GFX9-NOT: S_WAITCNT 39
370# GFX9-LABEL: bb.2:
371# GFX9-NOT: S_WAITCNT 39
372# GFX9-LABEL: bb.3:
373
374# GFX10-LABEL: waitcnt_vm_loop2_nowait
375# GFX10-LABEL: bb.0:
376# GFX10: S_WAITCNT 16
377# GFX10-LABEL: bb.1:
378# GFX10-NOT: S_WAITCNT 16
379# GFX10-LABEL: bb.2:
380# GFX10-NOT: S_WAITCNT 16
381# GFX10-LABEL: bb.3:
382name:            waitcnt_vm_loop2_nowait
383body:             |
384  bb.0:
385    successors: %bb.1
386
    ; Entry block: load $vgpr0 and read it right away, so the load has to be
    ; waited for in this block, before the loop is reached.
387    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
388    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
389    S_BRANCH %bb.1
390
391  bb.1:
392    successors: %bb.2
393
    ; Intermediate block between the entry block and the loop; none of these
    ; adds reads $vgpr0.
394    $vgpr3 = V_ADD_U32_e32 $vgpr4, $vgpr5, implicit $exec
395    $vgpr3 = V_ADD_U32_e32 $vgpr4, $vgpr5, implicit $exec
396    $vgpr3 = V_ADD_U32_e32 $vgpr4, $vgpr5, implicit $exec
397
398    S_BRANCH %bb.2
399
400  bb.2:
401    successors: %bb.2, %bb.3
402
    ; Loop body: a use of the $vgpr0 value loaded in bb.0, then a load into
    ; $vgpr1 whose result is not read in the loop.
403    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
404    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    ; Back edge to bb.2 while SCC is set, otherwise exit to bb.3.
405    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
406    S_CBRANCH_SCC1 %bb.2, implicit killed $scc
407    S_BRANCH %bb.3
408
409  bb.3:
410    S_ENDPGM 0
411
412...
413---
414
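415# Similar to waitcnt_vm_loop2, but for register intervals: the loads define
# register tuples and the loop only reads $vgpr0, part of the tuple loaded in
# the preheader. We expect the waitcnt to be hoisted.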
416
417# GFX9-LABEL: waitcnt_vm_loop2_reginterval
418# GFX9-LABEL: bb.0:
419# GFX9: S_WAITCNT 39
420# GFX9-LABEL: bb.1:
421# GFX9-NOT: S_WAITCNT 39
422# GFX9-LABEL: bb.2:
423
424# GFX10-LABEL: waitcnt_vm_loop2_reginterval
425# GFX10-LABEL: bb.0:
426# GFX10: S_WAITCNT 16
427# GFX10-LABEL: bb.1:
428# GFX10-NOT: S_WAITCNT 16
429# GFX10-LABEL: bb.2:
430name:            waitcnt_vm_loop2_reginterval
431body:             |
432  bb.0:
433    successors: %bb.1
434
    ; Preheader: a single load defines the whole $vgpr0..$vgpr3 tuple.
435    $vgpr0_vgpr1_vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX4 $vgpr10_vgpr11, 0, 0, implicit $exec
436
437    S_BRANCH %bb.1
438
439  bb.1:
440    successors: %bb.1, %bb.2
441
    ; Loop body: read $vgpr0, one register of the tuple loaded in bb.0.
442    $vgpr10 = COPY $vgpr0
443
    ; Load the $vgpr4..$vgpr7 tuple; none of these registers is read in the loop.
444    $vgpr4_vgpr5_vgpr6_vgpr7 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
    ; Back edge to bb.1 while SCC is set, otherwise exit to bb.2.
445    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
446    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
447    S_BRANCH %bb.2
448
449  bb.2:
450    S_ENDPGM 0
451
452...
453---
454
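455# Same as waitcnt_vm_loop2_reginterval, but part of the tuple loaded inside the
# loop ($vgpr7) is also used in the loop.
# We do not expect the waitcnt to be hoisted out of the loop.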
456
457# GFX9-LABEL: waitcnt_vm_loop2_reginterval2
458# GFX9-LABEL: bb.0:
459# GFX9-NOT: S_WAITCNT 39
460# GFX9-LABEL: bb.1:
461# GFX9: S_WAITCNT 39
462# GFX9-LABEL: bb.2:
463
464# GFX10-LABEL: waitcnt_vm_loop2_reginterval2
465# GFX10-LABEL: bb.0:
466# GFX10-NOT: S_WAITCNT 16
467# GFX10-LABEL: bb.1:
468# GFX10: S_WAITCNT 16
469# GFX10-LABEL: bb.2:
470name:            waitcnt_vm_loop2_reginterval2
471body:             |
472  bb.0:
473    successors: %bb.1
474
    ; Preheader: a single load defines the whole $vgpr0..$vgpr3 tuple.
475    $vgpr0_vgpr1_vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX4 $vgpr10_vgpr11, 0, 0, implicit $exec
476
477    S_BRANCH %bb.1
478
479  bb.1:
480    successors: %bb.1, %bb.2
481
    ; Loop body: read $vgpr0, one register of the tuple loaded in bb.0.
482    $vgpr10 = COPY $vgpr0
483
    ; Load the $vgpr4..$vgpr7 tuple, then read $vgpr7 from it in the same
    ; iteration.
484    $vgpr4_vgpr5_vgpr6_vgpr7 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
485    $vgpr11 = COPY $vgpr7
    ; Back edge to bb.1 while SCC is set, otherwise exit to bb.2.
486    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
487    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
488    S_BRANCH %bb.2
489
490  bb.2:
491    S_ENDPGM 0
492
493...
494---
495
496# The loop loads a value that is not used in the loop, but uses a value loaded
497# outside of it. We expect the s_waitcnt instruction to be hoisted.
498# A s_waitcnt vmcnt(0) is generated to flush in the preheader, but for this
499# specific test case, it would be better to use vmcnt(1) instead. This is
500# currently not implemented.
501
502# GFX9-LABEL: waitcnt_vm_zero
503# GFX9-LABEL: bb.0:
504# GFX9: S_WAITCNT 3952
505# GFX9-LABEL: bb.1:
506# GFX9-NOT: S_WAITCNT 39
507# GFX9-LABEL: bb.2:
508
509# GFX10-LABEL: waitcnt_vm_zero
510# GFX10-LABEL: bb.0:
511# GFX10: S_WAITCNT 16240
512# GFX10-LABEL: bb.1:
513# GFX10-NOT: S_WAITCNT 16240
514# GFX10-LABEL: bb.2:
515
516name:            waitcnt_vm_zero
517body:             |
518  bb.0:
519    successors: %bb.1
520
    ; Preheader: two loads are issued back to back. Only the first result
    ; ($vgpr0) is read in the loop; $vgpr1 is overwritten there without being
    ; read.
521    $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
522    $vgpr1 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
523    S_BRANCH %bb.1
524
525  bb.1:
526    successors: %bb.1, %bb.2
527
    ; Loop body: a use of the $vgpr0 value loaded in bb.0, then a load into
    ; $vgpr2 whose result is not read in the loop.
528    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr3, implicit $exec
529    $vgpr2 = BUFFER_LOAD_FORMAT_X_IDXEN killed $vgpr3, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
    ; Back edge to bb.1 while SCC is set, otherwise exit to bb.2.
530    S_CMP_LG_U32 killed $sgpr3, $sgpr4, implicit-def $scc
531    S_CBRANCH_SCC1 %bb.1, implicit killed $scc
532    S_BRANCH %bb.2
533
534  bb.2:
535    S_ENDPGM 0
536
537...
538