1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra,si-optimize-exec-masking-pre-ra -o - %s | FileCheck %s
3
4# FIXME: Second run of the pass is a workaround for a bug in
5# -run-pass. The verifier doesn't detect broken LiveIntervals, see bug
6# 46873
7
8
9# Cannot fold this without moving the redefinition of %7 (the S_MOV_B64 0 in
# bb.2) after the S_AND_B64.
10---
# Negative test (virtual register condition): in bb.2 the select condition %7
# is overwritten by S_MOV_B64 0 *before* the S_AND_B64 that masks $vcc with
# $exec. The CHECK lines expect the V_CNDMASK_B32 / V_CMP_NE_U32 / S_AND_B64
# sequence to be left in place, i.e. no S_ANDN2_B64 is formed.
11name:            no_fold_andn2_select_condition_live_out_phi
12tracksRegLiveness: true
13body:             |
14  ; CHECK-LABEL: name: no_fold_andn2_select_condition_live_out_phi
15  ; CHECK: bb.0:
16  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
17  ; CHECK-NEXT: {{  $}}
18  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 -1
19  ; CHECK-NEXT:   undef %1.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
20  ; CHECK-NEXT:   S_BRANCH %bb.2
21  ; CHECK-NEXT: {{  $}}
22  ; CHECK-NEXT: bb.1:
23  ; CHECK-NEXT:   S_ENDPGM 0
24  ; CHECK-NEXT: {{  $}}
25  ; CHECK-NEXT: bb.2:
26  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
27  ; CHECK-NEXT: {{  $}}
28  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[S_MOV_B64_]], implicit $exec
29  ; CHECK-NEXT:   V_CMP_NE_U32_e32 1, [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec
30  ; CHECK-NEXT:   %1.sub1:vreg_64 = COPY %1.sub0
31  ; CHECK-NEXT:   DS_WRITE_B64_gfx9 undef %3:vgpr_32, %1, 0, 0, implicit $exec :: (store (s64), addrspace 3)
32  ; CHECK-NEXT:   ATOMIC_FENCE 4, 2
33  ; CHECK-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0
34  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
35  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
36  ; CHECK-NEXT:   S_BRANCH %bb.2
37  bb.0:
38    successors: %bb.2
39
    ; Initial value of the select condition on entry to the loop block bb.2.
40    %7:sreg_64_xexec = S_MOV_B64 -1
41    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
42    S_BRANCH %bb.2
43
44  bb.1:
45    S_ENDPGM 0
46
47  bb.2:
48    successors: %bb.1, %bb.2
49
    ; Select on %7, then compare the selected value against 1 into $vcc.
50    %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %7, implicit $exec
51    V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec
52    %5.sub1:vreg_64 = COPY %5.sub0
53    DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3)
54    ATOMIC_FENCE 4, 2
    ; %7 is redefined here, ahead of the S_AND_B64 below; the new value is
    ; what the V_CNDMASK at the top of bb.2 reads when the block loops.
55    %7:sreg_64_xexec = S_MOV_B64 0
56    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
57    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
58    S_BRANCH %bb.2
59
60...
61
62# It's OK to fold this, since the phi def is after the andn2 insert point.
63---
# Positive test (virtual register condition): here the S_MOV_B64 0 that
# redefines %7 comes *after* the S_AND_B64 in bb.2. The CHECK lines expect the
# V_CNDMASK_B32 / V_CMP_NE_U32 / S_AND_B64 sequence to be replaced by a single
# S_ANDN2_B64 $exec, %7 that is emitted ahead of the redefinition.
64name:            fold_andn2_select_condition_live_out_phi_reorder
65tracksRegLiveness: true
66body:             |
67  ; CHECK-LABEL: name: fold_andn2_select_condition_live_out_phi_reorder
68  ; CHECK: bb.0:
69  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
70  ; CHECK-NEXT: {{  $}}
71  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 -1
72  ; CHECK-NEXT:   undef %1.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
73  ; CHECK-NEXT:   S_BRANCH %bb.2
74  ; CHECK-NEXT: {{  $}}
75  ; CHECK-NEXT: bb.1:
76  ; CHECK-NEXT:   S_ENDPGM 0
77  ; CHECK-NEXT: {{  $}}
78  ; CHECK-NEXT: bb.2:
79  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
80  ; CHECK-NEXT: {{  $}}
81  ; CHECK-NEXT:   %1.sub1:vreg_64 = COPY %1.sub0
82  ; CHECK-NEXT:   DS_WRITE_B64_gfx9 undef %3:vgpr_32, %1, 0, 0, implicit $exec :: (store (s64), addrspace 3)
83  ; CHECK-NEXT:   ATOMIC_FENCE 4, 2
84  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[S_MOV_B64_]], implicit-def dead $scc
85  ; CHECK-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0
86  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
87  ; CHECK-NEXT:   S_BRANCH %bb.2
88  bb.0:
89    successors: %bb.2
90
    ; Initial value of the select condition on entry to the loop block bb.2.
91    %7:sreg_64_xexec = S_MOV_B64 -1
92    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
93    S_BRANCH %bb.2
94
95  bb.1:
96    S_ENDPGM 0
97
98  bb.2:
99    successors: %bb.1, %bb.2
100
    ; Select on %7, then compare the selected value against 1 into $vcc.
101    %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %7, implicit $exec
102    V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec
103    %5.sub1:vreg_64 = COPY %5.sub0
104    DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3)
105    ATOMIC_FENCE 4, 2
    ; The S_AND_B64 precedes the redefinition of %7, so %7 still holds the
    ; value the V_CNDMASK above read at this point.
106    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
107    %7:sreg_64_xexec = S_MOV_B64 0
108    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
109    S_BRANCH %bb.2
110
111...
112
113---
# Negative test (physical register condition): the select condition is the
# physical register pair $sgpr4_sgpr5, which is live-in to bb.2 and is
# overwritten by S_MOV_B64 0 *before* the S_AND_B64. The CHECK lines expect the
# V_CNDMASK_B32 / V_CMP_NE_U32 / S_AND_B64 sequence to be left in place.
114name:            no_fold_andn2_select_condition_live_out_phi_physreg
115tracksRegLiveness: true
116body:             |
117  ; CHECK-LABEL: name: no_fold_andn2_select_condition_live_out_phi_physreg
118  ; CHECK: bb.0:
119  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
120  ; CHECK-NEXT: {{  $}}
121  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 -1
122  ; CHECK-NEXT:   undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
123  ; CHECK-NEXT:   S_BRANCH %bb.2
124  ; CHECK-NEXT: {{  $}}
125  ; CHECK-NEXT: bb.1:
126  ; CHECK-NEXT:   S_ENDPGM 0
127  ; CHECK-NEXT: {{  $}}
128  ; CHECK-NEXT: bb.2:
129  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
130  ; CHECK-NEXT:   liveins: $sgpr4_sgpr5
131  ; CHECK-NEXT: {{  $}}
132  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr4_sgpr5, implicit $exec
133  ; CHECK-NEXT:   V_CMP_NE_U32_e32 1, [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec
134  ; CHECK-NEXT:   %0.sub1:vreg_64 = COPY %0.sub0
135  ; CHECK-NEXT:   DS_WRITE_B64_gfx9 undef %2:vgpr_32, %0, 0, 0, implicit $exec :: (store (s64), addrspace 3)
136  ; CHECK-NEXT:   ATOMIC_FENCE 4, 2
137  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 0
138  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
139  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
140  ; CHECK-NEXT:   S_BRANCH %bb.2
141  bb.0:
142    successors: %bb.2
143
    ; Initial value of the select condition on entry to the loop block bb.2.
144    $sgpr4_sgpr5 = S_MOV_B64 -1
145    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
146    S_BRANCH %bb.2
147
148  bb.1:
149    S_ENDPGM 0
150
151  bb.2:
152    successors: %bb.1, %bb.2
153    liveins: $sgpr4_sgpr5
154
    ; Select on $sgpr4_sgpr5, then compare the selected value against 1 into
    ; $vcc.
155    %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr4_sgpr5, implicit $exec
156    V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec
157    %5.sub1:vreg_64 = COPY %5.sub0
158    DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3)
159    ATOMIC_FENCE 4, 2
    ; $sgpr4_sgpr5 is redefined here, ahead of the S_AND_B64 below.
160    $sgpr4_sgpr5 = S_MOV_B64 0
161    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
162    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
163    S_BRANCH %bb.2
164
165...
166
167---
# Positive test (physical register condition): the select condition is the
# physical register pair $sgpr4_sgpr5, and its redefinition by S_MOV_B64 0
# comes *after* the S_AND_B64 in bb.2. The CHECK lines expect the
# V_CNDMASK_B32 / V_CMP_NE_U32 / S_AND_B64 sequence to be replaced by a single
# S_ANDN2_B64 $exec, $sgpr4_sgpr5 that is emitted ahead of the redefinition.
168name:            fold_andn2_select_condition_live_out_phi_physreg_reorder
169tracksRegLiveness: true
170body:             |
171  ; CHECK-LABEL: name: fold_andn2_select_condition_live_out_phi_physreg_reorder
172  ; CHECK: bb.0:
173  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
174  ; CHECK-NEXT: {{  $}}
175  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 -1
176  ; CHECK-NEXT:   undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
177  ; CHECK-NEXT:   S_BRANCH %bb.2
178  ; CHECK-NEXT: {{  $}}
179  ; CHECK-NEXT: bb.1:
180  ; CHECK-NEXT:   S_ENDPGM 0
181  ; CHECK-NEXT: {{  $}}
182  ; CHECK-NEXT: bb.2:
183  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
184  ; CHECK-NEXT:   liveins: $sgpr4_sgpr5
185  ; CHECK-NEXT: {{  $}}
186  ; CHECK-NEXT:   %0.sub1:vreg_64 = COPY %0.sub0
187  ; CHECK-NEXT:   DS_WRITE_B64_gfx9 undef %2:vgpr_32, %0, 0, 0, implicit $exec :: (store (s64), addrspace 3)
188  ; CHECK-NEXT:   ATOMIC_FENCE 4, 2
189  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, $sgpr4_sgpr5, implicit-def dead $scc
190  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 0
191  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
192  ; CHECK-NEXT:   S_BRANCH %bb.2
193  bb.0:
194    successors: %bb.2
195
    ; Initial value of the select condition on entry to the loop block bb.2.
196    $sgpr4_sgpr5 = S_MOV_B64 -1
197    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
198    S_BRANCH %bb.2
199
200  bb.1:
201    S_ENDPGM 0
202
203  bb.2:
204    successors: %bb.1, %bb.2
205    liveins: $sgpr4_sgpr5
206
    ; Select on $sgpr4_sgpr5, then compare the selected value against 1 into
    ; $vcc.
207    %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr4_sgpr5, implicit $exec
208    V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec
209    %5.sub1:vreg_64 = COPY %5.sub0
210    DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3)
211    ATOMIC_FENCE 4, 2
    ; The S_AND_B64 precedes the redefinition of $sgpr4_sgpr5, so the register
    ; still holds the value the V_CNDMASK above read at this point.
212    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
213    $sgpr4_sgpr5 = S_MOV_B64 0
214    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
215    S_BRANCH %bb.2
216
217...
218