1; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2
3; The endcf collapse is disabled at -O0.
4; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -O0 -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefix=GCN-O0 %s
5
6; GCN-LABEL: {{^}}simple_nested_if:
7; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
8; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
9; GCN:      s_and_b64 exec, exec, vcc
10; GCN-NEXT: s_cbranch_execz [[ENDIF]]
11; GCN-NEXT: ; %bb.{{[0-9]+}}:
12; GCN:      store_dword
13; GCN-NEXT: {{^}}[[ENDIF]]:
14; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
15; GCN: ds_write_b32
16; GCN: s_endpgm
17;
18; GCN-O0-LABEL: {{^}}simple_nested_if:
19; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
20; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
21; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
22; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
23; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
24; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
25; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
26; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
27; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
28; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
29; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
30; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
31; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
32; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
33; GCN-O0:      store_dword
34; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
35; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
36; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
37; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
38; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
39; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
40; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
41; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
42; GCN-O0:      ds_write_b32
43; GCN-O0:      s_endpgm
44;
; Nested if/if whose inner and outer regions end at the same block: the inner
; skip edge (%tmp == 2) and %bb.inner.then both branch straight to
; %bb.outer.end, with no instructions between the two join points.
; The optimized-run checks above expect a single exec restore at that join;
; the -O0 checks expect one restore per nesting level.
45define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
46bb:
47  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
48  %tmp1 = icmp ugt i32 %tmp, 1                      ; outer condition: workitem id > 1
49  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end
50
51bb.outer.then:                                    ; preds = %bb
52  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
53  store i32 0, i32 addrspace(1)* %tmp4, align 4
54  %tmp5 = icmp eq i32 %tmp, 2                       ; inner condition: true skips the inner then-block
55  br i1 %tmp5, label %bb.outer.end, label %bb.inner.then
56
57bb.inner.then:                                    ; preds = %bb.outer.then
58  %tmp7 = add i32 %tmp, 1
59  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
60  store i32 1, i32 addrspace(1)* %tmp9, align 4
61  br label %bb.outer.end
62
63bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
64  store i32 3, i32 addrspace(3)* null               ; addrspace(3) store; the checks look for ds_write_b32 after the exec restore
65  ret void
66}
67
68; GCN-LABEL: {{^}}uncollapsable_nested_if:
69; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
70; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
71; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
72; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
73; GCN-NEXT: ; %bb.{{[0-9]+}}:
74; GCN:      store_dword
75; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
76; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
77; GCN:      store_dword
78; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
79; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
80; GCN: ds_write_b32
81; GCN: s_endpgm
82;
83; GCN-O0-LABEL: {{^}}uncollapsable_nested_if:
84; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
85; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
86; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
87; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
88; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
89; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
90; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
91; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
92; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
93; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
94; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
95; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
96; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
97; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
98; GCN-O0:      store_dword
99; GCN-O0-NEXT: s_branch [[ENDIF_INNER]]
100; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
101; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
102; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
103; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
104; GCN-O0-NEXT: s_branch [[LAST_BB:.LBB[0-9_]+]]
105; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
106; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
107; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
108; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
109; GCN-O0:      s_branch [[ENDIF_OUTER]]
110; GCN-O0-NEXT: {{^}}[[LAST_BB]]:
111; GCN-O0:      ds_write_b32
112; GCN-O0:      s_endpgm
113;
; Nested if/if where the inner join block (%bb.inner.end) performs a store
; before branching to the outer join (%bb.outer.end), so the two join points
; are distinct blocks with real work between them. The optimized-run checks
; above expect two separate exec restores, one per saved mask.
114define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
115bb:
116  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
117  %tmp1 = icmp ugt i32 %tmp, 1                      ; outer condition: workitem id > 1
118  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end
119
120bb.outer.then:                                    ; preds = %bb
121  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
122  store i32 0, i32 addrspace(1)* %tmp4, align 4
123  %tmp5 = icmp eq i32 %tmp, 2                       ; inner condition: true skips the inner then-block
124  br i1 %tmp5, label %bb.inner.end, label %bb.inner.then
125
126bb.inner.then:                                    ; preds = %bb.outer.then
127  %tmp7 = add i32 %tmp, 1
128  %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
129  store i32 1, i32 addrspace(1)* %tmp8, align 4
130  br label %bb.inner.end
131
132bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
133  %tmp9 = add i32 %tmp, 2
134  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9
135  store i32 2, i32 addrspace(1)* %tmp10, align 4    ; work between the inner and outer join points
136  br label %bb.outer.end
137
138bb.outer.end:                                     ; preds = %bb.inner.end, %bb
139  store i32 3, i32 addrspace(3)* null
140  ret void
141}
142
143; GCN-LABEL: {{^}}nested_if_if_else:
144; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
145; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
146; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
147; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
148; GCN-NEXT: s_cbranch_execz [[THEN_INNER:.LBB[0-9_]+]]
149; GCN-NEXT: ; %bb.{{[0-9]+}}:
150; GCN:      store_dword
151; GCN:      {{^}}[[THEN_INNER]]:
152; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
153; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
154; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
155; GCN:      store_dword
156; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
157; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
158; GCN: ds_write_b32
159; GCN: s_endpgm
160;
161; GCN-O0-LABEL: {{^}}nested_if_if_else:
162; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
163; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
164; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
165; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
166; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
167; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
168; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
169; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
170; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
171; GCN-O0-NEXT: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
172; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[THEN_SPILL_LANE_0:[0-9]+]]
173; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[THEN_SPILL_LANE_1:[0-9]+]]
174; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
175; GCN-O0-NEXT: s_cbranch_execz [[THEN_INNER:.LBB[0-9_]+]]
176; GCN-O0-NEXT: s_branch [[TEMP_BB:.LBB[0-9_]+]]
177; GCN-O0-NEXT: {{^}}[[THEN_INNER]]:
178; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[THEN_SPILL_LANE_0]]
179; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[THEN_SPILL_LANE_1]]
180; GCN-O0-NEXT: s_or_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
181; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
182; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
183; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
184; GCN-O0-NEXT: s_xor_b64 exec, exec, s[{{[0-9:]+}}]
185; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
186; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
187; GCN-O0:      store_dword
188; GCN-O0-NEXT: s_branch [[ENDIF_INNER]]
189; GCN-O0-NEXT: {{^}}[[TEMP_BB]]:
190; GCN-O0:      s_branch [[THEN_INNER]]
191; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
192; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
193; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
194; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
195; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
196; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
197; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
198; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
199; GCN-O0:      ds_write_b32
200; GCN-O0:      s_endpgm
201;
; Outer if containing an inner if/else: %bb.outer.then selects between
; %bb.then and %bb.else on %tmp == 2, and both arms branch directly to
; %bb.outer.end. The entry block stores unconditionally before the outer
; branch.
202define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
203bb:
204  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
205  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
206  store i32 0, i32 addrspace(1)* %tmp1, align 4
207  %tmp2 = icmp ugt i32 %tmp, 1                         ; outer condition: workitem id > 1
208  br i1 %tmp2, label %bb.outer.then, label %bb.outer.end
209
210bb.outer.then:                                       ; preds = %bb
211  %tmp5 = icmp eq i32 %tmp, 2                          ; inner if/else condition
212  br i1 %tmp5, label %bb.then, label %bb.else
213
214bb.then:                                             ; preds = %bb.outer.then
215  %tmp3 = add i32 %tmp, 1
216  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3
217  store i32 1, i32 addrspace(1)* %tmp4, align 4
218  br label %bb.outer.end
219
220bb.else:                                             ; preds = %bb.outer.then
221  %tmp7 = add i32 %tmp, 2
222  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
223  store i32 2, i32 addrspace(1)* %tmp9, align 4
224  br label %bb.outer.end
225
226bb.outer.end:                                        ; preds = %bb, %bb.then, %bb.else
227  store i32 3, i32 addrspace(3)* null
228  ret void
229}
230
231; GCN-LABEL: {{^}}nested_if_else_if:
232; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
233; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
234; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:.LBB[0-9_]+]]
235; GCN-NEXT: ; %bb.{{[0-9]+}}:
236; GCN:      store_dword
237; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
238; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:.LBB[0-9_]+]]
239; GCN-NEXT: ; %bb.{{[0-9]+}}:
240; GCN:      store_dword
241; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
242; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
243; GCN:      {{^}}[[THEN_OUTER]]:
244; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
245; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
246; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
247; GCN-NEXT: ; %bb.{{[0-9]+}}:
248; GCN:      store_dword
249; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_ELSE:s\[[0-9:]+\]]],
250; GCN-NEXT: s_cbranch_execz [[FLOW1:.LBB[0-9_]+]]
251; GCN-NEXT: ; %bb.{{[0-9]+}}:
252; GCN:      store_dword
253; GCN-NEXT: [[FLOW1]]:
254; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_ELSE]]
255; GCN:      s_or_b64 exec, exec, [[SAVEEXEC_OUTER3]]
256; GCN:      ds_write_b32
257; GCN:      s_endpgm
258;
259; GCN-O0-LABEL: {{^}}nested_if_else_if:
260; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
261; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
262; GCN-O0-NEXT: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
263; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
264; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
265; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
266; GCN-O0-NEXT: s_cbranch_execz [[THEN_OUTER:.LBB[0-9_]+]]
267; GCN-O0-NEXT: s_branch [[INNER_IF_OUTER_ELSE:.LBB[0-9_]+]]
268; GCN-O0-NEXT: {{^}}[[THEN_OUTER]]:
269; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
270; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
271; GCN-O0-NEXT: s_or_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
272; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
273; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_2_SPILL_LANE_0:[0-9]+]]
274; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_2_SPILL_LANE_1:[0-9]+]]
275; GCN-O0-NEXT: s_xor_b64 exec, exec, s[{{[0-9:]+}}]
276; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
277; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
278; GCN-O0:      store_dword
279; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
280; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[ELSE_SPILL_LANE_0:[0-9]+]]
281; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[ELSE_SPILL_LANE_1:[0-9]+]]
282; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
283; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
284; GCN-O0-NEXT: s_cbranch_execz [[FLOW1:.LBB[0-9_]+]]
285; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
286; GCN-O0:      store_dword
287; GCN-O0-NEXT: s_branch [[FLOW1]]
288; GCN-O0-NEXT: {{^}}[[INNER_IF_OUTER_ELSE]]
289; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
290; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_IF_OUTER_ELSE_SPILL_LANE_0:[0-9]+]]
291; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_IF_OUTER_ELSE_SPILL_LANE_1:[0-9]+]]
292; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
293; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
294; GCN-O0-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:.LBB[0-9_]+]]
295; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
296; GCN-O0:      store_dword
297; GCN-O0-NEXT: {{^}}[[THEN_OUTER_FLOW]]
298; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_IF_OUTER_ELSE_SPILL_LANE_0]]
299; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_IF_OUTER_ELSE_SPILL_LANE_1]]
300; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
301; GCN-O0-NEXT: s_branch [[THEN_OUTER]]
302; GCN-O0-NEXT: {{^}}[[FLOW1]]
303; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[ELSE_SPILL_LANE_0]]
304; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[ELSE_SPILL_LANE_1]]
305; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
306; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]
307; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_2_SPILL_LANE_0]]
308; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_2_SPILL_LANE_1]]
309; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
310; GCN-O0:      ds_write_b32
311; GCN-O0:      s_endpgm
312;
; Outer if/else where each arm does a store and then contains its own inner
; if (%tmp == 2) guarding one more store. All four paths converge on
; %bb.outer.end. Every store address is a constant offset from %tmp1.
313define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
314bb:
315  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
316  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
317  store i32 0, i32 addrspace(1)* %tmp1, align 4
318  %cc1 = icmp ugt i32 %tmp, 1                          ; outer if/else condition
319  br i1 %cc1, label %bb.outer.then, label %bb.outer.else
320
321bb.outer.then:                                       ; preds = %bb
322  %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1
323  store i32 1, i32 addrspace(1)* %tmp2, align 4
324  %cc2 = icmp eq i32 %tmp, 2                           ; inner if within the outer then-arm
325  br i1 %cc2, label %bb.inner.then, label %bb.outer.end
326
327bb.inner.then:                                       ; preds = %bb.outer.then
328  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2
329  store i32 2, i32 addrspace(1)* %tmp3, align 4
330  br label %bb.outer.end
331
332bb.outer.else:                                       ; preds = %bb
333  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3
334  store i32 3, i32 addrspace(1)* %tmp4, align 4
335  %cc3 = icmp eq i32 %tmp, 2                           ; inner if within the outer else-arm
336  br i1 %cc3, label %bb.inner.then2, label %bb.outer.end
337
338bb.inner.then2:                                      ; preds = %bb.outer.else
339  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4
340  store i32 4, i32 addrspace(1)* %tmp5, align 4
341  br label %bb.outer.end
342
343bb.outer.end:                                        ; preds = %bb.outer.then, %bb.inner.then, %bb.outer.else, %bb.inner.then2
344  store i32 3, i32 addrspace(3)* null
345  ret void
346}
347
348; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
349; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
350; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
351; GCN-NEXT: ; %bb.{{[0-9]+}}:
352; GCN:      store_dword
353; GCN-NEXT: {{^}}[[ENDIF]]:
354; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
355; GCN:      s_barrier
356; GCN-NEXT: s_endpgm
357;
358; GCN-O0-LABEL: {{^}}s_endpgm_unsafe_barrier:
359; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
360; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[SPILL_LANE_0:[0-9]+]]
361; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[SPILL_LANE_1:[0-9]+]]
362; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
363; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
364; GCN-O0-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
365; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
366; GCN-O0:      store_dword
367; GCN-O0-NEXT: {{^}}[[ENDIF]]:
368; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[SPILL_LANE_0]]
369; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[SPILL_LANE_1]]
370; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
371; GCN-O0:      s_barrier
372; GCN-O0:      s_endpgm
373;
; Single (non-nested) if whose join block calls llvm.amdgcn.s.barrier before
; returning. The checks above expect the exec restore to remain in place
; ahead of s_barrier.
; NOTE(review): presumably guards against dropping the restore just because
; the program ends shortly after — inferred from the function name; confirm.
374define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
375bb:
376  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
377  %tmp1 = icmp ugt i32 %tmp, 1                      ; if condition: workitem id > 1
378  br i1 %tmp1, label %bb.then, label %bb.end
379
380bb.then:                                          ; preds = %bb
381  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
382  store i32 0, i32 addrspace(1)* %tmp4, align 4
383  br label %bb.end
384
385bb.end:                                           ; preds = %bb.then, %bb
386  call void @llvm.amdgcn.s.barrier()                ; declared with attribute group #1 (nounwind convergent)
387  ret void
388}
389
390; GCN-LABEL: {{^}}scc_liveness:
391
392; GCN: [[BB1_OUTER_LOOP:.LBB[0-9]+_[0-9]+]]:
393; GCN: s_or_b64 exec, exec, [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
394;
395; GCN: [[BB1_INNER_LOOP:.LBB[0-9]+_[0-9]+]]:
396; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
397; GCN: s_andn2_b64
398; GCN-NEXT: s_cbranch_execz
399
400; GCN: [[BB1_LOOP:.LBB[0-9]+_[0-9]+]]:
401; GCN: s_andn2_b64 exec, exec,
402; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]
403
404; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offen
405
406; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER]], {{vcc|s\[[0-9:]+\]}}
407; GCN-NEXT: s_cbranch_execz [[BB1_OUTER_LOOP]]
408
409; GCN-NOT: s_or_b64 exec, exec
410
411; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
412; GCN: buffer_store_dword
413; GCN: buffer_store_dword
414; GCN: buffer_store_dword
415; GCN: buffer_store_dword
416; GCN: s_setpc_b64
417;
418; GCN-O0-LABEL: {{^}}scc_liveness:
419; GCN-O0-COUNT-2: buffer_store_dword
420; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0:[0-9]+]]
421; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1:[0-9]+]]
422; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0:[0-9]+]]
423; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1:[0-9]+]]
424; GCN-O0: [[INNER_LOOP:.LBB[0-9]+_[0-9]+]]:
425; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
426; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
427; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
428; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
429; GCN-O0: buffer_load_dword
430; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_0:[0-9]+]]
431; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_1:[0-9]+]]
432; GCN-O0:      s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
433; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0:[0-9]+]]
434; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1:[0-9]+]]
435; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
436; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
437; GCN-O0-NEXT: s_mov_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
438; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
439; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
440; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
441; GCN-O0-NEXT: s_cbranch_execnz [[INNER_LOOP]]
442; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
443; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0]]
444; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1]]
445; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
446; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
447; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_0:[0-9]+]]
448; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_1:[0-9]+]]
449; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
450; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
451; GCN-O0-NEXT: s_cbranch_execz [[FLOW2:.LBB[0-9_]+]]
452; GCN-O0: {{^}}[[FLOW2]]:
453; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_0]]
454; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_1]]
455; GCN-O0:      s_branch [[FLOW:.LBB[0-9_]+]]
456; GCN-O0: {{^}}[[FLOW]]:
457; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
458; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_0:[0-9]+]]
459; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_1:[0-9]+]]
460; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
461; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
462; GCN-O0-NEXT: s_cbranch_execz [[FLOW3:.LBB[0-9_]+]]
463; GCN-O0:      ; %bb.{{[0-9]+}}:
464; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_0:[0-9]+]]
465; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_1:[0-9]+]]
466; GCN-O0: {{^}}[[FLOW3]]:
467; GCN-O0-COUNT-4: buffer_load_dword
468; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_0]]
469; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_1]]
470; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_0]]
471; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_1]]
472; GCN-O0:      s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
473; GCN-O0-NEXT: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
474; GCN-O0-COUNT-2: s_mov_b64
475; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
476; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
477; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
478; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
479; GCN-O0-COUNT-4: buffer_store_dword
480; GCN-O0:      s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
481; GCN-O0-NEXT: s_cbranch_execnz [[INNER_LOOP]]
482; GCN-O0:      ; %bb.{{[0-9]+}}:
483; GCN-O0-COUNT-4: buffer_store_dword
484; GCN-O0:     s_setpc_b64
485;
; Plain (non-kernel) function with looping control flow: %bb1 has two back
; edges, its own self-loop and the one from %Flow1. Inside the loop, %bb2 and
; %bb10 both branch on the same %arg == 0 test, and the only exit is %bb12.
; Memory accesses use undef addrspace(5) pointers, so only the control-flow
; shape matters here.
; NOTE(review): the name suggests this guards SCC liveness around the exec
; mask updates — not verifiable from the IR alone; confirm against history.
486define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
487bb:
488  br label %bb1
489
490bb1:                                              ; preds = %Flow1, %bb1, %bb
491  %tmp = icmp slt i32 %arg, 519
492  br i1 %tmp, label %bb2, label %bb1                ; false edge is the self-loop
493
494bb2:                                              ; preds = %bb1
495  %tmp3 = icmp eq i32 %arg, 0                       ; also reused as the branch condition in %bb10
496  br i1 %tmp3, label %bb4, label %bb10
497
498bb4:                                              ; preds = %bb2
499  %tmp6 = load float, float addrspace(5)* undef
500  %tmp7 = fcmp olt float %tmp6, 0.0
501  br i1 %tmp7, label %bb8, label %Flow
502
503bb8:                                              ; preds = %bb4
504  %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
505  br label %Flow
506
507Flow:                                             ; preds = %bb8, %bb4
508  %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
509  br label %bb10
510
511bb10:                                             ; preds = %Flow, %bb2
512  %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
513  br i1 %tmp3, label %bb12, label %Flow1            ; true edge leaves the loop
514
515Flow1:                                            ; preds = %bb10
516  br label %bb1                                     ; back edge to the loop header
517
518bb12:                                             ; preds = %bb10
519  store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
520  ret void
521}
522
; Intrinsics called by the kernels in this file.
523declare i32 @llvm.amdgcn.workitem.id.x() #0
524declare void @llvm.amdgcn.s.barrier() #1
525
; Attribute groups. #0 is used by the workitem-id intrinsic and by
; @scc_liveness; #1 is used by the barrier intrinsic.
526attributes #0 = { nounwind readnone speculatable }
527attributes #1 = { nounwind convergent }
; NOTE(review): #2 is not referenced in the visible part of this file —
; confirm it is unused before removing it.
528attributes #2 = { nounwind }
529