1; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2
3; Endcf collapsing is disabled at -O0; the run line below checks the uncollapsed output.
4; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -O0 -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefix=GCN-O0 %s
5
6; GCN-LABEL: {{^}}simple_nested_if:
7; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
8; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
9; GCN:      s_and_b64 exec, exec, vcc
10; GCN-NEXT: s_cbranch_execz [[ENDIF]]
11; GCN-NEXT: ; %bb.{{[0-9]+}}:
12; GCN:      store_dword
13; GCN-NEXT: {{^}}[[ENDIF]]:
14; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
15; GCN: ds_write_b32
16; GCN: s_endpgm
17;
18; GCN-O0-LABEL: {{^}}simple_nested_if:
19; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
20; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
21; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
22; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
23; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
24; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
25; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
26; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
27; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
28; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
29; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
30; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
31; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
32; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
33; GCN-O0:      store_dword
34; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
35; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
36; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
37; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
38; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
39; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
40; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
41; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
42; GCN-O0:      ds_write_b32
43; GCN-O0:      s_endpgm
44;
; Two nested ifs keyed on the work-item id. The outer if is taken when the id
; is greater than 1; inside it, bb.inner.then runs unless the id equals 2.
; Both the inner and the outer region leave straight to bb.outer.end, so the
; two ifs share a single join block.
45define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
46bb:
47  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
48  %tmp1 = icmp ugt i32 %tmp, 1
49  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end
50
51bb.outer.then:                                    ; preds = %bb
52  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
53  store i32 0, i32 addrspace(1)* %tmp4, align 4
54  %tmp5 = icmp eq i32 %tmp, 2
  ; Inner condition: id == 2 jumps directly to the shared join block.
55  br i1 %tmp5, label %bb.outer.end, label %bb.inner.then
56
57bb.inner.then:                                    ; preds = %bb.outer.then
58  %tmp7 = add i32 %tmp, 1
59  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
60  store i32 1, i32 addrspace(1)* %tmp9, align 4
61  br label %bb.outer.end
62
63bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
  ; Store through a null addrspace(3) pointer; the check lines match it as ds_write_b32.
64  store i32 3, i32 addrspace(3)* null
65  ret void
66}
67
68; GCN-LABEL: {{^}}uncollapsable_nested_if:
69; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
70; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
71; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
72; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
73; GCN-NEXT: ; %bb.{{[0-9]+}}:
74; GCN:      store_dword
75; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
76; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
77; GCN:      store_dword
78; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
79; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
80; GCN: ds_write_b32
81; GCN: s_endpgm
82;
83; GCN-O0-LABEL: {{^}}uncollapsable_nested_if:
84; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
85; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
86; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
87; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
88; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
89; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
90; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
91; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
92; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
93; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
94; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
95; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
96; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
97; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
98; GCN-O0:      store_dword
99; GCN-O0-NEXT: s_branch [[ENDIF_INNER]]
100; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
101; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
102; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
103; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
104; GCN-O0-NEXT: s_branch [[LAST_BB:.LBB[0-9_]+]]
105; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
106; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
107; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
108; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
109; GCN-O0:      s_branch [[ENDIF_OUTER]]
110; GCN-O0-NEXT: {{^}}[[LAST_BB]]:
111; GCN-O0:      ds_write_b32
112; GCN-O0:      s_endpgm
113;
; Same outer/inner conditions as @simple_nested_if, but the inner if joins at
; bb.inner.end, which performs its own store before branching on to
; bb.outer.end. The inner and outer regions therefore end in different blocks.
114define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
115bb:
116  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
117  %tmp1 = icmp ugt i32 %tmp, 1
118  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end
119
120bb.outer.then:                                    ; preds = %bb
121  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
122  store i32 0, i32 addrspace(1)* %tmp4, align 4
123  %tmp5 = icmp eq i32 %tmp, 2
  ; Inner condition: id == 2 skips bb.inner.then and goes to the inner join.
124  br i1 %tmp5, label %bb.inner.end, label %bb.inner.then
125
126bb.inner.then:                                    ; preds = %bb.outer.then
127  %tmp7 = add i32 %tmp, 1
128  %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
129  store i32 1, i32 addrspace(1)* %tmp8, align 4
130  br label %bb.inner.end
131
132bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
  ; Work placed between the inner join and the outer join.
133  %tmp9 = add i32 %tmp, 2
134  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9
135  store i32 2, i32 addrspace(1)* %tmp10, align 4
136  br label %bb.outer.end
137
138bb.outer.end:                                     ; preds = %bb.inner.end, %bb
139  store i32 3, i32 addrspace(3)* null
140  ret void
141}
142
143; GCN-LABEL: {{^}}nested_if_if_else:
144; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
145; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
146; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
147; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
148; GCN-NEXT: s_cbranch_execz [[THEN_INNER:.LBB[0-9_]+]]
149; GCN-NEXT: ; %bb.{{[0-9]+}}:
150; GCN:      store_dword
151; GCN:      {{^}}[[THEN_INNER]]:
152; GCN-NEXT: s_andn2_saveexec_b64 [[SAVEEXEC_INNER2]], [[SAVEEXEC_INNER2]]
153; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
154; GCN:      store_dword
155; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
156; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
157; GCN: ds_write_b32
158; GCN: s_endpgm
159;
160; GCN-O0-LABEL: {{^}}nested_if_if_else:
161; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
162; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
163; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
164; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
165; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
166; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
167; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
168; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
169; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
170; GCN-O0-NEXT: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
171; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[THEN_SPILL_LANE_0:[0-9]+]]
172; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[THEN_SPILL_LANE_1:[0-9]+]]
173; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
174; GCN-O0-NEXT: s_cbranch_execz [[THEN_INNER:.LBB[0-9_]+]]
175; GCN-O0-NEXT: s_branch [[TEMP_BB:.LBB[0-9_]+]]
176; GCN-O0-NEXT: {{^}}[[THEN_INNER]]:
177; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[THEN_SPILL_LANE_0]]
178; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[THEN_SPILL_LANE_1]]
179; GCN-O0-NEXT: s_or_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
180; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
181; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
182; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
183; GCN-O0-NEXT: s_xor_b64 exec, exec, s[{{[0-9:]+}}]
184; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
185; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
186; GCN-O0:      store_dword
187; GCN-O0-NEXT: s_branch [[ENDIF_INNER]]
188; GCN-O0-NEXT: {{^}}[[TEMP_BB]]:
189; GCN-O0:      s_branch [[THEN_INNER]]
190; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
191; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
192; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
193; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
194; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
195; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
196; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
197; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
198; GCN-O0:      ds_write_b32
199; GCN-O0:      s_endpgm
200;
; An outer if (work-item id > 1) wrapping an inner if/else on id == 2.
; The entry block stores before the outer branch; bb.then and bb.else each
; store once and both branch directly to bb.outer.end.
201define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
202bb:
203  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
204  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
205  store i32 0, i32 addrspace(1)* %tmp1, align 4
206  %tmp2 = icmp ugt i32 %tmp, 1
207  br i1 %tmp2, label %bb.outer.then, label %bb.outer.end
208
209bb.outer.then:                                       ; preds = %bb
  ; Inner if/else: this block only computes the condition and branches.
210  %tmp5 = icmp eq i32 %tmp, 2
211  br i1 %tmp5, label %bb.then, label %bb.else
212
213bb.then:                                             ; preds = %bb.outer.then
214  %tmp3 = add i32 %tmp, 1
215  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3
216  store i32 1, i32 addrspace(1)* %tmp4, align 4
217  br label %bb.outer.end
218
219bb.else:                                             ; preds = %bb.outer.then
220  %tmp7 = add i32 %tmp, 2
221  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
222  store i32 2, i32 addrspace(1)* %tmp9, align 4
223  br label %bb.outer.end
224
225bb.outer.end:                                        ; preds = %bb, %bb.then, %bb.else
226  store i32 3, i32 addrspace(3)* null
227  ret void
228}
229
230; GCN-LABEL: {{^}}nested_if_else_if:
231; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
232; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
233; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:.LBB[0-9_]+]]
234; GCN-NEXT: ; %bb.{{[0-9]+}}:
235; GCN:      store_dword
236; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
237; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:.LBB[0-9_]+]]
238; GCN-NEXT: ; %bb.{{[0-9]+}}:
239; GCN:      store_dword
240; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
241; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
242; GCN:      {{^}}[[THEN_OUTER]]:
243; GCN-NEXT: s_andn2_saveexec_b64 [[SAVEEXEC_OUTER2]], [[SAVEEXEC_OUTER2]]
244; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
245; GCN-NEXT: ; %bb.{{[0-9]+}}:
246; GCN:      store_dword
247; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_ELSE:s\[[0-9:]+\]]],
248; GCN-NEXT: s_cbranch_execz [[FLOW1:.LBB[0-9_]+]]
249; GCN-NEXT: ; %bb.{{[0-9]+}}:
250; GCN:      store_dword
251; GCN-NEXT: [[FLOW1]]:
252; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_ELSE]]
253; GCN:      s_or_b64 exec, exec, [[SAVEEXEC_OUTER2]]
254; GCN:      ds_write_b32
255; GCN:      s_endpgm
256;
257; GCN-O0-LABEL: {{^}}nested_if_else_if:
258; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
259; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
260; GCN-O0-NEXT: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
261; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
262; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
263; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
264; GCN-O0-NEXT: s_cbranch_execz [[THEN_OUTER:.LBB[0-9_]+]]
265; GCN-O0-NEXT: s_branch [[INNER_IF_OUTER_ELSE:.LBB[0-9_]+]]
266; GCN-O0-NEXT: {{^}}[[THEN_OUTER]]:
267; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
268; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
269; GCN-O0-NEXT: s_or_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
270; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
271; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_2_SPILL_LANE_0:[0-9]+]]
272; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_2_SPILL_LANE_1:[0-9]+]]
273; GCN-O0-NEXT: s_xor_b64 exec, exec, s[{{[0-9:]+}}]
274; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
275; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
276; GCN-O0:      store_dword
277; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
278; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[ELSE_SPILL_LANE_0:[0-9]+]]
279; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[ELSE_SPILL_LANE_1:[0-9]+]]
280; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
281; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
282; GCN-O0-NEXT: s_cbranch_execz [[FLOW1:.LBB[0-9_]+]]
283; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
284; GCN-O0:      store_dword
285; GCN-O0-NEXT: s_branch [[FLOW1]]
286; GCN-O0-NEXT: {{^}}[[INNER_IF_OUTER_ELSE]]
287; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
288; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_IF_OUTER_ELSE_SPILL_LANE_0:[0-9]+]]
289; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_IF_OUTER_ELSE_SPILL_LANE_1:[0-9]+]]
290; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
291; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
292; GCN-O0-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:.LBB[0-9_]+]]
293; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
294; GCN-O0:      store_dword
295; GCN-O0-NEXT: {{^}}[[THEN_OUTER_FLOW]]
296; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_IF_OUTER_ELSE_SPILL_LANE_0]]
297; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_IF_OUTER_ELSE_SPILL_LANE_1]]
298; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
299; GCN-O0-NEXT: s_branch [[THEN_OUTER]]
300; GCN-O0-NEXT: {{^}}[[FLOW1]]
301; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[ELSE_SPILL_LANE_0]]
302; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[ELSE_SPILL_LANE_1]]
303; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
304; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]
305; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_2_SPILL_LANE_0]]
306; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_2_SPILL_LANE_1]]
307; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
308; GCN-O0:      ds_write_b32
309; GCN-O0:      s_endpgm
310;
; An outer if/else on (work-item id > 1) where each arm stores and then
; contains its own inner if on id == 2. Every path ends at bb.outer.end.
; All stores go through offsets from %tmp1, the element indexed by the id.
311define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
312bb:
313  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
314  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
315  store i32 0, i32 addrspace(1)* %tmp1, align 4
316  %cc1 = icmp ugt i32 %tmp, 1
317  br i1 %cc1, label %bb.outer.then, label %bb.outer.else
318
; Outer "then" arm with a nested if.
319bb.outer.then:
320  %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1
321  store i32 1, i32 addrspace(1)* %tmp2, align 4
322  %cc2 = icmp eq i32 %tmp, 2
323  br i1 %cc2, label %bb.inner.then, label %bb.outer.end
324
325bb.inner.then:
326  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2
327  store i32 2, i32 addrspace(1)* %tmp3, align 4
328  br label %bb.outer.end
329
; Outer "else" arm with a nested if on the same id == 2 comparison.
330bb.outer.else:
331  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3
332  store i32 3, i32 addrspace(1)* %tmp4, align 4
333  %cc3 = icmp eq i32 %tmp, 2
334  br i1 %cc3, label %bb.inner.then2, label %bb.outer.end
335
336bb.inner.then2:
337  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4
338  store i32 4, i32 addrspace(1)* %tmp5, align 4
339  br label %bb.outer.end
340
; Common join for both outer arms and both inner ifs.
341bb.outer.end:
342  store i32 3, i32 addrspace(3)* null
343  ret void
344}
345
346; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
347; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
348; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
349; GCN-NEXT: ; %bb.{{[0-9]+}}:
350; GCN:      store_dword
351; GCN-NEXT: {{^}}[[ENDIF]]:
352; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
353; GCN:      s_barrier
354; GCN-NEXT: s_endpgm
355;
356; GCN-O0-LABEL: {{^}}s_endpgm_unsafe_barrier:
357; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
358; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[SPILL_LANE_0:[0-9]+]]
359; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[SPILL_LANE_1:[0-9]+]]
360; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
361; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
362; GCN-O0-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
363; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
364; GCN-O0:      store_dword
365; GCN-O0-NEXT: {{^}}[[ENDIF]]:
366; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[SPILL_LANE_0]]
367; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[SPILL_LANE_1]]
368; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
369; GCN-O0:      s_barrier
370; GCN-O0:      s_endpgm
371;
; A single if (work-item id > 1) whose join block calls the s.barrier
; intrinsic immediately before returning.
; NOTE(review): judging by the name, this presumably guards against dropping
; the exec restore ahead of the barrier at the end of the program - confirm
; against the pass this test covers.
372define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
373bb:
374  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
375  %tmp1 = icmp ugt i32 %tmp, 1
376  br i1 %tmp1, label %bb.then, label %bb.end
377
378bb.then:                                          ; preds = %bb
379  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
380  store i32 0, i32 addrspace(1)* %tmp4, align 4
381  br label %bb.end
382
383bb.end:                                           ; preds = %bb.then, %bb
  ; The barrier is the only operation between the join point and the return.
384  call void @llvm.amdgcn.s.barrier()
385  ret void
386}
387
388; GCN-LABEL: {{^}}scc_liveness:
389
390; GCN: [[BB1_OUTER_LOOP:.LBB[0-9]+_[0-9]+]]:
391; GCN: s_or_b64 exec, exec, [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
392;
393; GCN: [[BB1_INNER_LOOP:.LBB[0-9]+_[0-9]+]]:
394; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
395; GCN: s_andn2_b64
396; GCN-NEXT: s_cbranch_execz
397
398; GCN: [[BB1_LOOP:.LBB[0-9]+_[0-9]+]]:
399; GCN: s_andn2_b64 exec, exec,
400; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]
401
402; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offen
403
404; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER]], {{vcc|s\[[0-9:]+\]}}
405; GCN-NEXT: s_cbranch_execz [[BB1_OUTER_LOOP]]
406
407; GCN-NOT: s_or_b64 exec, exec
408
409; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
410; GCN: buffer_store_dword
411; GCN: buffer_store_dword
412; GCN: buffer_store_dword
413; GCN: buffer_store_dword
414; GCN: s_setpc_b64
415;
416; GCN-O0-LABEL: {{^}}scc_liveness:
417; GCN-O0-COUNT-2: buffer_store_dword
418; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0:[0-9]+]]
419; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1:[0-9]+]]
420; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0:[0-9]+]]
421; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1:[0-9]+]]
422; GCN-O0: [[INNER_LOOP:.LBB[0-9]+_[0-9]+]]:
423; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
424; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
425; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
426; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
427; GCN-O0: buffer_load_dword
428; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_0:[0-9]+]]
429; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_1:[0-9]+]]
430; GCN-O0:      s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
431; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0:[0-9]+]]
432; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1:[0-9]+]]
433; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
434; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
435; GCN-O0-NEXT: s_mov_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
436; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
437; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
438; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
439; GCN-O0-NEXT: s_cbranch_execnz [[INNER_LOOP]]
440; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
441; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0]]
442; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1]]
443; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
444; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
445; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_0:[0-9]+]]
446; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_1:[0-9]+]]
447; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
448; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
449; GCN-O0-NEXT: s_cbranch_execz [[FLOW2:.LBB[0-9_]+]]
450; GCN-O0: {{^}}[[FLOW2]]:
451; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_0]]
452; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_1]]
453; GCN-O0:      s_branch [[FLOW:.LBB[0-9_]+]]
454; GCN-O0: {{^}}[[FLOW]]:
455; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
456; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_0:[0-9]+]]
457; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_1:[0-9]+]]
458; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
459; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
460; GCN-O0-NEXT: s_cbranch_execz [[FLOW3:.LBB[0-9_]+]]
461; GCN-O0:      ; %bb.{{[0-9]+}}:
462; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_0:[0-9]+]]
463; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_1:[0-9]+]]
464; GCN-O0: {{^}}[[FLOW3]]:
465; GCN-O0-COUNT-4: buffer_load_dword
466; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_0]]
467; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_1]]
468; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_0]]
469; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_1]]
470; GCN-O0:      s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
471; GCN-O0-NEXT: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
472; GCN-O0-COUNT-2: s_mov_b64
473; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
474; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
475; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
476; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
477; GCN-O0-COUNT-4: buffer_store_dword
478; GCN-O0:      s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
479; GCN-O0-NEXT: s_cbranch_execnz [[INNER_LOOP]]
480; GCN-O0:      ; %bb.{{[0-9]+}}:
481; GCN-O0-COUNT-4: buffer_store_dword
482; GCN-O0:     s_setpc_b64
483;
; Non-kernel function (no amdgpu_kernel calling convention) with a loop nest:
; bb1 branches back to itself while %arg is not less than 519, and Flow1
; branches back to bb1 when %arg is non-zero. bb2 and bb10 both branch on the
; same condition, %arg == 0; bb12 is the only exit.
; NOTE(review): the name suggests a regression test for SCC liveness around
; the exec-mask updates - that is inferred from the name only, confirm.
; NOTE(review): attribute group #0 is declared at the end of the file as
; nounwind readnone speculatable, yet this body performs a load and a volatile
; store. Left untouched because changing it could alter the checked codegen.
484define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
485bb:
486  br label %bb1
487
488bb1:                                              ; preds = %Flow1, %bb1, %bb
489  %tmp = icmp slt i32 %arg, 519
490  br i1 %tmp, label %bb2, label %bb1
491
492bb2:                                              ; preds = %bb1
493  %tmp3 = icmp eq i32 %arg, 0
494  br i1 %tmp3, label %bb4, label %bb10
495
496bb4:                                              ; preds = %bb2
  ; Load from an undef addrspace(5) pointer; only the comparison result is used.
497  %tmp6 = load float, float addrspace(5)* undef
498  %tmp7 = fcmp olt float %tmp6, 0.0
499  br i1 %tmp7, label %bb8, label %Flow
500
501bb8:                                              ; preds = %bb4
502  %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
503  br label %Flow
504
505Flow:                                             ; preds = %bb8, %bb4
506  %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
507  br label %bb10
508
509bb10:                                             ; preds = %Flow, %bb2
510  %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
  ; Reuses the bb2 condition: exit through bb12 when %arg == 0, otherwise loop.
511  br i1 %tmp3, label %bb12, label %Flow1
512
513Flow1:                                            ; preds = %bb10
514  br label %bb1
515
516bb12:                                             ; preds = %bb10
517  store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
518  ret void
519}
520
; Intrinsics called by the test functions above.
521declare i32 @llvm.amdgcn.workitem.id.x() #0
522declare void @llvm.amdgcn.s.barrier() #1
523
; Attribute groups. #0 is attached to the workitem-id intrinsic and to
; @scc_liveness; #1 is attached to the barrier intrinsic.
; NOTE(review): #2 is not referenced by any definition visible in this file.
524attributes #0 = { nounwind readnone speculatable }
525attributes #1 = { nounwind convergent }
526attributes #2 = { nounwind }
527