; Codegen test for the AMDGPU -amdgpu-remove-redundant-endcf option.
;
; Each function below builds a small piece of divergent control flow keyed on
; the workitem id (or, for the last function, on an argument) and the checks
; pin where the "s_or_b64 exec, exec, ..." exec-mask restores end up.
; The first invocation runs the default pipeline; the second one runs at -O0,
; where (per the note below) the collapse is disabled and exec masks are
; saved/restored through v_writelane_b32 / v_readlane_b32 spills.

; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Disabled endcf collapse at -O0.
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -O0 -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefix=GCN-O0 %s

; simple_nested_if: two nested ifs whose exits both lead straight to
; %bb.outer.end. The optimized checks expect both s_cbranch_execz to target one
; shared label followed by a single exec restore; the -O0 checks expect a
; separate exec restore at the inner and at the outer end label.

; GCN-LABEL: {{^}}simple_nested_if:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
; GCN:      s_and_b64 exec, exec, vcc
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN: ds_write_b32
; GCN: s_endpgm
;
; GCN-O0-LABEL: {{^}}simple_nested_if:
; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0:      store_dword
; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0:      ds_write_b32
; GCN-O0:      s_endpgm
;
define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.outer.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
  store i32 3, i32 addrspace(3)* null
  ret void
}

; uncollapsable_nested_if: same nesting as above, but %bb.inner.end performs a
; store before control reaches %bb.outer.end. Both sets of checks expect the
; inner and the outer exec restore to stay separate, with a store_dword between
; them in the optimized output.

; GCN-LABEL: {{^}}uncollapsable_nested_if:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
; GCN: ds_write_b32
; GCN: s_endpgm
;
; GCN-O0-LABEL: {{^}}uncollapsable_nested_if:
; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0:      store_dword
; GCN-O0-NEXT: s_branch [[ENDIF_INNER]]
; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_branch [[LAST_BB:.LBB[0-9_]+]]
; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0:      s_branch [[ENDIF_OUTER]]
; GCN-O0-NEXT: {{^}}[[LAST_BB]]:
; GCN-O0:      ds_write_b32
; GCN-O0:      s_endpgm
;
define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.inner.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp8, align 4
  br label %bb.inner.end

; This extra store sits between the inner and the outer join points.
bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
  %tmp9 = add i32 %tmp, 2
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9
  store i32 2, i32 addrspace(1)* %tmp10, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.inner.end, %bb
  store i32 3, i32 addrspace(3)* null
  ret void
}

; nested_if_if_else: an outer if that contains an inner if/else, with both inner
; arms branching directly to %bb.outer.end. The optimized checks expect the
; second inner s_cbranch_execz to target the outer end label and only the outer
; saved exec to be restored there; the -O0 checks expect an inner and an outer
; restore.

; GCN-LABEL: {{^}}nested_if_if_else:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
; GCN-NEXT: s_cbranch_execz [[THEN_INNER:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN:      {{^}}[[THEN_INNER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
; GCN: ds_write_b32
; GCN: s_endpgm
;
; GCN-O0-LABEL: {{^}}nested_if_if_else:
; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[THEN_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[THEN_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[THEN_INNER:.LBB[0-9_]+]]
; GCN-O0-NEXT: s_branch [[TEMP_BB:.LBB[0-9_]+]]
; GCN-O0-NEXT: {{^}}[[THEN_INNER]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[THEN_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[THEN_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_xor_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0:      store_dword
; GCN-O0-NEXT: s_branch [[ENDIF_INNER]]
; GCN-O0-NEXT: {{^}}[[TEMP_BB]]:
; GCN-O0:      s_branch [[THEN_INNER]]
; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0:      ds_write_b32
; GCN-O0:      s_endpgm
;
define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %tmp2 = icmp ugt i32 %tmp, 1
  br i1 %tmp2, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.then, label %bb.else

bb.then:                                          ; preds = %bb.outer.then
  %tmp3 = add i32 %tmp, 1
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3
  store i32 1, i32 addrspace(1)* %tmp4, align 4
  br label %bb.outer.end

bb.else:                                          ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 2
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 2, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb, %bb.then, %bb.else
  store i32 3, i32 addrspace(3)* null
  ret void
}

; nested_if_else_if: an outer if/else with a further if nested inside each arm.
; The optimized checks expect one exec restore per inner if plus the outer
; restore before the final ds_write_b32.

; GCN-LABEL: {{^}}nested_if_else_if:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
; GCN:      {{^}}[[THEN_OUTER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_ELSE:s\[[0-9:]+\]]],
; GCN-NEXT: s_cbranch_execz [[FLOW1:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[FLOW1]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_ELSE]]
; GCN:      s_or_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN:      ds_write_b32
; GCN:      s_endpgm
;
; GCN-O0-LABEL: {{^}}nested_if_else_if:
; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[THEN_OUTER:.LBB[0-9_]+]]
; GCN-O0-NEXT: s_branch [[INNER_IF_OUTER_ELSE:.LBB[0-9_]+]]
; GCN-O0-NEXT: {{^}}[[THEN_OUTER]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_2_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_2_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_xor_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0:      store_dword
; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[ELSE_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[ELSE_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[FLOW1:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0:      store_dword
; GCN-O0-NEXT: s_branch [[FLOW1]]
; GCN-O0-NEXT: {{^}}[[INNER_IF_OUTER_ELSE]]:
; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_IF_OUTER_ELSE_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_IF_OUTER_ELSE_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0:      store_dword
; GCN-O0-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_IF_OUTER_ELSE_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_IF_OUTER_ELSE_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_branch [[THEN_OUTER]]
; GCN-O0-NEXT: {{^}}[[FLOW1]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[ELSE_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[ELSE_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_2_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_2_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0:      ds_write_b32
; GCN-O0:      s_endpgm
;
define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %cc1 = icmp ugt i32 %tmp, 1
  br i1 %cc1, label %bb.outer.then, label %bb.outer.else

bb.outer.then:                                    ; preds = %bb
  %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1
  store i32 1, i32 addrspace(1)* %tmp2, align 4
  %cc2 = icmp eq i32 %tmp, 2
  br i1 %cc2, label %bb.inner.then, label %bb.outer.end

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2
  store i32 2, i32 addrspace(1)* %tmp3, align 4
  br label %bb.outer.end

bb.outer.else:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3
  store i32 3, i32 addrspace(1)* %tmp4, align 4
  %cc3 = icmp eq i32 %tmp, 2
  br i1 %cc3, label %bb.inner.then2, label %bb.outer.end

bb.inner.then2:                                   ; preds = %bb.outer.else
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4
  store i32 4, i32 addrspace(1)* %tmp5, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.inner.then2, %bb.outer.else, %bb.inner.then, %bb.outer.then
  store i32 3, i32 addrspace(3)* null
  ret void
}

; s_endpgm_unsafe_barrier: a single if whose join block executes a barrier and
; then returns. Both sets of checks expect the exec restore to be emitted
; before s_barrier.

; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN:      s_barrier
; GCN-NEXT: s_endpgm
;
; GCN-O0-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0:      store_dword
; GCN-O0-NEXT: {{^}}[[ENDIF]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0:      s_barrier
; GCN-O0:      s_endpgm
;
define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.then, label %bb.end

bb.then:                                          ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  br label %bb.end

bb.end:                                           ; preds = %bb.then, %bb
  call void @llvm.amdgcn.s.barrier()
  ret void
}

; scc_liveness: a non-kernel function (it returns via s_setpc_b64) with a
; self-looping block nested inside an outer loop through %Flow1.
; NOTE(review): presumably guards SCC liveness around the rewritten exec
; restores, going by the function name - confirm against the pass.

; GCN-LABEL: {{^}}scc_liveness:

; GCN: [[BB1_OUTER_LOOP:.LBB[0-9]+_[0-9]+]]:
; GCN: s_or_b64 exec, exec, [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
;
; GCN: [[BB1_INNER_LOOP:.LBB[0-9]+_[0-9]+]]:
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: s_andn2_b64
; GCN-NEXT: s_cbranch_execz

; GCN: [[BB1_LOOP:.LBB[0-9]+_[0-9]+]]:
; GCN: s_andn2_b64 exec, exec,
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]

; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offen

; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER]], {{vcc|s\[[0-9:]+\]}}
; GCN-NEXT: s_cbranch_execz [[BB1_OUTER_LOOP]]

; GCN-NOT: s_or_b64 exec, exec

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: s_setpc_b64
;
; GCN-O0-LABEL: {{^}}scc_liveness:
; GCN-O0-COUNT-2: buffer_store_dword
; GCN-O0-DAG:  v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0:      [[INNER_LOOP:.LBB[0-9]+_[0-9]+]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
; GCN-O0:      buffer_load_dword
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0:      s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
; GCN-O0-NEXT: s_mov_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execnz [[INNER_LOOP]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[FLOW2:.LBB[0-9_]+]]
; GCN-O0:      {{^}}[[FLOW2]]:
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_1]]
; GCN-O0:      s_branch [[FLOW:.LBB[0-9_]+]]
; GCN-O0:      {{^}}[[FLOW]]:
; GCN-O0:      s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[FLOW3:.LBB[0-9_]+]]
; GCN-O0:      ; %bb.{{[0-9]+}}:
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0:      {{^}}[[FLOW3]]:
; GCN-O0-COUNT-4: buffer_load_dword
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_1]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG:  v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_1]]
; GCN-O0:      s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-COUNT-2: s_mov_b64
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG:  v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
; GCN-O0-COUNT-4: buffer_store_dword
; GCN-O0:      s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execnz [[INNER_LOOP]]
; GCN-O0:      ; %bb.{{[0-9]+}}:
; GCN-O0-COUNT-4: buffer_store_dword
; GCN-O0:      s_setpc_b64
;
define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
bb:
  br label %bb1

bb1:                                              ; preds = %Flow1, %bb1, %bb
  %tmp = icmp slt i32 %arg, 519
  br i1 %tmp, label %bb2, label %bb1

bb2:                                              ; preds = %bb1
  %tmp3 = icmp eq i32 %arg, 0
  br i1 %tmp3, label %bb4, label %bb10

bb4:                                              ; preds = %bb2
  %tmp6 = load float, float addrspace(5)* undef
  %tmp7 = fcmp olt float %tmp6, 0.0
  br i1 %tmp7, label %bb8, label %Flow

bb8:                                              ; preds = %bb4
  %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
  br label %Flow

Flow:                                             ; preds = %bb8, %bb4
  %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
  br label %bb10

bb10:                                             ; preds = %Flow, %bb2
  %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
  br i1 %tmp3, label %bb12, label %Flow1

Flow1:                                            ; preds = %bb10
  br label %bb1

bb12:                                             ; preds = %bb10
  store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare void @llvm.amdgcn.s.barrier() #1

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind }