; Codegen test for AMDGPU (tahiti) nested divergent control flow when llc is
; given -amdgpu-remove-redundant-endcf. Every function is checked twice: once
; with the default optimization level and once at -O0, where the exec mask
; halves are saved and restored through v_writelane_b32 / v_readlane_b32 and
; each conditional region keeps its own "s_or_b64 exec, exec, ..." restore.

; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Disabled endcf collapse at -O0.
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -O0 -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefix=GCN-O0 %s

; simple_nested_if: an inner conditional nested in an outer conditional, with
; both regions rejoining directly at %bb.outer.end. The optimized checks expect
; both s_cbranch_execz instructions to target the same label and a single
; exec restore there; the -O0 checks expect separate inner and outer restores.
; GCN-LABEL: {{^}}simple_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
; GCN: s_and_b64 exec, exec, vcc
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN: ds_write_b32
; GCN: s_endpgm
;
; GCN-O0-LABEL: {{^}}simple_nested_if:
; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG: v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0: store_dword
; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0: ds_write_b32
; GCN-O0: s_endpgm
;
define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.outer.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
  store i32 3, i32 addrspace(3)* null
  ret void
}

; uncollapsable_nested_if: like simple_nested_if, but the inner region rejoins
; at %bb.inner.end, which performs another store before branching to
; %bb.outer.end. The optimized checks therefore expect two exec restores, one
; per region, with a store_dword between them.
; GCN-LABEL: {{^}}uncollapsable_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
; GCN: ds_write_b32
; GCN: s_endpgm
;
; GCN-O0-LABEL: {{^}}uncollapsable_nested_if:
; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG: v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0: store_dword
; GCN-O0-NEXT: s_branch [[ENDIF_INNER]]
; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_branch [[LAST_BB:.LBB[0-9_]+]]
; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0: s_branch [[ENDIF_OUTER]]
; GCN-O0-NEXT: {{^}}[[LAST_BB]]:
; GCN-O0: ds_write_b32
; GCN-O0: s_endpgm
;
define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.inner.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp8, align 4
  br label %bb.inner.end

bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
  %tmp9 = add i32 %tmp, 2
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9
  store i32 2, i32 addrspace(1)* %tmp10, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.inner.end, %bb
  store i32 3, i32 addrspace(3)* null
  ret void
}

; nested_if_if_else: an outer conditional whose body is an if/else; both arms
; (%bb.then and %bb.else) branch straight to %bb.outer.end. The optimized
; checks expect the else-side s_cbranch_execz to target the outer end label
; and only the outer saved exec mask to be restored there.
; GCN-LABEL: {{^}}nested_if_if_else:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
; GCN-NEXT: s_cbranch_execz [[THEN_INNER:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN: {{^}}[[THEN_INNER]]:
; GCN-NEXT: s_andn2_saveexec_b64 [[SAVEEXEC_INNER2]], [[SAVEEXEC_INNER2]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
; GCN: ds_write_b32
; GCN: s_endpgm
;
; GCN-O0-LABEL: {{^}}nested_if_if_else:
; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG: v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[THEN_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[THEN_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[THEN_INNER:.LBB[0-9_]+]]
; GCN-O0-NEXT: s_branch [[TEMP_BB:.LBB[0-9_]+]]
; GCN-O0-NEXT: {{^}}[[THEN_INNER]]:
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[THEN_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[THEN_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_xor_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0: store_dword
; GCN-O0-NEXT: s_branch [[ENDIF_INNER]]
; GCN-O0-NEXT: {{^}}[[TEMP_BB]]:
; GCN-O0: s_branch [[THEN_INNER]]
; GCN-O0-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0: ds_write_b32
; GCN-O0: s_endpgm
;
define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %tmp2 = icmp ugt i32 %tmp, 1
  br i1 %tmp2, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.then, label %bb.else

bb.then:                                          ; preds = %bb.outer.then
  %tmp3 = add i32 %tmp, 1
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3
  store i32 1, i32 addrspace(1)* %tmp4, align 4
  br label %bb.outer.end

bb.else:                                          ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 2
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 2, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb, %bb.then, %bb.else
  store i32 3, i32 addrspace(3)* null
  ret void
}

; nested_if_else_if: an outer if/else in which each arm stores and then
; contains its own inner conditional; all four paths rejoin at %bb.outer.end.
; The optimized checks expect each inner region to restore exec on its own
; before the outer mask is restored.
; GCN-LABEL: {{^}}nested_if_else_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
; GCN: {{^}}[[THEN_OUTER]]:
; GCN-NEXT: s_andn2_saveexec_b64 [[SAVEEXEC_OUTER2]], [[SAVEEXEC_OUTER2]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_ELSE:s\[[0-9:]+\]]],
; GCN-NEXT: s_cbranch_execz [[FLOW1:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: [[FLOW1]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_ELSE]]
; GCN: s_or_b64 exec, exec, [[SAVEEXEC_OUTER2]]
; GCN: ds_write_b32
; GCN: s_endpgm
;
; GCN-O0-LABEL: {{^}}nested_if_else_if:
; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-DAG: v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[THEN_OUTER:.LBB[0-9_]+]]
; GCN-O0-NEXT: s_branch [[INNER_IF_OUTER_ELSE:.LBB[0-9_]+]]
; GCN-O0-NEXT: {{^}}[[THEN_OUTER]]:
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_2_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_2_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_xor_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0: store_dword
; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[ELSE_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[ELSE_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[FLOW1:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0: store_dword
; GCN-O0-NEXT: s_branch [[FLOW1]]
; GCN-O0-NEXT: {{^}}[[INNER_IF_OUTER_ELSE]]
; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_IF_OUTER_ELSE_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_IF_OUTER_ELSE_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0: store_dword
; GCN-O0-NEXT: {{^}}[[THEN_OUTER_FLOW]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_IF_OUTER_ELSE_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_IF_OUTER_ELSE_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_branch [[THEN_OUTER]]
; GCN-O0-NEXT: {{^}}[[FLOW1]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[ELSE_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[ELSE_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: {{^}}[[ENDIF_OUTER]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_2_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_2_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0: ds_write_b32
; GCN-O0: s_endpgm
;
define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %cc1 = icmp ugt i32 %tmp, 1
  br i1 %cc1, label %bb.outer.then, label %bb.outer.else

bb.outer.then:                                    ; preds = %bb
  %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1
  store i32 1, i32 addrspace(1)* %tmp2, align 4
  %cc2 = icmp eq i32 %tmp, 2
  br i1 %cc2, label %bb.inner.then, label %bb.outer.end

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2
  store i32 2, i32 addrspace(1)* %tmp3, align 4
  br label %bb.outer.end

bb.outer.else:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3
  store i32 3, i32 addrspace(1)* %tmp4, align 4
  %cc3 = icmp eq i32 %tmp, 2
  br i1 %cc3, label %bb.inner.then2, label %bb.outer.end

bb.inner.then2:                                   ; preds = %bb.outer.else
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4
  store i32 4, i32 addrspace(1)* %tmp5, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb.outer.else, %bb.inner.then2
  store i32 3, i32 addrspace(3)* null
  ret void
}

; s_endpgm_unsafe_barrier: a single conditional whose join block calls
; llvm.amdgcn.s.barrier before returning. Both check sets expect the exec
; restore to be present ahead of s_barrier.
; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN: s_barrier
; GCN-NEXT: s_endpgm
;
; GCN-O0-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG: v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0: store_dword
; GCN-O0-NEXT: {{^}}[[ENDIF]]:
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0: s_barrier
; GCN-O0: s_endpgm
;
define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.then, label %bb.end

bb.then:                                          ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  br label %bb.end

bb.end:                                           ; preds = %bb.then, %bb
  call void @llvm.amdgcn.s.barrier()
  ret void
}

; scc_liveness: a non-kernel function (the checks end in s_setpc_b64 rather
; than s_endpgm) built around a loop: %bb1 can branch back to itself, and
; %Flow1 branches back to %bb1. Inside the loop body there are nested
; conditionals (%bb2 -> %bb4 -> %bb8) feeding phis, and the function leaves
; through %bb12 with a volatile vector store.
; NOTE(review): this definition uses attribute group #0 (readnone
; speculatable) although its body contains a load and a volatile store, and
; group #2 (nounwind) is not referenced anywhere in the visible text. It looks
; like #2 may have been intended here; confirm before changing, since the
; checks below are tied to the current output.
; GCN-LABEL: {{^}}scc_liveness:

; GCN: [[BB1_OUTER_LOOP:.LBB[0-9]+_[0-9]+]]:
; GCN: s_or_b64 exec, exec, [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
;
; GCN: [[BB1_INNER_LOOP:.LBB[0-9]+_[0-9]+]]:
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: s_andn2_b64
; GCN-NEXT: s_cbranch_execz

; GCN: [[BB1_LOOP:.LBB[0-9]+_[0-9]+]]:
; GCN: s_andn2_b64 exec, exec,
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]

; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offen

; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER]], {{vcc|s\[[0-9:]+\]}}
; GCN-NEXT: s_cbranch_execz [[BB1_OUTER_LOOP]]

; GCN-NOT: s_or_b64 exec, exec

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: s_setpc_b64
;
; GCN-O0-LABEL: {{^}}scc_liveness:
; GCN-O0-COUNT-2: buffer_store_dword
; GCN-O0-DAG: v_writelane_b32 [[VGPR:v[0-9]+]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0: [[INNER_LOOP:.LBB[0-9]+_[0-9]+]]:
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
; GCN-O0: buffer_load_dword
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
; GCN-O0-NEXT: s_mov_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execnz [[INNER_LOOP]]
; GCN-O0-NEXT: ; %bb.{{[0-9]+}}:
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_OUT_EXEC_SPILL_LANE_1]]
; GCN-O0-NEXT: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW2_IN_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[FLOW2:.LBB[0-9_]+]]
; GCN-O0: {{^}}[[FLOW2]]:
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW2_IN_EXEC_SPILL_LANE_1]]
; GCN-O0: s_branch [[FLOW:.LBB[0-9_]+]]
; GCN-O0: {{^}}[[FLOW]]:
; GCN-O0: s_mov_b64 s[{{[0-9:]+}}], exec
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW3_IN_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0-NEXT: s_and_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_mov_b64 exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execz [[FLOW3:.LBB[0-9_]+]]
; GCN-O0: ; %bb.{{[0-9]+}}:
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_0:[0-9]+]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[FLOW1_OUT_EXEC_SPILL_LANE_1:[0-9]+]]
; GCN-O0: {{^}}[[FLOW3]]:
; GCN-O0-COUNT-4: buffer_load_dword
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[OUTER_LOOP_EXEC_SPILL_LANE_1]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[FLOW1_OUT_EXEC_SPILL_LANE_1]]
; GCN-O0: s_and_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GCN-O0-COUNT-2: s_mov_b64
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
; GCN-O0-COUNT-4: buffer_store_dword
; GCN-O0: s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
; GCN-O0-NEXT: s_cbranch_execnz [[INNER_LOOP]]
; GCN-O0: ; %bb.{{[0-9]+}}:
; GCN-O0-COUNT-4: buffer_store_dword
; GCN-O0: s_setpc_b64
;
define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
bb:
  br label %bb1

bb1:                                              ; preds = %Flow1, %bb1, %bb
  %tmp = icmp slt i32 %arg, 519
  br i1 %tmp, label %bb2, label %bb1

bb2:                                              ; preds = %bb1
  %tmp3 = icmp eq i32 %arg, 0
  br i1 %tmp3, label %bb4, label %bb10

bb4:                                              ; preds = %bb2
  %tmp6 = load float, float addrspace(5)* undef
  %tmp7 = fcmp olt float %tmp6, 0.0
  br i1 %tmp7, label %bb8, label %Flow

bb8:                                              ; preds = %bb4
  %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
  br label %Flow

Flow:                                             ; preds = %bb8, %bb4
  %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
  br label %bb10

bb10:                                             ; preds = %Flow, %bb2
  %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
  br i1 %tmp3, label %bb12, label %Flow1

Flow1:                                            ; preds = %bb10
  br label %bb1

bb12:                                             ; preds = %bb10
  store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
  ret void
}

; Intrinsics called by the kernels above.
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare void @llvm.amdgcn.s.barrier() #1

; #0 is applied to the workitem.id.x declaration and to @scc_liveness; #1 is
; applied to the s.barrier declaration; #2 has no user in the visible text.
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind }