; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break

; OPT-LABEL: @break_loop(
; OPT: bb1:
; OPT: icmp slt i32
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT: load volatile
; OPT: icmp slt i32
; OPT: xor i1 %cmp1
; OPT: br label %Flow

; OPT: Flow:
; OPT: call i64 @llvm.amdgcn.if.break.i64.i64(
; OPT: call i1 @llvm.amdgcn.loop.i64(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1

; OPT: bb9:
; OPT: call void @llvm.amdgcn.end.cf.i64(i64

; GCN-LABEL: {{^}}break_loop:
; GCN: s_mov_b64 [[ACCUM_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN: s_add_i32 s4, s4, 1
; GCN: s_or_b64 [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
; GCN: s_cmp_gt_i32 s4, -1
; GCN: s_cbranch_scc1 [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: ; %bb4
; GCN: buffer_load_dword
; GCN: v_cmp_ge_i32_e32 vcc
; GCN: s_andn2_b64 [[INNER_MASK]], [[INNER_MASK]], exec
; GCN: s_and_b64 [[BROKEN_MASK:s\[[0-9]+:[0-9]+\]]], vcc, exec
; GCN: s_or_b64 [[INNER_MASK]], [[INNER_MASK]], [[BROKEN_MASK]]

; GCN: [[FLOW]]: ; %Flow
; GCN: ; in Loop: Header=BB0_1 Depth=1
; GCN: s_and_b64 [[BROKEN_MASK]], exec, [[INNER_MASK]]
; GCN: s_or_b64 [[BROKEN_MASK]], [[BROKEN_MASK]], [[ACCUM_MASK]]
; GCN: s_mov_b64 [[ACCUM_MASK]], [[BROKEN_MASK]]
; GCN: s_andn2_b64 exec, exec, [[BROKEN_MASK]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]

; GCN: ; %bb.4: ; %bb9
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NOT: call
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap the order of the branches in the flow block so that a true phi
; value means continue.

; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = xor i1 %tmp3, true
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %0, i64 %phi.broken)
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop.i64(i64 %1)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }