; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break

; OPT-LABEL: @break_loop(
; OPT: bb1:
; OPT: icmp slt i32
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT: load volatile
; OPT: icmp slt i32
; OPT: xor i1 %cmp1
; OPT: br label %Flow

; OPT: Flow:
; OPT: call i64 @llvm.amdgcn.if.break.i64(
; OPT: call i1 @llvm.amdgcn.loop.i64(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1

; OPT: bb9:
; OPT: call void @llvm.amdgcn.end.cf.i64(i64

; GCN-LABEL: {{^}}break_loop:
; GCN: s_mov_b64 [[ACCUM_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN: s_add_i32 s6, s6, 1
; GCN: s_or_b64 [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
; GCN: s_cmp_gt_i32 s6, -1
; GCN: s_cbranch_scc1 [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: ; %bb4
; GCN: buffer_load_dword
; GCN: v_cmp_ge_i32_e32 vcc
; GCN: s_andn2_b64 [[INNER_MASK]], [[INNER_MASK]], exec
; GCN: s_and_b64 [[BROKEN_MASK:s\[[0-9]+:[0-9]+\]]], vcc, exec
; GCN: s_or_b64 [[INNER_MASK]], [[INNER_MASK]], [[BROKEN_MASK]]

; GCN: [[FLOW]]: ; %Flow
; GCN: ; in Loop: Header=BB0_1 Depth=1
; GCN: s_and_b64 [[AND_MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]]
; GCN-NEXT: s_or_b64 [[ACCUM_MASK]], [[AND_MASK]], [[ACCUM_MASK]]
; GCN-NEXT: s_andn2_b64 exec, exec, [[ACCUM_MASK]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]

; GCN: ; %bb.4: ; %bb9
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NOT: call
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap order of branches in flow block so that the true phi is
; continue.

; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = xor i1 %tmp3, true
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break.i64(i1 %0, i64 %phi.broken)
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop.i64(i64 %1)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }