; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break

; OPT-LABEL: @break_loop(
; OPT: bb1:
; OPT: call i64 @llvm.amdgcn.break(i64
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT: load volatile
; OPT: xor i1 %cmp1
; OPT: call i64 @llvm.amdgcn.if.break(
; OPT: br label %Flow

; OPT: Flow:
; OPT: call i1 @llvm.amdgcn.loop(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1

; OPT: bb9:
; OPT: call void @llvm.amdgcn.end.cf(i64

; TODO: Can remove exec fixes in return block
; GCN-LABEL: {{^}}break_loop:
; GCN: s_mov_b64 [[INITMASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN: s_or_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INITMASK]]
; GCN: v_cmp_lt_i32_e32 vcc, -1
; GCN: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: ; BB#2: ; %bb4
; GCN: buffer_load_dword
; GCN: v_cmp_ge_i32_e32 vcc,
; GCN: s_or_b64 [[MASK]], vcc, [[INITMASK]]

; GCN: [[FLOW]]:
; GCN: s_mov_b64 [[INITMASK]], [[MASK]]
; GCN: s_andn2_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]

; GCN: ; BB#4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 undef, i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.break(i64 %phi.broken)
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NOT: call
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %0, %bb4 ], [ %phi.broken, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap order of branches in flow block so that the true phi is
; continue.

; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = xor i1 %tmp3, true
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %0, i64 %phi.broken)
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %1)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }