; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break

define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[CMP1]], true
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT:    br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT:    ret void
;
; GCN-LABEL: break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB0_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB0_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB0_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB0_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}
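
; Same as @break_loop, but the i1 condition fed to the break is a phi
; with an undef incoming value on the edge from the loop header.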
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr6_sgpr7
; GCN-NEXT:    ; implicit-def: $sgpr4
; GCN-NEXT:  BB1_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT:    s_and_b64 s[8:9], s[0:1], exec
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT:    s_cmp_gt_i32 s4, -1
; GCN-NEXT:    s_cbranch_scc1 BB1_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT:  BB1_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    s_add_i32 s4, s4, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB1_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef
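
; The break condition phi takes a constant-expression icmp of @lds
; against an inttoptr address on the edge from the loop header.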
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB2_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB2_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB2_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB2_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}
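
; The break condition phi is constant true on the edge from the loop
; header, so the loop always breaks when bb4 is not taken.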
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB3_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB3_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB3_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB3_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}
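
; The break condition phi is constant false on the edge from the loop
; header, so the loop always continues when bb4 is not taken.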
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB4_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB4_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB4_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB4_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap order of branches in flow block so that the true phi is
; continue.
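; The condition must be inverted (the xor in the OPT checks, s_xor_b64
; in the GCN output) before being passed to llvm.amdgcn.if.break.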

define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT:    [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP2:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP1]])
; OPT-NEXT:    br i1 [[TMP2]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP1]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB5_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB5_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB5_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB5_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }