; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break

define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT:    br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT:    ret void
;
; GCN-LABEL: break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB0_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB0_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB0_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB0_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}
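
; A note on the annotated-loop idiom checked above: llvm.amdgcn.if.break
; ORs the lanes that want to exit this iteration into the accumulated break
; mask (the [[PHI_BROKEN]] phi), llvm.amdgcn.loop clears those lanes from
; exec and reports when no active lanes remain, and llvm.amdgcn.end.cf
; restores exec at the reconvergence point in bb9.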

define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB1_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB1_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB1_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB1_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}
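
; The remaining tests differ only in the %bb1 incoming value of the
; break-condition phi in %Flow (undef above; a constant expression, true,
; and false below), plus a final variant with the branch targets swapped.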

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB2_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB2_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB2_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB2_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}
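
; Note that the constant-expression break condition above codegens the same
; way as a known-true one: the accumulated break mask is unconditionally
; merged with exec (s_or_b64 s[4:5], s[4:5], exec) at the top of the loop.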

define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB3_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB3_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB3_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB3_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}
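
; With a known-false break condition, by contrast, the live lanes are
; cleared from the accumulated mask each iteration (s_andn2_b64
; s[4:5], s[4:5], exec below), so only the bb4 compare can set break bits.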

define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB4_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB4_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB4_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB4_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap the order of the branch targets in the flow block so that a true
; phi value means continue rather than break.
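; Because the loop now continues on true, SIAnnotateControlFlow inverts the
; condition (the xor i1 ..., true below, selected as s_xor_b64 s[8:9],
; s[4:5], -1) before feeding it to llvm.amdgcn.if.break.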

define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT:    [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP2:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP1]])
; OPT-NEXT:    br i1 [[TMP2]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP1]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB5_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB5_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB5_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB5_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}
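
; A minimal sketch (hypothetical; it has no CHECK lines and is not part of
; the original test) of the input shape all of these functions share: a
; loop whose break condition is a phi merging a computed i1 from the body
; with a constant or undef incoming value from the header.
define amdgpu_kernel void @sketch_phi_break(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %iv = phi i32 [ 0, %bb ], [ %iv.next, %Flow ]
  %iv.next = add i32 %iv, 1
  %cmp0 = icmp slt i32 %iv.next, %arg
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %cmp1 = icmp sge i32 %iv.next, %id              ; computed break condition
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %break = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ] ; constant incoming value
  br i1 %break, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  ret void
}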

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }