; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break

; Simple loop with a conditional backedge; SIAnnotateControlFlow must
; insert llvm.amdgcn.if.break/loop/end.cf to track the exit mask.
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: [[TMP0:%.*]] = xor i1 [[CMP1]], true
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT: br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT: ret void
;
; GCN-LABEL: break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB0_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB0_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB0_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB0_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

; Same loop shape, but the break condition phi has an undef incoming
; value on the edge from %bb1.
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr6_sgpr7
; GCN-NEXT: ; implicit-def: $sgpr4
; GCN-NEXT: BB1_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT: s_and_b64 s[8:9], s[0:1], exec
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT: s_cmp_gt_i32 s4, -1
; GCN-NEXT: s_cbranch_scc1 BB1_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT: BB1_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT: s_add_i32 s4, s4, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[6:7]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB1_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

; The break condition phi's %bb1 incoming value is a constant-expression
; icmp on the address of @lds.
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, lds@abs32@lo
; GCN-NEXT: s_mov_b32 s6, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr3
; GCN-NEXT: BB2_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_cmp_ne_u32_e64 s[8:9], s2, 4
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], s[8:9], exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: s_cmp_gt_i32 s3, -1
; GCN-NEXT: s_cbranch_scc1 BB2_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[4:7], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB2_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT: s_add_i32 s3, s3, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB2_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; The break condition phi's %bb1 incoming value is the constant true
; (always break when bb4 is skipped).
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB3_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB3_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB3_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB3_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; The break condition phi's %bb1 incoming value is the constant false
; (never break when bb4 is skipped).
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB4_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB4_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB4_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB4_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap order of branches in flow block so that the true phi is
; continue.

define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT: [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP2:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP1]])
; OPT-NEXT: br i1 [[TMP2]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP1]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB5_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB5_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB5_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB5_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }