; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=FLAT %s

; Control-flow lowering tests for amdgcn kernels. Every kernel below is
; compiled twice (verde, and tonga with flat-for-global disabled) and the
; emitted assembly is matched against the assertions under the SI and FLAT
; prefixes respectively. The assertion lines are generated output: regenerate
; them with the script named on the first line instead of editing them by hand.

; Self-loop (%ENDIF) whose exit condition %1 is computed in %main_body, i.e.
; before the loop is entered. %1 is the low bit of (%a & mbcnt.lo(-1, 0)).
; After the loop, %ENDLOOP stores 0 through %out.
define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
; SI-LABEL: break_inserted_outside_of_loop:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, s2, v0
; SI-NEXT:    v_and_b32_e32 v0, 1, v0
; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT:    s_mov_b64 s[2:3], 0
; SI-NEXT:  .LBB0_1: ; %ENDIF
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_and_b64 s[4:5], exec, vcc
; SI-NEXT:    s_or_b64 s[2:3], s[4:5], s[2:3]
; SI-NEXT:    s_andn2_b64 exec, exec, s[2:3]
; SI-NEXT:    s_cbranch_execnz .LBB0_1
; SI-NEXT:  ; %bb.2: ; %ENDLOOP
; SI-NEXT:    s_or_b64 exec, exec, s[2:3]
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; FLAT-LABEL: break_inserted_outside_of_loop:
; FLAT:       ; %bb.0: ; %main_body
; FLAT-NEXT:    s_load_dword s2, s[0:1], 0x2c
; FLAT-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; FLAT-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
; FLAT-NEXT:    v_and_b32_e32 v0, s2, v0
; FLAT-NEXT:    v_and_b32_e32 v0, 1, v0
; FLAT-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; FLAT-NEXT:    s_mov_b64 s[2:3], 0
; FLAT-NEXT:  .LBB0_1: ; %ENDIF
; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT:    s_and_b64 s[4:5], exec, vcc
; FLAT-NEXT:    s_or_b64 s[2:3], s[4:5], s[2:3]
; FLAT-NEXT:    s_andn2_b64 exec, exec, s[2:3]
; FLAT-NEXT:    s_cbranch_execnz .LBB0_1
; FLAT-NEXT:  ; %bb.2: ; %ENDLOOP
; FLAT-NEXT:    s_or_b64 exec, exec, s[2:3]
; FLAT-NEXT:    s_mov_b32 s3, 0xf000
; FLAT-NEXT:    s_mov_b32 s2, -1
; FLAT-NEXT:    v_mov_b32_e32 v0, 0
; FLAT-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; FLAT-NEXT:    s_endpgm
main_body:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = and i32 %a, %tid
  %1 = trunc i32 %0 to i1
  br label %ENDIF

ENDLOOP:
  store i32 0, i32 addrspace(1)* %out
  ret void

ENDIF:
  br i1 %1, label %ENDLOOP, label %ENDIF
}

; Self-loop (%loop) whose exit condition %2 is a phi defined in %endif, outside
; the loop: it is false when control arrives from %if and (%b == 0) when it
; arrives from %else. Which arm runs depends on mbcnt.lo(-1, 0) being zero.
define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
; SI-LABEL: phi_cond_outside_loop:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT:    s_mov_b64 s[2:3], 0
; SI-NEXT:    s_mov_b64 s[4:5], 0
; SI-NEXT:    s_and_saveexec_b64 s[6:7], vcc
; SI-NEXT:    s_cbranch_execz .LBB1_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_load_dword s0, s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_eq_u32 s0, 0
; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
; SI-NEXT:    s_and_b64 s[4:5], s[0:1], exec
; SI-NEXT:  .LBB1_2: ; %endif
; SI-NEXT:    s_or_b64 exec, exec, s[6:7]
; SI-NEXT:  .LBB1_3: ; %loop
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
; SI-NEXT:    s_or_b64 s[2:3], s[0:1], s[2:3]
; SI-NEXT:    s_andn2_b64 exec, exec, s[2:3]
; SI-NEXT:    s_cbranch_execnz .LBB1_3
; SI-NEXT:  ; %bb.4: ; %exit
; SI-NEXT:    s_endpgm
;
; FLAT-LABEL: phi_cond_outside_loop:
; FLAT:       ; %bb.0: ; %entry
; FLAT-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
; FLAT-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; FLAT-NEXT:    s_mov_b64 s[2:3], 0
; FLAT-NEXT:    s_mov_b64 s[4:5], 0
; FLAT-NEXT:    s_and_saveexec_b64 s[6:7], vcc
; FLAT-NEXT:    s_cbranch_execz .LBB1_2
; FLAT-NEXT:  ; %bb.1: ; %else
; FLAT-NEXT:    s_load_dword s0, s[0:1], 0x24
; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
; FLAT-NEXT:    s_cmp_eq_u32 s0, 0
; FLAT-NEXT:    s_cselect_b64 s[0:1], -1, 0
; FLAT-NEXT:    s_and_b64 s[4:5], s[0:1], exec
; FLAT-NEXT:  .LBB1_2: ; %endif
; FLAT-NEXT:    s_or_b64 exec, exec, s[6:7]
; FLAT-NEXT:  .LBB1_3: ; %loop
; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
; FLAT-NEXT:    s_or_b64 s[2:3], s[0:1], s[2:3]
; FLAT-NEXT:    s_andn2_b64 exec, exec, s[2:3]
; FLAT-NEXT:    s_cbranch_execnz .LBB1_3
; FLAT-NEXT:  ; %bb.4: ; %exit
; FLAT-NEXT:    s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = icmp eq i32 %tid , 0
  br i1 %0, label %if, label %else

if:
  br label %endif

else:
  %1 = icmp eq i32 %b, 0
  br label %endif

endif:
  %2 = phi i1 [0, %if], [%1, %else]
  br label %loop

loop:
  br i1 %2, label %exit, label %loop

exit:
  ret void
}

; Every destination of the switch (%sw.bb for cases 0 and 60, %sw.default
; otherwise) ends in `unreachable`, and %sw.epilog has no predecessors. The
; assertions only pin the function label and the entry-block comment.
define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
; SI-LABEL: switch_unreachable:
; SI:       ; %bb.0: ; %centry
;
; FLAT-LABEL: switch_unreachable:
; FLAT:       ; %bb.0: ; %centry
centry:
  switch i32 %x, label %sw.default [
    i32 0, label %sw.bb
    i32 60, label %sw.bb
  ]

sw.bb:
  unreachable

sw.default:
  unreachable

sw.epilog:
  ret void
}

declare float @llvm.fabs.f32(float) nounwind readnone

; Irregular loop nest with several exits:
;   * %while.cond.outer reloads %tmp and is re-entered from %if.end;
;   * %while.cond is re-entered from %if.else (after a volatile store) and
;     leaves either to %convex.exit or to the second loop at %for.cond;
;   * %for.cond / %for.body / %if.end.2 form a second loop that can return;
;   * %self.loop branches only to itself and never exits.
; Both the load and the volatile store use an `undef` pointer.
; NOTE(review): judging by the name this presumably guards against a compiler
; assertion seen on this CFG shape - confirm against the commit history.
define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
; SI-LABEL: loop_land_info_assert:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
; SI-NEXT:    s_load_dword s6, s[0:1], 0x0
; SI-NEXT:    s_load_dword s14, s[0:1], 0xc
; SI-NEXT:    v_bfrev_b32_e32 v0, 44
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lt_i32 s2, 1
; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
; SI-NEXT:    s_cmp_lt_i32 s3, 4
; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
; SI-NEXT:    s_cmp_gt_i32 s3, 3
; SI-NEXT:    s_cselect_b64 s[2:3], -1, 0
; SI-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
; SI-NEXT:    v_cmp_lt_f32_e64 s[6:7], |s6|, v0
; SI-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
; SI-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
; SI-NEXT:    s_and_b64 s[4:5], exec, s[6:7]
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, 3
; SI-NEXT:    s_branch .LBB3_4
; SI-NEXT:  .LBB3_1: ; %Flow6
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[8:9], 0
; SI-NEXT:  .LBB3_2: ; %Flow5
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[12:13], 0
; SI-NEXT:  .LBB3_3: ; %Flow
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_and_b64 vcc, exec, s[10:11]
; SI-NEXT:    s_cbranch_vccnz .LBB3_8
; SI-NEXT:  .LBB3_4: ; %while.cond
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_mov_b64 s[12:13], -1
; SI-NEXT:    s_mov_b64 s[8:9], -1
; SI-NEXT:    s_mov_b64 s[10:11], -1
; SI-NEXT:    s_mov_b64 vcc, s[0:1]
; SI-NEXT:    s_cbranch_vccz .LBB3_3
; SI-NEXT:  ; %bb.5: ; %convex.exit
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[8:9], -1
; SI-NEXT:    s_mov_b64 s[10:11], -1
; SI-NEXT:    s_mov_b64 vcc, s[2:3]
; SI-NEXT:    s_cbranch_vccz .LBB3_2
; SI-NEXT:  ; %bb.6: ; %if.end
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[10:11], -1
; SI-NEXT:    s_mov_b64 vcc, s[4:5]
; SI-NEXT:    s_cbranch_vccz .LBB3_1
; SI-NEXT:  ; %bb.7: ; %if.else
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[10:11], 0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    s_branch .LBB3_1
; SI-NEXT:  .LBB3_8: ; %loop.exit.guard4
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_and_b64 vcc, exec, s[8:9]
; SI-NEXT:    s_cbranch_vccz .LBB3_4
; SI-NEXT:  ; %bb.9: ; %loop.exit.guard
; SI-NEXT:    s_and_b64 vcc, exec, s[12:13]
; SI-NEXT:    s_cbranch_vccz .LBB3_13
; SI-NEXT:  ; %bb.10: ; %for.cond.preheader
; SI-NEXT:    s_cmpk_lt_i32 s14, 0x3e8
; SI-NEXT:    s_cbranch_scc0 .LBB3_13
; SI-NEXT:  ; %bb.11: ; %for.body
; SI-NEXT:    s_and_b64 vcc, exec, 0
; SI-NEXT:  .LBB3_12: ; %self.loop
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_mov_b64 vcc, vcc
; SI-NEXT:    s_cbranch_vccz .LBB3_12
; SI-NEXT:  .LBB3_13: ; %DummyReturnBlock
; SI-NEXT:    s_endpgm
;
; FLAT-LABEL: loop_land_info_assert:
; FLAT:       ; %bb.0: ; %entry
; FLAT-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; FLAT-NEXT:    s_load_dword s6, s[0:1], 0x0
; FLAT-NEXT:    s_load_dword s14, s[0:1], 0x30
; FLAT-NEXT:    v_bfrev_b32_e32 v0, 44
; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
; FLAT-NEXT:    s_cmp_lt_i32 s2, 1
; FLAT-NEXT:    s_cselect_b64 s[0:1], -1, 0
; FLAT-NEXT:    s_cmp_lt_i32 s3, 4
; FLAT-NEXT:    s_cselect_b64 s[4:5], -1, 0
; FLAT-NEXT:    s_cmp_gt_i32 s3, 3
; FLAT-NEXT:    s_cselect_b64 s[2:3], -1, 0
; FLAT-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
; FLAT-NEXT:    v_cmp_lt_f32_e64 s[6:7], |s6|, v0
; FLAT-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
; FLAT-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
; FLAT-NEXT:    s_and_b64 s[4:5], exec, s[6:7]
; FLAT-NEXT:    s_mov_b32 s7, 0xf000
; FLAT-NEXT:    s_mov_b32 s6, -1
; FLAT-NEXT:    v_mov_b32_e32 v0, 3
; FLAT-NEXT:    s_branch .LBB3_4
; FLAT-NEXT:  .LBB3_1: ; %Flow6
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[8:9], 0
; FLAT-NEXT:  .LBB3_2: ; %Flow5
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[12:13], 0
; FLAT-NEXT:  .LBB3_3: ; %Flow
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_and_b64 vcc, exec, s[10:11]
; FLAT-NEXT:    s_cbranch_vccnz .LBB3_8
; FLAT-NEXT:  .LBB3_4: ; %while.cond
; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT:    s_mov_b64 s[12:13], -1
; FLAT-NEXT:    s_mov_b64 s[8:9], -1
; FLAT-NEXT:    s_mov_b64 s[10:11], -1
; FLAT-NEXT:    s_mov_b64 vcc, s[0:1]
; FLAT-NEXT:    s_cbranch_vccz .LBB3_3
; FLAT-NEXT:  ; %bb.5: ; %convex.exit
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[8:9], -1
; FLAT-NEXT:    s_mov_b64 s[10:11], -1
; FLAT-NEXT:    s_mov_b64 vcc, s[2:3]
; FLAT-NEXT:    s_cbranch_vccz .LBB3_2
; FLAT-NEXT:  ; %bb.6: ; %if.end
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[10:11], -1
; FLAT-NEXT:    s_mov_b64 vcc, s[4:5]
; FLAT-NEXT:    s_cbranch_vccz .LBB3_1
; FLAT-NEXT:  ; %bb.7: ; %if.else
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[10:11], 0
; FLAT-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; FLAT-NEXT:    s_waitcnt vmcnt(0)
; FLAT-NEXT:    s_branch .LBB3_1
; FLAT-NEXT:  .LBB3_8: ; %loop.exit.guard4
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_and_b64 vcc, exec, s[8:9]
; FLAT-NEXT:    s_cbranch_vccz .LBB3_4
; FLAT-NEXT:  ; %bb.9: ; %loop.exit.guard
; FLAT-NEXT:    s_and_b64 vcc, exec, s[12:13]
; FLAT-NEXT:    s_cbranch_vccz .LBB3_13
; FLAT-NEXT:  ; %bb.10: ; %for.cond.preheader
; FLAT-NEXT:    s_cmpk_lt_i32 s14, 0x3e8
; FLAT-NEXT:    s_cbranch_scc0 .LBB3_13
; FLAT-NEXT:  ; %bb.11: ; %for.body
; FLAT-NEXT:    s_and_b64 vcc, exec, 0
; FLAT-NEXT:  .LBB3_12: ; %self.loop
; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT:    s_mov_b64 vcc, vcc
; FLAT-NEXT:    s_cbranch_vccz .LBB3_12
; FLAT-NEXT:  .LBB3_13: ; %DummyReturnBlock
; FLAT-NEXT:    s_endpgm
entry:
  %cmp = icmp sgt i32 %c0, 0
  br label %while.cond.outer

while.cond.outer:
  %tmp = load float, float addrspace(1)* undef
  br label %while.cond

while.cond:
  %cmp1 = icmp slt i32 %c1, 4
  br i1 %cmp1, label %convex.exit, label %for.cond

convex.exit:
  %or = or i1 %cmp, %cmp1
  br i1 %or, label %return, label %if.end

if.end:
  %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
  %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
  br i1 %cmp2, label %if.else, label %while.cond.outer

if.else:
  store volatile i32 3, i32 addrspace(1)* undef, align 4
  br label %while.cond

for.cond:
  %cmp3 = icmp slt i32 %c3, 1000
  br i1 %cmp3, label %for.body, label %return

for.body:
  br i1 %cmp3, label %self.loop, label %if.end.2

if.end.2:
  %or.cond2 = or i1 %cmp3, %arg
  br i1 %or.cond2, label %return, label %for.cond

self.loop:
  br label %self.loop

return:
  ret void
}

declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

attributes #0 = { nounwind readnone }