; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=FLAT %s

; Overview: this file is compiled twice by llc for the amdgcn target, once
; with -mcpu=verde and once with -mcpu=tonga plus -mattr=-flat-for-global.
; The first invocation is matched against the SI-prefixed assertions and the
; second against the FLAT-prefixed assertions. Because the assertions are
; autogenerated (see the NOTE on the first line), regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; The loop exit condition %1 is computed once in %main_body, before the loop.
; %ENDIF is a single-block loop that branches back to itself until %1 is true;
; %ENDLOOP then stores 0 to %out. In the expected output the compare sits
; ahead of the .LBB0_1 loop label and the loop body only updates the exec mask.
define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
; SI-LABEL: break_inserted_outside_of_loop:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_and_b32_e32 v0, s2, v0
; SI-NEXT: v_and_b32_e32 v0, 1, v0
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: .LBB0_1: ; %ENDIF
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_and_b64 s[4:5], exec, vcc
; SI-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
; SI-NEXT: s_cbranch_execnz .LBB0_1
; SI-NEXT: ; %bb.2: ; %ENDLOOP
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; FLAT-LABEL: break_inserted_outside_of_loop:
; FLAT: ; %bb.0: ; %main_body
; FLAT-NEXT: s_load_dword s2, s[0:1], 0x2c
; FLAT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; FLAT-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: v_and_b32_e32 v0, s2, v0
; FLAT-NEXT: v_and_b32_e32 v0, 1, v0
; FLAT-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; FLAT-NEXT: s_mov_b64 s[2:3], 0
; FLAT-NEXT: .LBB0_1: ; %ENDIF
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_and_b64 s[4:5], exec, vcc
; FLAT-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
; FLAT-NEXT: s_andn2_b64 exec, exec, s[2:3]
; FLAT-NEXT: s_cbranch_execnz .LBB0_1
; FLAT-NEXT: ; %bb.2: ; %ENDLOOP
; FLAT-NEXT: s_or_b64 exec, exec, s[2:3]
; FLAT-NEXT: s_mov_b32 s3, 0xf000
; FLAT-NEXT: s_mov_b32 s2, -1
; FLAT-NEXT: v_mov_b32_e32 v0, 0
; FLAT-NEXT: buffer_store_dword v0, off, s[0:3], 0
; FLAT-NEXT: s_endpgm
main_body:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = and i32 %a, %tid
  %1 = trunc i32 %0 to i1
  br label %ENDIF

ENDLOOP:
  store i32 0, i32 addrspace(1)* %out
  ret void

ENDIF:
  br i1 %1, label %ENDLOOP, label %ENDIF
}

; The loop exit condition %2 is a phi defined in %endif, outside the loop. It
; merges constant false from %if with %1 (the result of comparing %b against
; 0) from %else. %loop is a single-block loop that branches back to itself
; until %2 is true and then falls through to %exit.
define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
; SI-LABEL: phi_cond_outside_loop:
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: s_mov_b64 s[4:5], 0
; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc
; SI-NEXT: s_cbranch_execz .LBB1_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_load_dword s0, s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_eq_u32 s0, 0
; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
; SI-NEXT: s_and_b64 s[4:5], s[0:1], exec
; SI-NEXT: .LBB1_2: ; %endif
; SI-NEXT: s_or_b64 exec, exec, s[6:7]
; SI-NEXT: .LBB1_3: ; %loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_and_b64 s[0:1], exec, s[4:5]
; SI-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3]
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
; SI-NEXT: s_cbranch_execnz .LBB1_3
; SI-NEXT: ; %bb.4: ; %exit
; SI-NEXT: s_endpgm
;
; FLAT-LABEL: phi_cond_outside_loop:
; FLAT: ; %bb.0: ; %entry
; FLAT-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0
; FLAT-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; FLAT-NEXT: s_mov_b64 s[2:3], 0
; FLAT-NEXT: s_mov_b64 s[4:5], 0
; FLAT-NEXT: s_and_saveexec_b64 s[6:7], vcc
; FLAT-NEXT: s_cbranch_execz .LBB1_2
; FLAT-NEXT: ; %bb.1: ; %else
; FLAT-NEXT: s_load_dword s0, s[0:1], 0x24
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: s_cmp_eq_u32 s0, 0
; FLAT-NEXT: s_cselect_b64 s[0:1], -1, 0
; FLAT-NEXT: s_and_b64 s[4:5], s[0:1], exec
; FLAT-NEXT: .LBB1_2: ; %endif
; FLAT-NEXT: s_or_b64 exec, exec, s[6:7]
; FLAT-NEXT: .LBB1_3: ; %loop
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_and_b64 s[0:1], exec, s[4:5]
; FLAT-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3]
; FLAT-NEXT: s_andn2_b64 exec, exec, s[2:3]
; FLAT-NEXT: s_cbranch_execnz .LBB1_3
; FLAT-NEXT: ; %bb.4: ; %exit
; FLAT-NEXT: s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = icmp eq i32 %tid , 0
  br i1 %0, label %if, label %else

if:
  br label %endif

else:
  %1 = icmp eq i32 %b, 0
  br label %endif

endif:
  %2 = phi i1 [0, %if], [%1, %else]
  br label %loop

loop:
  br i1 %2, label %exit, label %loop

exit:
  ret void
}

; The entry block switches on %x; every switch target (%sw.bb and %sw.default)
; ends in unreachable, and no block in this function branches to %sw.epilog.
; The assertions only pin the function label and the entry-block comment; no
; instructions are listed for this function under either prefix.
define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
; SI-LABEL: switch_unreachable:
; SI: ; %bb.0: ; %centry
;
; FLAT-LABEL: switch_unreachable:
; FLAT: ; %bb.0: ; %centry
centry:
  switch i32 %x, label %sw.default [
    i32 0, label %sw.bb
    i32 60, label %sw.bb
  ]

sw.bb:
  unreachable

sw.default:
  unreachable

sw.epilog:
  ret void
}

declare float @llvm.fabs.f32(float) nounwind readnone

; Control-flow shape exercised by this function:
;   * %while.cond.outer / %while.cond / %convex.exit / %if.end / %if.else form
;     a nested loop: %if.end branches back to %while.cond.outer and %if.else
;     branches back to %while.cond.
;   * %while.cond leaves towards %for.cond when %cmp1 is false; %convex.exit
;     leaves towards %return when %or is true.
;   * %for.cond / %for.body / %if.end.2 form a second loop that can reach
;     %return or %self.loop.
;   * %self.loop branches unconditionally to itself.
; The load in %while.cond.outer and the volatile store in %if.else both use
; undef pointers. The body never references %c2, %x or %y.
; NOTE(review): the name suggests a regression test for an assertion in loop
; handling; the originating report is not visible in this file - confirm.
define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
; SI-LABEL: loop_land_info_assert:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0
; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SI-NEXT: s_load_dword s14, s[0:1], 0xc
; SI-NEXT: s_brev_b32 s8, 44
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lt_i32 s2, 1
; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
; SI-NEXT: s_cmp_lt_i32 s3, 4
; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
; SI-NEXT: s_cmp_gt_i32 s3, 3
; SI-NEXT: s_cselect_b64 s[2:3], -1, 0
; SI-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
; SI-NEXT: s_and_b64 s[0:1], exec, s[0:1]
; SI-NEXT: s_and_b64 s[2:3], exec, s[2:3]
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s8
; SI-NEXT: s_and_b64 s[4:5], exec, s[4:5]
; SI-NEXT: v_mov_b32_e32 v0, 3
; SI-NEXT: s_branch .LBB3_4
; SI-NEXT: .LBB3_1: ; %Flow6
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[8:9], 0
; SI-NEXT: .LBB3_2: ; %Flow5
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[12:13], 0
; SI-NEXT: .LBB3_3: ; %Flow
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[10:11]
; SI-NEXT: s_cbranch_vccnz .LBB3_8
; SI-NEXT: .LBB3_4: ; %while.cond
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 s[8:9], -1
; SI-NEXT: s_mov_b64 s[10:11], -1
; SI-NEXT: s_mov_b64 vcc, s[0:1]
; SI-NEXT: s_cbranch_vccz .LBB3_3
; SI-NEXT: ; %bb.5: ; %convex.exit
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[8:9], -1
; SI-NEXT: s_mov_b64 s[10:11], -1
; SI-NEXT: s_mov_b64 vcc, s[2:3]
; SI-NEXT: s_cbranch_vccz .LBB3_2
; SI-NEXT: ; %bb.6: ; %if.end
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[10:11], -1
; SI-NEXT: s_mov_b64 vcc, s[4:5]
; SI-NEXT: s_cbranch_vccz .LBB3_1
; SI-NEXT: ; %bb.7: ; %if.else
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[10:11], 0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_branch .LBB3_1
; SI-NEXT: .LBB3_8: ; %loop.exit.guard4
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[8:9]
; SI-NEXT: s_cbranch_vccz .LBB3_4
; SI-NEXT: ; %bb.9: ; %loop.exit.guard
; SI-NEXT: s_and_b64 vcc, exec, s[12:13]
; SI-NEXT: s_cbranch_vccz .LBB3_13
; SI-NEXT: ; %bb.10: ; %for.cond.preheader
; SI-NEXT: s_cmpk_lt_i32 s14, 0x3e8
; SI-NEXT: s_cbranch_scc0 .LBB3_13
; SI-NEXT: ; %bb.11: ; %for.body
; SI-NEXT: s_and_b64 vcc, exec, 0
; SI-NEXT: .LBB3_12: ; %self.loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccz .LBB3_12
; SI-NEXT: .LBB3_13: ; %DummyReturnBlock
; SI-NEXT: s_endpgm
;
; FLAT-LABEL: loop_land_info_assert:
; FLAT: ; %bb.0: ; %entry
; FLAT-NEXT: s_mov_b32 s7, 0xf000
; FLAT-NEXT: s_mov_b32 s6, -1
; FLAT-NEXT: buffer_load_dword v0, off, s[4:7], 0
; FLAT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; FLAT-NEXT: s_load_dword s14, s[0:1], 0x30
; FLAT-NEXT: s_brev_b32 s8, 44
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: s_cmp_lt_i32 s2, 1
; FLAT-NEXT: s_cselect_b64 s[4:5], -1, 0
; FLAT-NEXT: s_cmp_lt_i32 s3, 4
; FLAT-NEXT: s_cselect_b64 s[0:1], -1, 0
; FLAT-NEXT: s_cmp_gt_i32 s3, 3
; FLAT-NEXT: s_cselect_b64 s[2:3], -1, 0
; FLAT-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
; FLAT-NEXT: s_and_b64 s[0:1], exec, s[0:1]
; FLAT-NEXT: s_and_b64 s[2:3], exec, s[2:3]
; FLAT-NEXT: s_waitcnt vmcnt(0)
; FLAT-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s8
; FLAT-NEXT: s_and_b64 s[4:5], exec, s[4:5]
; FLAT-NEXT: v_mov_b32_e32 v0, 3
; FLAT-NEXT: s_branch .LBB3_4
; FLAT-NEXT: .LBB3_1: ; %Flow6
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[8:9], 0
; FLAT-NEXT: .LBB3_2: ; %Flow5
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[12:13], 0
; FLAT-NEXT: .LBB3_3: ; %Flow
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[10:11]
; FLAT-NEXT: s_cbranch_vccnz .LBB3_8
; FLAT-NEXT: .LBB3_4: ; %while.cond
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 s[8:9], -1
; FLAT-NEXT: s_mov_b64 s[10:11], -1
; FLAT-NEXT: s_mov_b64 vcc, s[0:1]
; FLAT-NEXT: s_cbranch_vccz .LBB3_3
; FLAT-NEXT: ; %bb.5: ; %convex.exit
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[8:9], -1
; FLAT-NEXT: s_mov_b64 s[10:11], -1
; FLAT-NEXT: s_mov_b64 vcc, s[2:3]
; FLAT-NEXT: s_cbranch_vccz .LBB3_2
; FLAT-NEXT: ; %bb.6: ; %if.end
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[10:11], -1
; FLAT-NEXT: s_mov_b64 vcc, s[4:5]
; FLAT-NEXT: s_cbranch_vccz .LBB3_1
; FLAT-NEXT: ; %bb.7: ; %if.else
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[10:11], 0
; FLAT-NEXT: buffer_store_dword v0, off, s[4:7], 0
; FLAT-NEXT: s_waitcnt vmcnt(0)
; FLAT-NEXT: s_branch .LBB3_1
; FLAT-NEXT: .LBB3_8: ; %loop.exit.guard4
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[8:9]
; FLAT-NEXT: s_cbranch_vccz .LBB3_4
; FLAT-NEXT: ; %bb.9: ; %loop.exit.guard
; FLAT-NEXT: s_and_b64 vcc, exec, s[12:13]
; FLAT-NEXT: s_cbranch_vccz .LBB3_13
; FLAT-NEXT: ; %bb.10: ; %for.cond.preheader
; FLAT-NEXT: s_cmpk_lt_i32 s14, 0x3e8
; FLAT-NEXT: s_cbranch_scc0 .LBB3_13
; FLAT-NEXT: ; %bb.11: ; %for.body
; FLAT-NEXT: s_and_b64 vcc, exec, 0
; FLAT-NEXT: .LBB3_12: ; %self.loop
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_mov_b64 vcc, vcc
; FLAT-NEXT: s_cbranch_vccz .LBB3_12
; FLAT-NEXT: .LBB3_13: ; %DummyReturnBlock
; FLAT-NEXT: s_endpgm
entry:
  %cmp = icmp sgt i32 %c0, 0
  br label %while.cond.outer

while.cond.outer:
  %tmp = load float, float addrspace(1)* undef
  br label %while.cond

while.cond:
  %cmp1 = icmp slt i32 %c1, 4
  br i1 %cmp1, label %convex.exit, label %for.cond

convex.exit:
  %or = or i1 %cmp, %cmp1
  br i1 %or, label %return, label %if.end

if.end:
  %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
  %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
  br i1 %cmp2, label %if.else, label %while.cond.outer

if.else:
  store volatile i32 3, i32 addrspace(1)* undef, align 4
  br label %while.cond

for.cond:
  %cmp3 = icmp slt i32 %c3, 1000
  br i1 %cmp3, label %for.body, label %return

for.body:
  br i1 %cmp3, label %self.loop, label %if.end.2

if.end.2:
  %or.cond2 = or i1 %cmp3, %arg
  br i1 %or.cond2, label %return, label %for.cond

self.loop:
  br label %self.loop

return:
  ret void
}

; Declaration of the intrinsic called by the first two kernels above.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

attributes #0 = { nounwind readnone }