; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=FLAT %s

; Each kernel below is compiled twice by the llc invocations above (once with
; -mcpu=verde, once with -mcpu=tonga and -mattr=-flat-for-global) and the
; emitted assembly is matched against the autogenerated assertions that follow
; each `define` line. Regenerate those assertions with the script named in the
; first line of this file rather than editing them by hand.

; The loop consists of the single block %ENDIF, which branches back to itself
; until %1 is true. %1 is computed in %main_body, before the loop is entered,
; from the kernel argument %a and the result of llvm.amdgcn.mbcnt.lo. For both
; check prefixes the expected assembly has the s_or_b64 that writes exec in the
; %ENDLOOP block, after the loop's backward branch.
define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
; SI-LABEL: break_inserted_outside_of_loop:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, s0, v0
; SI-NEXT:    v_and_b32_e32 v0, 1, v0
; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT:    s_mov_b64 s[0:1], 0
; SI-NEXT:  BB0_1: ; %ENDIF
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_and_b64 s[2:3], exec, vcc
; SI-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
; SI-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; SI-NEXT:    s_cbranch_execnz BB0_1
; SI-NEXT:  ; %bb.2: ; %ENDLOOP
; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; FLAT-LABEL: break_inserted_outside_of_loop:
; FLAT:       ; %bb.0: ; %main_body
; FLAT-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; FLAT-NEXT:    s_load_dword s0, s[0:1], 0x2c
; FLAT-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
; FLAT-NEXT:    v_and_b32_e32 v0, s0, v0
; FLAT-NEXT:    v_and_b32_e32 v0, 1, v0
; FLAT-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; FLAT-NEXT:    s_mov_b64 s[0:1], 0
; FLAT-NEXT:  BB0_1: ; %ENDIF
; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT:    s_and_b64 s[2:3], exec, vcc
; FLAT-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
; FLAT-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; FLAT-NEXT:    s_cbranch_execnz BB0_1
; FLAT-NEXT:  ; %bb.2: ; %ENDLOOP
; FLAT-NEXT:    s_or_b64 exec, exec, s[0:1]
; FLAT-NEXT:    s_mov_b32 s7, 0xf000
; FLAT-NEXT:    s_mov_b32 s6, -1
; FLAT-NEXT:    v_mov_b32_e32 v0, 0
; FLAT-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; FLAT-NEXT:    s_endpgm
main_body:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = and i32 %a, %tid
  ; Only the low bit of (%a & %tid) is kept as the loop exit condition.
  %1 = trunc i32 %0 to i1
  br label %ENDIF

ENDLOOP:
  store i32 0, i32 addrspace(1)* %out
  ret void

ENDIF:
  ; Self-loop: the condition %1 never changes inside the loop.
  br i1 %1, label %ENDLOOP, label %ENDIF
}

; The loop condition %2 is a phi defined in %endif, ahead of the loop: it is
; constant false when control arrives from %if and `icmp eq i32 %b, 0` when it
; arrives from %else. The %loop block itself only branches on that value.
define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
; SI-LABEL: phi_cond_outside_loop:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT:    s_mov_b64 s[2:3], 0
; SI-NEXT:    s_mov_b64 s[4:5], 0
; SI-NEXT:    s_and_saveexec_b64 s[6:7], vcc
; SI-NEXT:    s_cbranch_execz BB1_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_load_dword s0, s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_eq_u32_e64 s[0:1], s0, 0
; SI-NEXT:    s_and_b64 s[4:5], s[0:1], exec
; SI-NEXT:  BB1_2: ; %endif
; SI-NEXT:    s_or_b64 exec, exec, s[6:7]
; SI-NEXT:  BB1_3: ; %loop
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
; SI-NEXT:    s_or_b64 s[2:3], s[0:1], s[2:3]
; SI-NEXT:    s_andn2_b64 exec, exec, s[2:3]
; SI-NEXT:    s_cbranch_execnz BB1_3
; SI-NEXT:  ; %bb.4: ; %exit
; SI-NEXT:    s_endpgm
;
; FLAT-LABEL: phi_cond_outside_loop:
; FLAT:       ; %bb.0: ; %entry
; FLAT-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
; FLAT-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; FLAT-NEXT:    s_mov_b64 s[2:3], 0
; FLAT-NEXT:    s_mov_b64 s[4:5], 0
; FLAT-NEXT:    s_and_saveexec_b64 s[6:7], vcc
; FLAT-NEXT:    s_cbranch_execz BB1_2
; FLAT-NEXT:  ; %bb.1: ; %else
; FLAT-NEXT:    s_load_dword s0, s[0:1], 0x24
; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
; FLAT-NEXT:    v_cmp_eq_u32_e64 s[0:1], s0, 0
; FLAT-NEXT:    s_and_b64 s[4:5], s[0:1], exec
; FLAT-NEXT:  BB1_2: ; %endif
; FLAT-NEXT:    s_or_b64 exec, exec, s[6:7]
; FLAT-NEXT:  BB1_3: ; %loop
; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
; FLAT-NEXT:    s_or_b64 s[2:3], s[0:1], s[2:3]
; FLAT-NEXT:    s_andn2_b64 exec, exec, s[2:3]
; FLAT-NEXT:    s_cbranch_execnz BB1_3
; FLAT-NEXT:  ; %bb.4: ; %exit
; FLAT-NEXT:    s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = icmp eq i32 %tid , 0
  br i1 %0, label %if, label %else

if:
  br label %endif

else:
  %1 = icmp eq i32 %b, 0
  br label %endif

endif:
  ; Loop exit condition, fixed before the loop is entered.
  %2 = phi i1 [0, %if], [%1, %else]
  br label %loop

loop:
  br i1 %2, label %exit, label %loop

exit:
  ret void
}

; Every successor of the switch in %centry ends in `unreachable`, and nothing
; in this function branches to %sw.epilog. The assertions only match the
; function label and the header comment of the first block. The %g and %l
; arguments are not referenced in the body.
define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
; SI-LABEL: switch_unreachable:
; SI:       ; %bb.0: ; %centry
;
; FLAT-LABEL: switch_unreachable:
; FLAT:       ; %bb.0: ; %centry
centry:
  switch i32 %x, label %sw.default [
    i32 0, label %sw.bb
    i32 60, label %sw.bb
  ]

sw.bb:
  unreachable

sw.default:
  unreachable

sw.epilog:
  ret void
}

; Intrinsic called from %if.end in @loop_land_info_assert.
declare float @llvm.fabs.f32(float) nounwind readnone

; Review note - the function name suggests this guards against an assertion
; failure in the compiler; that history is not visible in this file, so confirm
; it before relying on it.
; Control flow: %while.cond.outer loads a float from an undef address and
; enters %while.cond. From there %convex.exit / %if.end / %if.else either
; return, restart the outer loop, or re-enter %while.cond, while %for.cond /
; %for.body / %if.end.2 either return, loop back to %for.cond, or enter the
; infinite %self.loop. Only %c0, %c1, %c3 and %arg are referenced; %c2, %x and
; %y are unused.
define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
; SI-LABEL: loop_land_info_assert:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
; SI-NEXT:    s_load_dword s8, s[0:1], 0xc
; SI-NEXT:    s_brev_b32 s9, 44
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_lt_i32_e64 s[0:1], s2, 1
; SI-NEXT:    v_cmp_lt_i32_e64 s[4:5], s3, 4
; SI-NEXT:    v_cmp_gt_i32_e64 s[2:3], s3, 3
; SI-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
; SI-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
; SI-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s9
; SI-NEXT:    s_and_b64 s[4:5], exec, s[4:5]
; SI-NEXT:    v_mov_b32_e32 v0, 3
; SI-NEXT:    s_branch BB3_4
; SI-NEXT:  BB3_1: ; %Flow6
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[10:11], 0
; SI-NEXT:  BB3_2: ; %Flow5
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[14:15], 0
; SI-NEXT:  BB3_3: ; %Flow
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_and_b64 vcc, exec, s[12:13]
; SI-NEXT:    s_cbranch_vccnz BB3_8
; SI-NEXT:  BB3_4: ; %while.cond
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_mov_b64 s[14:15], -1
; SI-NEXT:    s_mov_b64 s[10:11], -1
; SI-NEXT:    s_mov_b64 s[12:13], -1
; SI-NEXT:    s_mov_b64 vcc, s[0:1]
; SI-NEXT:    s_cbranch_vccz BB3_3
; SI-NEXT:  ; %bb.5: ; %convex.exit
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[10:11], -1
; SI-NEXT:    s_mov_b64 s[12:13], -1
; SI-NEXT:    s_mov_b64 vcc, s[2:3]
; SI-NEXT:    s_cbranch_vccz BB3_2
; SI-NEXT:  ; %bb.6: ; %if.end
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[12:13], -1
; SI-NEXT:    s_mov_b64 vcc, s[4:5]
; SI-NEXT:    s_cbranch_vccz BB3_1
; SI-NEXT:  ; %bb.7: ; %if.else
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[12:13], 0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    s_branch BB3_1
; SI-NEXT:  BB3_8: ; %loop.exit.guard4
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_and_b64 vcc, exec, s[10:11]
; SI-NEXT:    s_cbranch_vccz BB3_4
; SI-NEXT:  ; %bb.9: ; %loop.exit.guard
; SI-NEXT:    s_and_b64 vcc, exec, s[14:15]
; SI-NEXT:    s_cbranch_vccz BB3_13
; SI-NEXT:  ; %bb.10: ; %for.cond.preheader
; SI-NEXT:    s_cmpk_lt_i32 s8, 0x3e8
; SI-NEXT:    s_cbranch_scc0 BB3_13
; SI-NEXT:  ; %bb.11: ; %for.body
; SI-NEXT:    s_and_b64 vcc, exec, 0
; SI-NEXT:  BB3_12: ; %self.loop
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_mov_b64 vcc, vcc
; SI-NEXT:    s_cbranch_vccz BB3_12
; SI-NEXT:  BB3_13: ; %DummyReturnBlock
; SI-NEXT:    s_endpgm
;
; FLAT-LABEL: loop_land_info_assert:
; FLAT:       ; %bb.0: ; %entry
; FLAT-NEXT:    s_mov_b32 s7, 0xf000
; FLAT-NEXT:    s_mov_b32 s6, -1
; FLAT-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; FLAT-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; FLAT-NEXT:    s_load_dword s8, s[0:1], 0x30
; FLAT-NEXT:    s_brev_b32 s9, 44
; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
; FLAT-NEXT:    v_cmp_lt_i32_e64 s[0:1], s2, 1
; FLAT-NEXT:    v_cmp_lt_i32_e64 s[4:5], s3, 4
; FLAT-NEXT:    v_cmp_gt_i32_e64 s[2:3], s3, 3
; FLAT-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
; FLAT-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
; FLAT-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
; FLAT-NEXT:    s_waitcnt vmcnt(0)
; FLAT-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s9
; FLAT-NEXT:    s_and_b64 s[4:5], exec, s[4:5]
; FLAT-NEXT:    v_mov_b32_e32 v0, 3
; FLAT-NEXT:    s_branch BB3_4
; FLAT-NEXT:  BB3_1: ; %Flow6
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[10:11], 0
; FLAT-NEXT:  BB3_2: ; %Flow5
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[14:15], 0
; FLAT-NEXT:  BB3_3: ; %Flow
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_and_b64 vcc, exec, s[12:13]
; FLAT-NEXT:    s_cbranch_vccnz BB3_8
; FLAT-NEXT:  BB3_4: ; %while.cond
; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT:    s_mov_b64 s[14:15], -1
; FLAT-NEXT:    s_mov_b64 s[10:11], -1
; FLAT-NEXT:    s_mov_b64 s[12:13], -1
; FLAT-NEXT:    s_mov_b64 vcc, s[0:1]
; FLAT-NEXT:    s_cbranch_vccz BB3_3
; FLAT-NEXT:  ; %bb.5: ; %convex.exit
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[10:11], -1
; FLAT-NEXT:    s_mov_b64 s[12:13], -1
; FLAT-NEXT:    s_mov_b64 vcc, s[2:3]
; FLAT-NEXT:    s_cbranch_vccz BB3_2
; FLAT-NEXT:  ; %bb.6: ; %if.end
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[12:13], -1
; FLAT-NEXT:    s_mov_b64 vcc, s[4:5]
; FLAT-NEXT:    s_cbranch_vccz BB3_1
; FLAT-NEXT:  ; %bb.7: ; %if.else
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[12:13], 0
; FLAT-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; FLAT-NEXT:    s_waitcnt vmcnt(0)
; FLAT-NEXT:    s_branch BB3_1
; FLAT-NEXT:  BB3_8: ; %loop.exit.guard4
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_and_b64 vcc, exec, s[10:11]
; FLAT-NEXT:    s_cbranch_vccz BB3_4
; FLAT-NEXT:  ; %bb.9: ; %loop.exit.guard
; FLAT-NEXT:    s_and_b64 vcc, exec, s[14:15]
; FLAT-NEXT:    s_cbranch_vccz BB3_13
; FLAT-NEXT:  ; %bb.10: ; %for.cond.preheader
; FLAT-NEXT:    s_cmpk_lt_i32 s8, 0x3e8
; FLAT-NEXT:    s_cbranch_scc0 BB3_13
; FLAT-NEXT:  ; %bb.11: ; %for.body
; FLAT-NEXT:    s_and_b64 vcc, exec, 0
; FLAT-NEXT:  BB3_12: ; %self.loop
; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT:    s_mov_b64 vcc, vcc
; FLAT-NEXT:    s_cbranch_vccz BB3_12
; FLAT-NEXT:  BB3_13: ; %DummyReturnBlock
; FLAT-NEXT:    s_endpgm
entry:
  %cmp = icmp sgt i32 %c0, 0
  br label %while.cond.outer

while.cond.outer:
  ; Reloaded on every restart of the outer loop; consumed by %if.end.
  %tmp = load float, float addrspace(1)* undef
  br label %while.cond

while.cond:
  %cmp1 = icmp slt i32 %c1, 4
  br i1 %cmp1, label %convex.exit, label %for.cond

convex.exit:
  %or = or i1 %cmp, %cmp1
  br i1 %or, label %return, label %if.end

if.end:
  %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
  %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
  br i1 %cmp2, label %if.else, label %while.cond.outer

if.else:
  store volatile i32 3, i32 addrspace(1)* undef, align 4
  br label %while.cond

for.cond:
  %cmp3 = icmp slt i32 %c3, 1000
  br i1 %cmp3, label %for.body, label %return

for.body:
  ; Branches on the same %cmp3 that guarded entry to this block.
  br i1 %cmp3, label %self.loop, label %if.end.2

if.end.2:
  %or.cond2 = or i1 %cmp3, %arg
  br i1 %or.cond2, label %return, label %for.cond

self.loop:
  ; Infinite loop: this block's only successor is itself.
  br label %self.loop

return:
  ret void
}

; Intrinsic that produces %tid in the first two kernels of this file.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

attributes #0 = { nounwind readnone }