; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=FLAT %s

; Each kernel below is compiled for two subtargets (verde and tonga, per the
; llc invocations above) and the emitted assembly is matched instruction by
; instruction. Regenerate the assertions with utils/update_llc_test_checks.py
; rather than editing them by hand.

; The exit condition %1 of the single-block loop %ENDIF is computed in
; %main_body, i.e. before the loop is entered, so it never changes between
; iterations.
define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
; SI-LABEL: break_inserted_outside_of_loop:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v0, s0, v0
; SI-NEXT:    v_and_b32_e32 v0, 1, v0
; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT:    s_mov_b64 s[0:1], 0
; SI-NEXT:  BB0_1: ; %ENDIF
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_and_b64 s[2:3], exec, vcc
; SI-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
; SI-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; SI-NEXT:    s_cbranch_execnz BB0_1
; SI-NEXT:  ; %bb.2: ; %ENDLOOP
; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; FLAT-LABEL: break_inserted_outside_of_loop:
; FLAT:       ; %bb.0: ; %main_body
; FLAT-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; FLAT-NEXT:    s_load_dword s0, s[0:1], 0x2c
; FLAT-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
; FLAT-NEXT:    v_and_b32_e32 v0, s0, v0
; FLAT-NEXT:    v_and_b32_e32 v0, 1, v0
; FLAT-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; FLAT-NEXT:    s_mov_b64 s[0:1], 0
; FLAT-NEXT:  BB0_1: ; %ENDIF
; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT:    s_and_b64 s[2:3], exec, vcc
; FLAT-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
; FLAT-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; FLAT-NEXT:    s_cbranch_execnz BB0_1
; FLAT-NEXT:  ; %bb.2: ; %ENDLOOP
; FLAT-NEXT:    s_or_b64 exec, exec, s[0:1]
; FLAT-NEXT:    s_mov_b32 s7, 0xf000
; FLAT-NEXT:    s_mov_b32 s6, -1
; FLAT-NEXT:    v_mov_b32_e32 v0, 0
; FLAT-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; FLAT-NEXT:    s_endpgm
main_body:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = and i32 %a, %tid
  %1 = trunc i32 %0 to i1
  br label %ENDIF

ENDLOOP:
  store i32 0, i32 addrspace(1)* %out
  ret void

ENDIF:
  br i1 %1, label %ENDLOOP, label %ENDIF
}

; The exit condition %2 of %loop is a phi defined in %endif, outside the loop:
; constant false when coming from %if, and (%b == 0) when coming from %else.
define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
; SI-LABEL: phi_cond_outside_loop:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT:    s_mov_b64 s[2:3], 0
; SI-NEXT:    s_mov_b64 s[4:5], 0
; SI-NEXT:    s_and_saveexec_b64 s[6:7], vcc
; SI-NEXT:    s_cbranch_execz BB1_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_load_dword s0, s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_eq_u32_e64 s[0:1], s0, 0
; SI-NEXT:    s_and_b64 s[4:5], s[0:1], exec
; SI-NEXT:  BB1_2: ; %endif
; SI-NEXT:    s_or_b64 exec, exec, s[6:7]
; SI-NEXT:  BB1_3: ; %loop
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
; SI-NEXT:    s_or_b64 s[2:3], s[0:1], s[2:3]
; SI-NEXT:    s_andn2_b64 exec, exec, s[2:3]
; SI-NEXT:    s_cbranch_execnz BB1_3
; SI-NEXT:  ; %bb.4: ; %exit
; SI-NEXT:    s_endpgm
;
; FLAT-LABEL: phi_cond_outside_loop:
; FLAT:       ; %bb.0: ; %entry
; FLAT-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
; FLAT-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; FLAT-NEXT:    s_mov_b64 s[2:3], 0
; FLAT-NEXT:    s_mov_b64 s[4:5], 0
; FLAT-NEXT:    s_and_saveexec_b64 s[6:7], vcc
; FLAT-NEXT:    s_cbranch_execz BB1_2
; FLAT-NEXT:  ; %bb.1: ; %else
; FLAT-NEXT:    s_load_dword s0, s[0:1], 0x24
; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
; FLAT-NEXT:    v_cmp_eq_u32_e64 s[0:1], s0, 0
; FLAT-NEXT:    s_and_b64 s[4:5], s[0:1], exec
; FLAT-NEXT:  BB1_2: ; %endif
; FLAT-NEXT:    s_or_b64 exec, exec, s[6:7]
; FLAT-NEXT:  BB1_3: ; %loop
; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
; FLAT-NEXT:    s_or_b64 s[2:3], s[0:1], s[2:3]
; FLAT-NEXT:    s_andn2_b64 exec, exec, s[2:3]
; FLAT-NEXT:    s_cbranch_execnz BB1_3
; FLAT-NEXT:  ; %bb.4: ; %exit
; FLAT-NEXT:    s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = icmp eq i32 %tid , 0
  br i1 %0, label %if, label %else

if:
  br label %endif

else:
  %1 = icmp eq i32 %b, 0
  br label %endif

endif:
  %2 = phi i1 [0, %if], [%1, %else]
  br label %loop

loop:
  br i1 %2, label %exit, label %loop

exit:
  ret void
}

; Every destination of the switch ends in unreachable, and %sw.epilog has no
; predecessors. Only the function label and the entry-block comment are
; matched for this kernel.
define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
; SI-LABEL: switch_unreachable:
; SI:       ; %bb.0: ; %centry
;
; FLAT-LABEL: switch_unreachable:
; FLAT:       ; %bb.0: ; %centry
centry:
  switch i32 %x, label %sw.default [
    i32 0, label %sw.bb
    i32 60, label %sw.bb
  ]

sw.bb:
  unreachable

sw.default:
  unreachable

sw.epilog:
  ret void
}

declare float @llvm.fabs.f32(float) nounwind readnone

; Nested while/for control flow with several exits to %return plus the
; infinite %self.loop. The load and the volatile store both use undef pointers.
; NOTE(review): judging by the name, this looks like a regression test for an
; assertion hit while structurizing loops -- confirm against the commit history.
define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
; SI-LABEL: loop_land_info_assert:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
; SI-NEXT:    s_load_dword s8, s[0:1], 0xc
; SI-NEXT:    s_brev_b32 s9, 44
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_lt_i32_e64 s[0:1], s2, 1
; SI-NEXT:    v_cmp_lt_i32_e64 s[4:5], s3, 4
; SI-NEXT:    v_cmp_gt_i32_e64 s[2:3], s3, 3
; SI-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
; SI-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
; SI-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s9
; SI-NEXT:    s_and_b64 s[4:5], exec, s[4:5]
; SI-NEXT:    v_mov_b32_e32 v0, 3
; SI-NEXT:    s_branch BB3_4
; SI-NEXT:  BB3_1: ; %Flow6
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[10:11], 0
; SI-NEXT:  BB3_2: ; %Flow5
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[14:15], 0
; SI-NEXT:  BB3_3: ; %Flow
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_and_b64 vcc, exec, s[12:13]
; SI-NEXT:    s_cbranch_vccnz BB3_8
; SI-NEXT:  BB3_4: ; %while.cond
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_mov_b64 s[14:15], -1
; SI-NEXT:    s_mov_b64 s[10:11], -1
; SI-NEXT:    s_mov_b64 s[12:13], -1
; SI-NEXT:    s_mov_b64 vcc, s[0:1]
; SI-NEXT:    s_cbranch_vccz BB3_3
; SI-NEXT:  ; %bb.5: ; %convex.exit
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[10:11], -1
; SI-NEXT:    s_mov_b64 s[12:13], -1
; SI-NEXT:    s_mov_b64 vcc, s[2:3]
; SI-NEXT:    s_cbranch_vccz BB3_2
; SI-NEXT:  ; %bb.6: ; %if.end
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[12:13], -1
; SI-NEXT:    s_mov_b64 vcc, s[4:5]
; SI-NEXT:    s_cbranch_vccz BB3_1
; SI-NEXT:  ; %bb.7: ; %if.else
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_mov_b64 s[12:13], 0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_branch BB3_1
; SI-NEXT:  BB3_8: ; %loop.exit.guard4
; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT:    s_and_b64 vcc, exec, s[10:11]
; SI-NEXT:    s_cbranch_vccz BB3_4
; SI-NEXT:  ; %bb.9: ; %loop.exit.guard
; SI-NEXT:    s_and_b64 vcc, exec, s[14:15]
; SI-NEXT:    s_cbranch_vccz BB3_13
; SI-NEXT:  ; %bb.10: ; %for.cond.preheader
; SI-NEXT:    s_cmpk_lt_i32 s8, 0x3e8
; SI-NEXT:    s_cbranch_scc0 BB3_13
; SI-NEXT:  ; %bb.11: ; %for.body
; SI-NEXT:    s_and_b64 vcc, exec, 0
; SI-NEXT:  BB3_12: ; %self.loop
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_cbranch_vccz BB3_12
; SI-NEXT:  BB3_13: ; %DummyReturnBlock
; SI-NEXT:    s_endpgm
;
; FLAT-LABEL: loop_land_info_assert:
; FLAT:       ; %bb.0: ; %entry
; FLAT-NEXT:    s_mov_b32 s7, 0xf000
; FLAT-NEXT:    s_mov_b32 s6, -1
; FLAT-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; FLAT-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; FLAT-NEXT:    s_load_dword s8, s[0:1], 0x30
; FLAT-NEXT:    s_brev_b32 s9, 44
; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
; FLAT-NEXT:    v_cmp_lt_i32_e64 s[0:1], s2, 1
; FLAT-NEXT:    v_cmp_lt_i32_e64 s[4:5], s3, 4
; FLAT-NEXT:    v_cmp_gt_i32_e64 s[2:3], s3, 3
; FLAT-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
; FLAT-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
; FLAT-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
; FLAT-NEXT:    s_waitcnt vmcnt(0)
; FLAT-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s9
; FLAT-NEXT:    s_and_b64 s[4:5], exec, s[4:5]
; FLAT-NEXT:    v_mov_b32_e32 v0, 3
; FLAT-NEXT:    s_branch BB3_4
; FLAT-NEXT:  BB3_1: ; %Flow6
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[10:11], 0
; FLAT-NEXT:  BB3_2: ; %Flow5
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[14:15], 0
; FLAT-NEXT:  BB3_3: ; %Flow
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_and_b64 vcc, exec, s[12:13]
; FLAT-NEXT:    s_cbranch_vccnz BB3_8
; FLAT-NEXT:  BB3_4: ; %while.cond
; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT:    s_mov_b64 s[14:15], -1
; FLAT-NEXT:    s_mov_b64 s[10:11], -1
; FLAT-NEXT:    s_mov_b64 s[12:13], -1
; FLAT-NEXT:    s_mov_b64 vcc, s[0:1]
; FLAT-NEXT:    s_cbranch_vccz BB3_3
; FLAT-NEXT:  ; %bb.5: ; %convex.exit
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[10:11], -1
; FLAT-NEXT:    s_mov_b64 s[12:13], -1
; FLAT-NEXT:    s_mov_b64 vcc, s[2:3]
; FLAT-NEXT:    s_cbranch_vccz BB3_2
; FLAT-NEXT:  ; %bb.6: ; %if.end
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[12:13], -1
; FLAT-NEXT:    s_mov_b64 vcc, s[4:5]
; FLAT-NEXT:    s_cbranch_vccz BB3_1
; FLAT-NEXT:  ; %bb.7: ; %if.else
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_mov_b64 s[12:13], 0
; FLAT-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; FLAT-NEXT:    s_branch BB3_1
; FLAT-NEXT:  BB3_8: ; %loop.exit.guard4
; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT:    s_and_b64 vcc, exec, s[10:11]
; FLAT-NEXT:    s_cbranch_vccz BB3_4
; FLAT-NEXT:  ; %bb.9: ; %loop.exit.guard
; FLAT-NEXT:    s_and_b64 vcc, exec, s[14:15]
; FLAT-NEXT:    s_cbranch_vccz BB3_13
; FLAT-NEXT:  ; %bb.10: ; %for.cond.preheader
; FLAT-NEXT:    s_cmpk_lt_i32 s8, 0x3e8
; FLAT-NEXT:    s_cbranch_scc0 BB3_13
; FLAT-NEXT:  ; %bb.11: ; %for.body
; FLAT-NEXT:    s_and_b64 vcc, exec, 0
; FLAT-NEXT:  BB3_12: ; %self.loop
; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT:    s_cbranch_vccz BB3_12
; FLAT-NEXT:  BB3_13: ; %DummyReturnBlock
; FLAT-NEXT:    s_endpgm
entry:
  %cmp = icmp sgt i32 %c0, 0
  br label %while.cond.outer

while.cond.outer:
  %tmp = load float, float addrspace(1)* undef
  br label %while.cond

while.cond:
  %cmp1 = icmp slt i32 %c1, 4
  br i1 %cmp1, label %convex.exit, label %for.cond

convex.exit:
  %or = or i1 %cmp, %cmp1
  br i1 %or, label %return, label %if.end

if.end:
  %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
  %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
  br i1 %cmp2, label %if.else, label %while.cond.outer

if.else:
  store volatile i32 3, i32 addrspace(1)* undef, align 4
  br label %while.cond

for.cond:
  %cmp3 = icmp slt i32 %c3, 1000
  br i1 %cmp3, label %for.body, label %return

for.body:
  br i1 %cmp3, label %self.loop, label %if.end.2

if.end.2:
  %or.cond2 = or i1 %cmp3, %arg
  br i1 %or.cond2, label %return, label %for.cond

self.loop:
  br label %self.loop

return:
  ret void
}

declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

attributes #0 = { nounwind readnone }