; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s

; Only the SI and FUNC check prefixes are enabled by the invocations above;
; a directive written with any other prefix is silently ignored by FileCheck.

; FUNC-LABEL: {{^}}break_inserted_outside_of_loop:

; SI: [[LOOP_LABEL:[A-Z0-9]+]]:
; Lowered break instruction:
; SI: s_or_b64
; Lowered Loop instruction:
; SI: s_andn2_b64
; NOTE(review): the s_cbranch_execnz line below carries no check prefix, so
; FileCheck does not verify it. Enabling it probably also needs a stricter
; LOOP_LABEL pattern (the current one cannot span an underscore) - confirm
; against actual llc output before changing it.
; s_cbranch_execnz [[LOOP_LABEL]]
; SI: s_endpgm

; The loop exit condition %1 is computed once in main_body, i.e. outside the
; ENDIF self-loop that branches on it.
define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
main_body:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = and i32 %a, %tid
  %1 = trunc i32 %0 to i1
  br label %ENDIF

ENDLOOP:
  store i32 0, i32 addrspace(1)* %out
  ret void

ENDIF:
  br i1 %1, label %ENDLOOP, label %ENDIF
}


; FUNC-LABEL: {{^}}phi_cond_outside_loop:

; SI: s_mov_b64 [[LEFT:s\[[0-9]+:[0-9]+\]]], 0
; SI: s_mov_b64 [[PHI:s\[[0-9]+:[0-9]+\]]], 0

; SI: ; %else
; SI: v_cmp_eq_u32_e64 [[TMP:s\[[0-9]+:[0-9]+\]]],
; SI: s_and_b64 [[PHI]], [[TMP]], exec

; SI: ; %endif

; SI: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]: ; %loop
; SI: s_mov_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
; SI: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[PHI]]
; SI: s_or_b64 [[LEFT]], [[TMP1]], [[TMP]]
; SI: s_andn2_b64 exec, exec, [[LEFT]]
; SI: s_cbranch_execnz [[LOOP_LABEL]]
; SI: s_endpgm

; The loop exit condition %2 is a phi defined in endif, before the loop block
; that branches on it; its incoming values are the constant 0 (from if) and the
; comparison %1 (from else).
define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
entry:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = icmp eq i32 %tid , 0
  br i1 %0, label %if, label %else

if:
  br label %endif

else:
  %1 = icmp eq i32 %b, 0
  br label %endif

endif:
  %2 = phi i1 [0, %if], [%1, %else]
  br label %loop

loop:
  br i1 %2, label %exit, label %loop

exit:
  ret void
}

; FIXME: should emit s_endpgm
; These directives previously used a prefix that neither FileCheck invocation
; enables, so they were never evaluated; they now use the active prefixes.
; FUNC-LABEL: {{^}}switch_unreachable:
; SI-NOT: s_endpgm
; SI: .Lfunc_end2
; Kernel whose switch sends every case (0, 60 and the default) to a block that
; ends in unreachable. sw.epilog holds the only ret but nothing in this
; function branches to it.
define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
centry:
  switch i32 %x, label %sw.default [
    i32 0, label %sw.bb
    i32 60, label %sw.bb
  ]

sw.bb:
  unreachable

sw.default:
  unreachable

sw.epilog:
  ret void
}

; Intrinsic called from if.end in loop_land_info_assert below.
declare float @llvm.fabs.f32(float) nounwind readnone

; This broke the old AMDIL cfg structurizer
; FUNC-LABEL: {{^}}loop_land_info_assert:
; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}}
; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]]
; SI: s_mov_b64 vcc, [[CMP4M]]
; SI-NEXT: s_cbranch_vccnz [[CONVEX_EXIT:BB[0-9_]+]]
; SI-NEXT: s_branch [[FOR_COND_PREHDR:BB[0-9_]+]]

; SI: ; %if.else
; SI: buffer_store_dword

; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]:

; SI: [[CONVEX_EXIT]]:
; SI: s_mov_b64 vcc,
; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]]
; SI: s_branch [[INFLOOP]]
; SI-NEXT: [[FOR_COND_PREHDR]]:
; SI: s_cbranch_vccz [[ENDPGM]]

; SI: [[ENDPGM]]:
; SI-NEXT: s_endpgm

; Control-flow shape exercised here: an outer loop (while.cond.outer) around an
; inner loop (while.cond), a second loop through for.cond / for.body /
; if.end.2, and a block (self.loop) that only ever branches to itself. All
; branch conditions derive from kernel arguments or from a load through an
; undef pointer.
define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
entry:
  %cmp = icmp sgt i32 %c0, 0
  br label %while.cond.outer

; Outer loop header; re-entered from if.end when %cmp2 is false.
while.cond.outer:
  %tmp = load float, float addrspace(1)* undef
  br label %while.cond

; Inner loop header; re-entered from if.else.
while.cond:
  %cmp1 = icmp slt i32 %c1, 4
  br i1 %cmp1, label %convex.exit, label %for.cond

convex.exit:
  %or = or i1 %cmp, %cmp1
  br i1 %or, label %return, label %if.end

if.end:
  %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
  %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
  br i1 %cmp2, label %if.else, label %while.cond.outer

; The volatile store here is the only memory write in this function.
if.else:
  store volatile i32 3, i32 addrspace(1)* undef, align 4
  br label %while.cond

for.cond:
  %cmp3 = icmp slt i32 %c3, 1000
  br i1 %cmp3, label %for.body, label %return

; Branches on %cmp3 again, the same value for.cond just tested.
for.body:
  br i1 %cmp3, label %self.loop, label %if.end.2

if.end.2:
  %or.cond2 = or i1 %cmp3, %arg
  br i1 %or.cond2, label %return, label %for.cond

; No exit edge: this block's only successor is itself.
self.loop:
  br label %self.loop

return:
  ret void
}

; Intrinsic used by the first two kernels in this file to obtain %tid.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

attributes #0 = { nounwind readnone }