1; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
2; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
3
; Test case: the loop in ENDIF branches on %1, a value computed once in
; main_body, so the condition that breaks out of the loop is defined outside
; of the loop itself.
4; FUNC-LABEL: {{^}}break_inserted_outside_of_loop:
5
6; SI: [[LOOP_LABEL:[A-Z0-9]+]]:
7; Lowered break instruction:
8; SI: s_or_b64
9; Lowered Loop instruction:
10; SI: s_andn2_b64
; NOTE(review): the next line carries no check prefix, so FileCheck does not
; evaluate it; it only documents the expected back-edge branch.
11; s_cbranch_execnz [[LOOP_LABEL]]
12; SI: s_endpgm
13define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
14main_body:
  ; %1 is the low bit of (%a & %tid); it is the only loop exit condition.
15  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
16  %0 = and i32 %a, %tid
17  %1 = trunc i32 %0 to i1
18  br label %ENDIF
19
20ENDLOOP:
  ; Loop exit: store 0 through %out and return.
21  store i32 0, i32 addrspace(1)* %out
22  ret void
23
24ENDIF:
  ; Single-block loop: branches back to itself until %1 is true.
25  br i1 %1, label %ENDLOOP, label %ENDIF
26}
27
28
; Test case: the loop exit condition %2 is a phi of i1 values that is defined
; in %endif, before the loop block that branches on it.
29; FUNC-LABEL: {{^}}phi_cond_outside_loop:
30
31; SI:     s_mov_b64         [[LEFT:s\[[0-9]+:[0-9]+\]]], 0
32; SI:     s_mov_b64         [[PHI:s\[[0-9]+:[0-9]+\]]], 0
33
34; SI: ; %else
35; SI:     v_cmp_eq_u32_e64  [[TMP:s\[[0-9]+:[0-9]+\]]],
36; SI:     s_and_b64         [[PHI]], [[TMP]], exec
37
38; SI: ; %endif
39
40; SI: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]: ; %loop
41; SI:     s_mov_b64         [[TMP:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
42; SI:     s_and_b64         [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[PHI]]
43; SI:     s_or_b64          [[LEFT]], [[TMP1]], [[TMP]]
44; SI:     s_andn2_b64       exec, exec, [[LEFT]]
45; SI:     s_cbranch_execnz  [[LOOP_LABEL]]
46; SI:     s_endpgm
47
48define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
49entry:
  ; Branch on whether %tid is zero.
50  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
51  %0 = icmp eq i32 %tid , 0
52  br i1 %0, label %if, label %else
53
54if:
  ; Contributes the constant 0 (false) to the phi in %endif.
55  br label %endif
56
57else:
  ; Contributes (%b == 0) to the phi in %endif.
58  %1 = icmp eq i32 %b, 0
59  br label %endif
60
61endif:
  ; Merge the two incoming conditions; the result is used only by %loop.
62  %2 = phi i1 [0, %if], [%1, %else]
63  br label %loop
64
65loop:
  ; Single-block loop: branches back to itself until %2 is true.
66  br i1 %2, label %exit, label %loop
67
68exit:
69  ret void
70}
71
; Test case: a switch whose default and case destinations all end in
; unreachable; %sw.epilog, the only block with a ret, has no predecessors.
; The directives below use the FUNC and SI prefixes because those are the only
; prefixes enabled by this file's FileCheck invocations; any other prefix is
; silently ignored.
72; FIXME: should emit s_endpgm
73; FUNC-LABEL: {{^}}switch_unreachable:
74; SI-NOT: s_endpgm
; NOTE(review): presumably the end-of-function label of the third function in
; this file; it bounds the region in which s_endpgm must not appear.
75; SI: .Lfunc_end2
76define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
77centry:
78  switch i32 %x, label %sw.default [
79    i32 0, label %sw.bb
80    i32 60, label %sw.bb
81  ]
82
83sw.bb:
84  unreachable
85
86sw.default:
87  unreachable
88
89sw.epilog:
90  ret void
91}
92
93declare float @llvm.fabs.f32(float) nounwind readnone
94
95; This broke the old AMDIL cfg structurizer
; Test case: an outer loop (%while.cond.outer) containing an inner loop
; (%while.cond), a second loop through %for.cond, and an infinite self loop
; (%self.loop). The branch conditions on the kernel arguments are computed
; from values that never change inside the loops.
96; FUNC-LABEL: {{^}}loop_land_info_assert:
97; SI:      v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}}
98; SI:      s_and_b64        [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]]
99; SI:      s_mov_b64        vcc, [[CMP4M]]
100; SI-NEXT: s_cbranch_vccnz  [[CONVEX_EXIT:BB[0-9_]+]]
101; SI-NEXT: s_branch         [[FOR_COND_PREHDR:BB[0-9_]+]]
102
103; SI: ; %if.else
104; SI:      buffer_store_dword
105
106; SI:      [[INFLOOP:BB[0-9]+_[0-9]+]]:
107
108; SI:      [[CONVEX_EXIT]]:
109; SI:      s_mov_b64        vcc,
110; SI-NEXT: s_cbranch_vccnz  [[ENDPGM:BB[0-9]+_[0-9]+]]
111; SI:      s_branch [[INFLOOP]]
112; SI-NEXT: [[FOR_COND_PREHDR]]:
113; SI:      s_cbranch_vccz [[ENDPGM]]
114
115; SI:      [[ENDPGM]]:
116; SI-NEXT: s_endpgm
117define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
118entry:
119  %cmp = icmp sgt i32 %c0, 0
120  br label %while.cond.outer
121
122while.cond.outer:
  ; Outer loop header: reloads %tmp (from an undef pointer) on every iteration.
123  %tmp = load float, float addrspace(1)* undef
124  br label %while.cond
125
126while.cond:
  ; Inner loop header: %c1 < 4 selects between %convex.exit and %for.cond.
127  %cmp1 = icmp slt i32 %c1, 4
128  br i1 %cmp1, label %convex.exit, label %for.cond
129
130convex.exit:
131  %or = or i1 %cmp, %cmp1
132  br i1 %or, label %return, label %if.end
133
134if.end:
  ; Compare |%tmp| against a small constant to pick the inner or outer back edge.
135  %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
136  %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
137  br i1 %cmp2, label %if.else, label %while.cond.outer
138
139if.else:
  ; Volatile store to an undef pointer, then back to the inner loop header.
140  store volatile i32 3, i32 addrspace(1)* undef, align 4
141  br label %while.cond
142
143for.cond:
144  %cmp3 = icmp slt i32 %c3, 1000
145  br i1 %cmp3, label %for.body, label %return
146
147for.body:
  ; Branches on %cmp3 again, the same condition %for.cond used to get here.
148  br i1 %cmp3, label %self.loop, label %if.end.2
149
150if.end.2:
151  %or.cond2 = or i1 %cmp3, %arg
152  br i1 %or.cond2, label %return, label %for.cond
153
154self.loop:
  ; Infinite loop: the block's only successor is itself.
155 br label %self.loop
156
157return:
158  ret void
159}
160
161declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
162
163attributes #0 = { nounwind readnone }
164