; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3
; GCN-LABEL: {{^}}fcmp_f16_lt:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp olt on half: SI legalizes through f32 conversions; VI compares f16 natively.
define void @fcmp_f16_lt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp olt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
26
; GCN-LABEL: {{^}}fcmp_f16_lt_abs:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]

; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]

; SI:  v_cmp_lt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F32]]|, |v[[B_F32]]|
; VI:  v_cmp_lt_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F16]]|, |v[[B_F16]]|

; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fabs should fold into the compare's source-modifier (|x|) rather than emit a
; separate and/abs instruction.
define void @fcmp_f16_lt_abs(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %a.abs = call half @llvm.fabs.f16(half %a.val)
  %b.abs = call half @llvm.fabs.f16(half %b.val)
  %r.val = fcmp olt half %a.abs, %b.abs
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
54
; GCN-LABEL: {{^}}fcmp_f16_eq:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_eq_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_eq_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp oeq -> v_cmp_eq.
define void @fcmp_f16_eq(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp oeq half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
77
; GCN-LABEL: {{^}}fcmp_f16_le:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_le_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_le_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp ole -> v_cmp_le.
define void @fcmp_f16_le(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ole half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
100
; GCN-LABEL: {{^}}fcmp_f16_gt:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_gt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_gt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp ogt -> v_cmp_gt.
define void @fcmp_f16_gt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ogt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
123
; GCN-LABEL: {{^}}fcmp_f16_lg:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_lg_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_lg_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp one (ordered, not equal) -> v_cmp_lg (less-than or greater-than).
define void @fcmp_f16_lg(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp one half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
146
; GCN-LABEL: {{^}}fcmp_f16_ge:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_ge_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_ge_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp oge -> v_cmp_ge.
define void @fcmp_f16_ge(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp oge half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
169
; GCN-LABEL: {{^}}fcmp_f16_o:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_o_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_o_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp ord (neither operand NaN) -> v_cmp_o.
define void @fcmp_f16_o(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ord half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
192
; GCN-LABEL: {{^}}fcmp_f16_u:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_u_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_u_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp uno (either operand NaN) -> v_cmp_u.
define void @fcmp_f16_u(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp uno half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
215
; GCN-LABEL: {{^}}fcmp_f16_nge:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_nge_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_nge_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp ult (unordered or less-than) -> v_cmp_nge (not greater-or-equal).
define void @fcmp_f16_nge(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ult half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
238
; GCN-LABEL: {{^}}fcmp_f16_nlg:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_nlg_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_nlg_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp ueq (unordered or equal) -> v_cmp_nlg (not less-than-or-greater-than).
define void @fcmp_f16_nlg(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ueq half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
261
; GCN-LABEL: {{^}}fcmp_f16_ngt:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_ngt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_ngt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp ule (unordered or less-or-equal) -> v_cmp_ngt (not greater-than).
define void @fcmp_f16_ngt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ule half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
284
; GCN-LABEL: {{^}}fcmp_f16_nle:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_nle_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp ugt (unordered or greater-than) -> v_cmp_nle (not less-or-equal).
define void @fcmp_f16_nle(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp ugt half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
307
; GCN-LABEL: {{^}}fcmp_f16_neq:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_neq_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_neq_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp une (unordered or not-equal) -> v_cmp_neq.
define void @fcmp_f16_neq(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp une half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
330
; GCN-LABEL: {{^}}fcmp_f16_nlt:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI:  v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]
; GCN: buffer_store_dword v[[R_I32]]
; GCN: s_endpgm

; fcmp uge (unordered or greater-or-equal) -> v_cmp_nlt (not less-than).
define void @fcmp_f16_nlt(
    i32 addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = fcmp uge half %a.val, %b.val
  %r.val.sext = sext i1 %r.val to i32
  store i32 %r.val.sext, i32 addrspace(1)* %r
  ret void
}
353
; GCN-LABEL: {{^}}fcmp_v2f16_lt:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_lt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_lt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_lt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_lt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector olt: the <2 x half> is scalarized into low half and high half (shifted
; down by 16) and compared element-wise.
define void @fcmp_v2f16_lt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp olt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
383
; GCN-LABEL: {{^}}fcmp_v2f16_eq:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_eq_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_eq_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_eq_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_eq_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector oeq, scalarized element-wise.
define void @fcmp_v2f16_eq(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp oeq <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
413
; GCN-LABEL: {{^}}fcmp_v2f16_le:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_le_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_le_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_le_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_le_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector ole, scalarized element-wise.
define void @fcmp_v2f16_le(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ole <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
443
; GCN-LABEL: {{^}}fcmp_v2f16_gt:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_gt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_gt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_gt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_gt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector ogt, scalarized element-wise.
define void @fcmp_v2f16_gt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ogt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
473
; GCN-LABEL: {{^}}fcmp_v2f16_lg:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_lg_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_lg_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_lg_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_lg_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector one -> v_cmp_lg, scalarized element-wise.
define void @fcmp_v2f16_lg(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp one <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
503
; GCN-LABEL: {{^}}fcmp_v2f16_ge:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_ge_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_ge_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_ge_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_ge_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector oge, scalarized element-wise.
define void @fcmp_v2f16_ge(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp oge <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
533
; GCN-LABEL: {{^}}fcmp_v2f16_o:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_o_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_o_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_o_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_o_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector ord -> v_cmp_o, scalarized element-wise.
define void @fcmp_v2f16_o(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ord <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
563
; GCN-LABEL: {{^}}fcmp_v2f16_u:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_u_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_u_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_u_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_u_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector uno -> v_cmp_u, scalarized element-wise.
define void @fcmp_v2f16_u(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp uno <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
593
; GCN-LABEL: {{^}}fcmp_v2f16_nge:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_nge_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_nge_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_nge_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_nge_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector ult -> v_cmp_nge, scalarized element-wise.
define void @fcmp_v2f16_nge(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ult <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
623
; GCN-LABEL: {{^}}fcmp_v2f16_nlg:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_nlg_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_nlg_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_nlg_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_nlg_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector ueq -> v_cmp_nlg, scalarized element-wise.
define void @fcmp_v2f16_nlg(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ueq <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
653
; GCN-LABEL: {{^}}fcmp_v2f16_ngt:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_ngt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_ngt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_ngt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_ngt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector ule -> v_cmp_ngt, scalarized element-wise.
define void @fcmp_v2f16_ngt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ule <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
683
; GCN-LABEL: {{^}}fcmp_v2f16_nle:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_nle_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_nle_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_nle_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_nle_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector ugt -> v_cmp_nle, scalarized element-wise.
define void @fcmp_v2f16_nle(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp ugt <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
713
; GCN-LABEL: {{^}}fcmp_v2f16_neq:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_neq_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_neq_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_neq_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_neq_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector une -> v_cmp_neq, scalarized element-wise.
define void @fcmp_v2f16_neq(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp une <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
743
; GCN-LABEL: {{^}}fcmp_v2f16_nlt:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cmp_nlt_f32_e32 vcc, v[[A_F32_0]], v[[B_F32_0]]
; SI:  v_cmp_nlt_f32_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F32_1]], v[[B_F32_1]]
; VI:  v_cmp_nlt_f16_e32 vcc, v[[A_V2_F16]], v[[B_V2_F16]]
; VI:  v_cmp_nlt_f16_e64 s[{{[0-9]+}}:{{[0-9]+}}], v[[A_F16_1]], v[[B_F16_1]]
; GCN: v_cndmask_b32_e64 v[[R_I32_0:[0-9]+]]
; GCN: v_cndmask_b32_e64 v[[R_I32_1:[0-9]+]]
; GCN: buffer_store_dwordx2 v{{\[}}[[R_I32_0]]:[[R_I32_1]]{{\]}}
; GCN: s_endpgm

; Vector uge -> v_cmp_nlt, scalarized element-wise.
define void @fcmp_v2f16_nlt(
    <2 x i32> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fcmp uge <2 x half> %a.val, %b.val
  %r.val.sext = sext <2 x i1> %r.val to <2 x i32>
  store <2 x i32> %r.val.sext, <2 x i32> addrspace(1)* %r
  ret void
}
773
declare half @llvm.fabs.f16(half) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
778