1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3
4; GCN-LABEL: {{^}}select_f16
5; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
6; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
7; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
8; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
9; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
10; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
11; SI:  v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
12; SI:  v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
13; SI:  v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
14; SI:  v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
15; SI:  v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
16; VI:  v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
17; VI:  v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
18; GCN: buffer_store_short v[[R_F16]]
19; GCN: s_endpgm
; Scalar half select with all four operands loaded from global memory:
;   *r = (*a < *b) ? *c : *d
define void @select_f16(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b, half addrspace(1)* %c, half addrspace(1)* %d) {
entry:
  ; Load order matches the buffer_load_ushort order checked above.
  %av = load half, half addrspace(1)* %a
  %bv = load half, half addrspace(1)* %b
  %cv = load half, half addrspace(1)* %c
  %dv = load half, half addrspace(1)* %d
  %cond = fcmp olt half %av, %bv
  %sel = select i1 %cond, half %cv, half %dv
  store half %sel, half addrspace(1)* %r
  ret void
}
36
37; GCN-LABEL: {{^}}select_f16_imm_a
38; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
39; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
40; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
41; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], 0x3800{{$}}
42; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
43; SI:  v_cmp_gt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
44; SI:  v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
45; SI:  v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
46; SI:  v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
47; SI:  v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
48; VI:  v_cmp_lt_f16_e32 vcc, 0.5, v[[B_F16]]
49; VI:  v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
50; GCN: buffer_store_short v[[R_F16]]
51; GCN: s_endpgm
; Same as select_f16 but the compare LHS is the immediate 0xH3800 (half 0.5);
; on VI the compare commutes to v_cmp_lt with the immediate as first operand.
define void @select_f16_imm_a(half addrspace(1)* %r, half addrspace(1)* %b, half addrspace(1)* %c, half addrspace(1)* %d) {
entry:
  %bv = load half, half addrspace(1)* %b
  %cv = load half, half addrspace(1)* %c
  %dv = load half, half addrspace(1)* %d
  %cond = fcmp olt half 0xH3800, %bv
  %sel = select i1 %cond, half %cv, half %dv
  store half %sel, half addrspace(1)* %r
  ret void
}
66
67; GCN-LABEL: {{^}}select_f16_imm_b
68; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
69; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
70; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
71; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], 0x3800{{$}}
72; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
73; SI:  v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
74; SI:  v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
75; SI:  v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
76; SI:  v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
77; SI:  v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
78; VI:  v_cmp_gt_f16_e32 vcc, 0.5, v[[A_F16]]
79; VI:  v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
80; GCN: buffer_store_short v[[R_F16]]
81; GCN: s_endpgm
; Same as select_f16 but the compare RHS is the immediate 0xH3800 (half 0.5);
; on VI the compare commutes to v_cmp_gt with the immediate as first operand.
define void @select_f16_imm_b(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %c, half addrspace(1)* %d) {
entry:
  %av = load half, half addrspace(1)* %a
  %cv = load half, half addrspace(1)* %c
  %dv = load half, half addrspace(1)* %d
  %cond = fcmp olt half %av, 0xH3800
  %sel = select i1 %cond, half %cv, half %dv
  store half %sel, half addrspace(1)* %r
  ret void
}
96
97; GCN-LABEL: {{^}}select_f16_imm_c
98; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
99; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
100; GCN: buffer_load_ushort v[[D_F16:[0-9]+]]
101; SI:  v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], 0x3800{{$}}
102; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
103; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
104; SI:  v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
105; SI:  v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
106; SI:  v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[C_F32]], v[[D_F32]], vcc
107; SI:  v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
108; VI:  v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
109; VI:  v_mov_b32_e32 v[[C_F16:[0-9]+]], 0x3800{{$}}
110; VI:  v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[C_F16]], v[[D_F16]], vcc
111; GCN: buffer_store_short v[[R_F16]]
112; GCN: s_endpgm
; Same as select_f16 but the select's true operand is the immediate 0xH3800
; (half 0.5); the backend inverts the predicate (nlt) and swaps the cndmask
; operands so the immediate can be materialized via v_mov/constant.
define void @select_f16_imm_c(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b, half addrspace(1)* %d) {
entry:
  %av = load half, half addrspace(1)* %a
  %bv = load half, half addrspace(1)* %b
  %dv = load half, half addrspace(1)* %d
  %cond = fcmp olt half %av, %bv
  %sel = select i1 %cond, half 0xH3800, half %dv
  store half %sel, half addrspace(1)* %r
  ret void
}
127
128; GCN-LABEL: {{^}}select_f16_imm_d
129; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
130; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
131; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
132; SI:  v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], 0x3800{{$}}
133; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
134; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
135; SI:  v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
136; SI:  v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
137; SI:  v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
138; SI:  v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
139; VI:  v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
140; VI:  v_mov_b32_e32 v[[D_F16:[0-9]+]], 0x3800{{$}}
141; VI:  v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
142; GCN: buffer_store_short v[[R_F16]]
143; GCN: s_endpgm
; Same as select_f16 but the select's false operand is the immediate 0xH3800
; (half 0.5); the predicate stays olt and the immediate feeds the cndmask's
; false input.
define void @select_f16_imm_d(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b, half addrspace(1)* %c) {
entry:
  %av = load half, half addrspace(1)* %a
  %bv = load half, half addrspace(1)* %b
  %cv = load half, half addrspace(1)* %c
  %cond = fcmp olt half %av, %bv
  %sel = select i1 %cond, half %cv, half 0xH3800
  store half %sel, half addrspace(1)* %r
  ret void
}
158
159; GCN-LABEL: {{^}}select_v2f16
160; SI:  v_cvt_f32_f16_e32
161; SI:  v_cvt_f32_f16_e32
162; SI:  v_cvt_f32_f16_e32
163; SI:  v_cvt_f32_f16_e32
164; SI:  v_cmp_lt_f32_e64
165; SI:  v_cmp_lt_f32_e32
166; VI:  v_cmp_lt_f16_e32
167; VI:  v_cmp_lt_f16_e64
168; GCN: v_cndmask_b32_e32
169; GCN: v_cndmask_b32_e64
170; SI:  v_cvt_f16_f32_e32
171; SI:  v_cvt_f16_f32_e32
172; GCN: s_endpgm
; <2 x half> variant: the vector compare/select is scalarized, giving one
; compare + cndmask pair per lane (and f16<->f32 conversions on SI).
define void @select_v2f16(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a, <2 x half> addrspace(1)* %b, <2 x half> addrspace(1)* %c, <2 x half> addrspace(1)* %d) {
entry:
  %av = load <2 x half>, <2 x half> addrspace(1)* %a
  %bv = load <2 x half>, <2 x half> addrspace(1)* %b
  %cv = load <2 x half>, <2 x half> addrspace(1)* %c
  %dv = load <2 x half>, <2 x half> addrspace(1)* %d
  %cond = fcmp olt <2 x half> %av, %bv
  %sel = select <2 x i1> %cond, <2 x half> %cv, <2 x half> %dv
  store <2 x half> %sel, <2 x half> addrspace(1)* %r
  ret void
}
189
190; GCN-LABEL: {{^}}select_v2f16_imm_a
191; SI:  v_cvt_f32_f16_e32
192; SI:  v_cvt_f32_f16_e32
193; SI:  v_cvt_f32_f16_e32
194; SI:  v_cvt_f32_f16_e32
195; SI:  v_cmp_gt_f32_e32
196; SI:  v_cvt_f32_f16_e32
197; SI:  v_cvt_f32_f16_e32
198; SI:  v_cvt_f32_f16_e32
199; SI:  v_cvt_f32_f16_e32
200; SI:  v_cmp_gt_f32_e64
201; VI:  v_cmp_lt_f16_e32
202; VI:  v_cmp_lt_f16_e64
203; GCN: v_cndmask_b32_e32
204; SI:  v_cvt_f16_f32_e32
205; GCN: v_cndmask_b32_e64
206; SI:  v_cvt_f16_f32_e32
207; GCN: s_endpgm
; Vector variant with a constant compare LHS <0.5, 0.625>; per-lane compares
; commute on VI (v_cmp_lt with the immediate first).
define void @select_v2f16_imm_a(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %b, <2 x half> addrspace(1)* %c, <2 x half> addrspace(1)* %d) {
entry:
  %bv = load <2 x half>, <2 x half> addrspace(1)* %b
  %cv = load <2 x half>, <2 x half> addrspace(1)* %c
  %dv = load <2 x half>, <2 x half> addrspace(1)* %d
  %cond = fcmp olt <2 x half> <half 0xH3800, half 0xH3900>, %bv
  %sel = select <2 x i1> %cond, <2 x half> %cv, <2 x half> %dv
  store <2 x half> %sel, <2 x half> addrspace(1)* %r
  ret void
}
222
223; GCN-LABEL: {{^}}select_v2f16_imm_b
224; SI:  v_cvt_f32_f16_e32
225; SI:  v_cvt_f32_f16_e32
226; SI:  v_cvt_f32_f16_e32
227; SI:  v_cvt_f32_f16_e32
228; SI:  v_cmp_lt_f32_e32
229; SI:  v_cvt_f32_f16_e32
230; SI:  v_cvt_f32_f16_e32
231; SI:  v_cvt_f32_f16_e32
232; SI:  v_cvt_f32_f16_e32
233; SI:  v_cmp_lt_f32_e64
234; VI:  v_cmp_gt_f16_e32
235; VI:  v_cmp_gt_f16_e64
236; GCN: v_cndmask_b32_e32
237; SI:  v_cvt_f16_f32_e32
238; GCN: v_cndmask_b32_e64
239; SI:  v_cvt_f16_f32_e32
240; GCN: s_endpgm
; Vector variant with a constant compare RHS <0.5, 0.625>; per-lane compares
; commute on VI (v_cmp_gt with the immediate first).
define void @select_v2f16_imm_b(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a, <2 x half> addrspace(1)* %c, <2 x half> addrspace(1)* %d) {
entry:
  %av = load <2 x half>, <2 x half> addrspace(1)* %a
  %cv = load <2 x half>, <2 x half> addrspace(1)* %c
  %dv = load <2 x half>, <2 x half> addrspace(1)* %d
  %cond = fcmp olt <2 x half> %av, <half 0xH3800, half 0xH3900>
  %sel = select <2 x i1> %cond, <2 x half> %cv, <2 x half> %dv
  store <2 x half> %sel, <2 x half> addrspace(1)* %r
  ret void
}
255
256; GCN-LABEL: {{^}}select_v2f16_imm_c
257; SI:  v_cvt_f32_f16_e32
258; SI:  v_cvt_f32_f16_e32
259; SI:  v_cvt_f32_f16_e32
260; SI:  v_cvt_f32_f16_e32
261; SI:  v_cvt_f32_f16_e32
262; SI:  v_cvt_f32_f16_e32
263; SI:  v_cvt_f32_f16_e32
264; SI:  v_cvt_f32_f16_e32
265
266; SI: v_cmp_lt_f32_e32
267; SI: v_cmp_lt_f32_e64
268; SI: v_cndmask_b32_e32
269; SI: v_cndmask_b32_e64
270
271; VI: v_cmp_nlt_f16_e32
272; VI: v_cndmask_b32_e32
273
274; VI: v_cmp_nlt_f16_e32
275; VI: v_cndmask_b32_e32
276
277; SI:  v_cvt_f16_f32_e32
278; SI:  v_cvt_f16_f32_e32
279; GCN: s_endpgm
; Vector variant with a constant true operand <0.5, 0.625>; per-lane the
; predicate is inverted (nlt on VI) so the immediate feeds the cndmask.
define void @select_v2f16_imm_c(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a, <2 x half> addrspace(1)* %b, <2 x half> addrspace(1)* %d) {
entry:
  %av = load <2 x half>, <2 x half> addrspace(1)* %a
  %bv = load <2 x half>, <2 x half> addrspace(1)* %b
  %dv = load <2 x half>, <2 x half> addrspace(1)* %d
  %cond = fcmp olt <2 x half> %av, %bv
  %sel = select <2 x i1> %cond, <2 x half> <half 0xH3800, half 0xH3900>, <2 x half> %dv
  store <2 x half> %sel, <2 x half> addrspace(1)* %r
  ret void
}
294
295; GCN-LABEL: {{^}}select_v2f16_imm_d
296; SI:  v_cvt_f32_f16_e32
297; SI:  v_cvt_f32_f16_e32
298; SI:  v_cvt_f32_f16_e32
299; SI:  v_cvt_f32_f16_e32
300; SI:  v_cvt_f32_f16_e32
301; SI:  v_cvt_f32_f16_e32
302; SI:  v_cvt_f32_f16_e32
303; SI:  v_cvt_f32_f16_e32
304; SI:  v_cmp_lt_f32_e32
305; SI:  v_cmp_lt_f32_e64
306; VI:  v_cmp_lt_f16_e32
307; VI:  v_cmp_lt_f16_e64
308; GCN: v_cndmask_b32_e32
309; GCN: v_cndmask_b32_e64
310; SI:  v_cvt_f16_f32_e32
311; SI:  v_cvt_f16_f32_e32
312; GCN: s_endpgm
; Vector variant with a constant false operand <0.5, 0.625>; the predicate
; stays olt and the immediates feed the cndmask false inputs.
define void @select_v2f16_imm_d(<2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a, <2 x half> addrspace(1)* %b, <2 x half> addrspace(1)* %c) {
entry:
  %av = load <2 x half>, <2 x half> addrspace(1)* %a
  %bv = load <2 x half>, <2 x half> addrspace(1)* %b
  %cv = load <2 x half>, <2 x half> addrspace(1)* %c
  %cond = fcmp olt <2 x half> %av, %bv
  %sel = select <2 x i1> %cond, <2 x half> %cv, <2 x half> <half 0xH3800, half 0xH3900>
  store <2 x half> %sel, <2 x half> addrspace(1)* %r
  ret void
}
327