1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SAFE %s
3; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-NNAN %s
4
5; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI-SAFE %s
6; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NNAN %s
7
8; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI-SAFE %s
9; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NNAN %s
10
; Scalar half case of the pattern under test: select (fcmp ugt a, b), a, b.
; Per the checks below, the runs without the no-NaNs/no-signed-zeros flags keep
; a v_cmp_nle_f16 + v_cndmask_b32 pair on gfx900/fiji and use v_max_legacy_f32
; (after f16<->f32 conversions) on the default-cpu run; the flagged runs
; select v_max_f16 (gfx900/fiji) or v_max_f32 (default cpu) instead.
11define half @test_fmax_legacy_ugt_f16(half %a, half %b) #0 {
12; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_f16:
13; GFX9-SAFE:       ; %bb.0:
14; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
16; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
17; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
18;
19; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_f16:
20; GFX9-NNAN:       ; %bb.0:
21; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22; GFX9-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
23; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
24;
25; VI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
26; VI-SAFE:       ; %bb.0:
27; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
29; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
30; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
31;
32; VI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
33; VI-NNAN:       ; %bb.0:
34; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
36; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
37;
38; SI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
39; SI-SAFE:       ; %bb.0:
40; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
42; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
43; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
44; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
45; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v1, v0
46; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
47;
48; SI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
49; SI-NNAN:       ; %bb.0:
50; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
52; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
53; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
54; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
55; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v1
56; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
57  %cmp = fcmp ugt half %a, %b
58  %val = select i1 %cmp, half %a, half %b
59  ret half %val
60}
61
; <2 x half> case of select (fcmp ugt a, b), a, b.
; Per the checks below: the GFX9/VI SAFE runs compare and select the high and
; low halves separately and then repack them; GFX9-NNAN is a single
; v_pk_max_f16; VI-NNAN uses v_max_f16_sdwa for the high half plus
; v_max_f16_e32 for the low half; both SI runs work per element in f32.
62define <2 x half> @test_fmax_legacy_ugt_v2f16(<2 x half> %a, <2 x half> %b) #0 {
63; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
64; GFX9-SAFE:       ; %bb.0:
65; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
67; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
68; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v2
69; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
70; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
71; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
72; GFX9-SAFE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
73; GFX9-SAFE-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
74; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
75;
76; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
77; GFX9-NNAN:       ; %bb.0:
78; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v1
80; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
81;
82; VI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
83; VI-SAFE:       ; %bb.0:
84; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
86; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
87; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v2
88; VI-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
89; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
90; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
91; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
92; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
93; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
94;
95; VI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
96; VI-NNAN:       ; %bb.0:
97; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98; VI-NNAN-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
99; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
100; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v2
101; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
102;
103; SI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
104; SI-SAFE:       ; %bb.0:
105; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
107; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
108; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
109; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
110; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
111; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
112; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
113; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
114; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v2, v0
115; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v3, v1
116; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
117;
118; SI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
119; SI-NNAN:       ; %bb.0:
120; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
122; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
123; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
124; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
125; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
126; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
127; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
128; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
129; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v2
130; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v3
131; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
132  %cmp = fcmp ugt <2 x half> %a, %b
133  %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
134  ret <2 x half> %val
135}
136
; <3 x half> (odd element count) case of select (fcmp ugt a, b), a, b.
; Per the checks below: the GFX9/VI SAFE runs issue three v_cmp_nle_f16 +
; v_cndmask_b32 pairs and repack only v0; GFX9-NNAN uses two v_pk_max_f16;
; VI-NNAN uses one v_max_f16_sdwa plus two v_max_f16_e32; both SI runs work on
; three f32 lanes (v_max_legacy_f32 for SAFE, v_max_f32 for NNAN).
137define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) #0 {
138; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
139; GFX9-SAFE:       ; %bb.0:
140; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
142; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
143; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
144; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
145; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
146; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
147; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
148; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
149; GFX9-SAFE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
150; GFX9-SAFE-NEXT:    v_lshl_or_b32 v0, v4, 16, v0
151; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
152;
153; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
154; GFX9-NNAN:       ; %bb.0:
155; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
157; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
158; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
159;
160; VI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
161; VI-SAFE:       ; %bb.0:
162; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
164; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
165; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
166; VI-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
167; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
168; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
169; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
170; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
171; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
172; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
173; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
174;
175; VI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
176; VI-NNAN:       ; %bb.0:
177; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178; VI-NNAN-NEXT:    v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
179; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v2
180; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v3
181; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v4
182; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
183;
184; SI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
185; SI-SAFE:       ; %bb.0:
186; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
188; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
189; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
190; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
191; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
192; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
193; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
194; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
195; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
196; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
197; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
198; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
199; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v3, v0
200; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v4, v1
201; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v5, v2
202; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
203;
204; SI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
205; SI-NNAN:       ; %bb.0:
206; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
207; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
208; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
209; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
210; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
211; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
212; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
213; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
214; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
215; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
216; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
217; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
218; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
219; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v3
220; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v4
221; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v5
222; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
223  %cmp = fcmp ugt <3 x half> %a, %b
224  %val = select <3 x i1> %cmp, <3 x half> %a, <3 x half> %b
225  ret <3 x half> %val
226}
227
; <4 x half> case of select (fcmp ugt a, b), a, b.
; Per the checks below: the GFX9/VI SAFE runs issue four v_cmp_nle_f16 +
; v_cndmask_b32 pairs and repack v0 and v1; GFX9-NNAN uses two v_pk_max_f16;
; VI-NNAN uses two v_max_f16_sdwa plus two v_max_f16_e32; both SI runs work on
; four f32 lanes (v_max_legacy_f32 for SAFE, v_max_f32 for NNAN).
228define <4 x half> @test_fmax_legacy_ugt_v4f16(<4 x half> %a, <4 x half> %b) #0 {
229; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
230; GFX9-SAFE:       ; %bb.0:
231; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
233; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
234; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
235; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
236; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v7, v6
237; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
238; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
239; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
240; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
241; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
242; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
243; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
244; GFX9-SAFE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
245; GFX9-SAFE-NEXT:    v_and_b32_e32 v1, 0xffff, v1
246; GFX9-SAFE-NEXT:    v_lshl_or_b32 v0, v4, 16, v0
247; GFX9-SAFE-NEXT:    v_lshl_or_b32 v1, v6, 16, v1
248; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
249;
250; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
251; GFX9-NNAN:       ; %bb.0:
252; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
254; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
255; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
256;
257; VI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
258; VI-SAFE:       ; %bb.0:
259; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
260; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
261; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
262; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
263; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
264; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v7, v6
265; VI-SAFE-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
266; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
267; VI-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
268; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
269; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
270; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
271; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
272; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
273; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
274; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
275; VI-SAFE-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
276; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
277;
278; VI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
279; VI-NNAN:       ; %bb.0:
280; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281; VI-NNAN-NEXT:    v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
282; VI-NNAN-NEXT:    v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
283; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v3
284; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v2
285; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v5
286; VI-NNAN-NEXT:    v_or_b32_e32 v1, v1, v4
287; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
288;
289; SI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
290; SI-SAFE:       ; %bb.0:
291; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
293; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
294; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
295; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
296; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
297; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
298; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
299; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
300; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
301; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
302; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
303; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
304; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
305; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
306; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
307; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
308; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v4, v0
309; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v5, v1
310; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v6, v2
311; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v7, v3
312; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
313;
314; SI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
315; SI-NNAN:       ; %bb.0:
316; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
317; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
318; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
319; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
320; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
321; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
322; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
323; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
324; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
325; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
326; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
327; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
328; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
329; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
330; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
331; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
332; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
333; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v4
334; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v5
335; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v6
336; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v7
337; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
338  %cmp = fcmp ugt <4 x half> %a, %b
339  %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
340  ret <4 x half> %val
341}
342
; <8 x half> case of select (fcmp ugt a, b), a, b.
; Per the checks below: the GFX9/VI SAFE runs issue eight v_cmp_nle_f16 +
; v_cndmask_b32 pairs and repack v0..v3; GFX9-NNAN uses four v_pk_max_f16;
; VI-NNAN uses four v_max_f16_sdwa plus four v_max_f16_e32; both SI runs work
; on eight f32 lanes (v_max_legacy_f32 for SAFE, v_max_f32 for NNAN).
343define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
344; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
345; GFX9-SAFE:       ; %bb.0:
346; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v14, 16, v7
348; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
349; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
350; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
351; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v15, v14
352; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
353; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
354; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v14, v14, v15, vcc
355; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v13, v12
356; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
357; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
358; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
359; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v11, v10
360; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
361; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v9, v8
362; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
363; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v7
364; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
365; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v2, v6
366; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
367; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v5
368; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
369; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v4
370; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
371; GFX9-SAFE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
372; GFX9-SAFE-NEXT:    v_and_b32_e32 v1, 0xffff, v1
373; GFX9-SAFE-NEXT:    v_and_b32_e32 v2, 0xffff, v2
374; GFX9-SAFE-NEXT:    v_and_b32_e32 v3, 0xffff, v3
375; GFX9-SAFE-NEXT:    v_lshl_or_b32 v0, v8, 16, v0
376; GFX9-SAFE-NEXT:    v_lshl_or_b32 v1, v10, 16, v1
377; GFX9-SAFE-NEXT:    v_lshl_or_b32 v2, v12, 16, v2
378; GFX9-SAFE-NEXT:    v_lshl_or_b32 v3, v14, 16, v3
379; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
380;
381; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
382; GFX9-NNAN:       ; %bb.0:
383; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v4
385; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v5
386; GFX9-NNAN-NEXT:    v_pk_max_f16 v2, v2, v6
387; GFX9-NNAN-NEXT:    v_pk_max_f16 v3, v3, v7
388; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
389;
390; VI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
391; VI-SAFE:       ; %bb.0:
392; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v14, 16, v7
394; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
395; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
396; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
397; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v15, v14
398; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
399; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
400; VI-SAFE-NEXT:    v_cndmask_b32_e32 v14, v14, v15, vcc
401; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v13, v12
402; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
403; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
404; VI-SAFE-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
405; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v11, v10
406; VI-SAFE-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
407; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v9, v8
408; VI-SAFE-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
409; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v7
410; VI-SAFE-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
411; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v2, v6
412; VI-SAFE-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
413; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v5
414; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
415; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v4
416; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
417; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v8
418; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
419; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v10
420; VI-SAFE-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
421; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v12
422; VI-SAFE-NEXT:    v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
423; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v14
424; VI-SAFE-NEXT:    v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
425; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
426;
427; VI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
428; VI-NNAN:       ; %bb.0:
429; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430; VI-NNAN-NEXT:    v_max_f16_sdwa v8, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
431; VI-NNAN-NEXT:    v_max_f16_sdwa v9, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
432; VI-NNAN-NEXT:    v_max_f16_sdwa v10, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
433; VI-NNAN-NEXT:    v_max_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
434; VI-NNAN-NEXT:    v_max_f16_e32 v3, v3, v7
435; VI-NNAN-NEXT:    v_max_f16_e32 v2, v2, v6
436; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v5
437; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v4
438; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v11
439; VI-NNAN-NEXT:    v_or_b32_e32 v1, v1, v10
440; VI-NNAN-NEXT:    v_or_b32_e32 v2, v2, v9
441; VI-NNAN-NEXT:    v_or_b32_e32 v3, v3, v8
442; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
443;
444; SI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
445; SI-SAFE:       ; %bb.0:
446; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
447; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
448; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v15, v15
449; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
450; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v14, v14
451; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
452; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v13, v13
453; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
454; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v12, v12
455; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
456; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v11, v11
457; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
458; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v10, v10
459; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
460; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v9, v9
461; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
462; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v8, v8
463; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
464; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v15, v15
465; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
466; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v14, v14
467; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
468; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v13, v13
469; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
470; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v12, v12
471; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
472; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v11, v11
473; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
474; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v10, v10
475; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
476; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v9, v9
477; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
478; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v8, v8
479; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v8, v0
480; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v9, v1
481; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v10, v2
482; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v11, v3
483; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v4, v12, v4
484; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v5, v13, v5
485; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v6, v14, v6
486; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v7, v15, v7
487; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
488;
489; SI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
490; SI-NNAN:       ; %bb.0:
491; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v15, v15
493; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
494; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v14, v14
495; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
496; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v13, v13
497; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
498; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v12, v12
499; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
500; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v11, v11
501; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
502; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v10, v10
503; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
504; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v9, v9
505; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
506; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v8, v8
507; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
508; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v15, v15
509; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
510; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v14, v14
511; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
512; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v13, v13
513; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
514; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v12, v12
515; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
516; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v11, v11
517; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
518; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v10, v10
519; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
520; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v9, v9
521; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
522; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v8, v8
523; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
524; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v8
525; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v9
526; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v10
527; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v11
528; SI-NNAN-NEXT:    v_max_f32_e32 v4, v4, v12
529; SI-NNAN-NEXT:    v_max_f32_e32 v5, v5, v13
530; SI-NNAN-NEXT:    v_max_f32_e32 v6, v6, v14
531; SI-NNAN-NEXT:    v_max_f32_e32 v7, v7, v15
532; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
533  %cmp = fcmp ugt <8 x half> %a, %b
534  %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b
535  ret <8 x half> %val
536}
537
; Attribute group #0, referenced by each test function's define line in this file.
538attributes #0 = { nounwind }
539