1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
5
; Pattern under test is umin(umax(%a, 12), 17), with the variable as the first
; operand of both intrinsics. Every check block below expects the pair to be
; selected as a single v_med3_u32 taking 12 and 17 as immediate operands.
6define i32 @test_min_max_ValK0_K1_u32(i32 %a) {
7; GFX89-LABEL: test_min_max_ValK0_K1_u32:
8; GFX89:       ; %bb.0:
9; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
11; GFX89-NEXT:    s_setpc_b64 s[30:31]
12;
13; GFX10-LABEL: test_min_max_ValK0_K1_u32:
14; GFX10:       ; %bb.0:
15; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
17; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
18; GFX10-NEXT:    s_setpc_b64 s[30:31]
19  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
20  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
21  ret i32 %umed
22}
23
; Pattern under test is umin(umax(12, %a), 17), i.e. the inner umax has the
; constant as its first operand. The checks still expect one v_med3_u32 with
; the variable in the first source slot.
; NOTE(review) - the function name reads ValK0 / i32 and has no test_ prefix,
; while the body uses the unsigned intrinsics with the constant first; the
; sibling tests would suggest test_min_max_K0Val_K1_u32. Name left unchanged
; because the LABEL checks depend on it - confirm before renaming.
24define i32 @min_max_ValK0_K1_i32(i32 %a) {
25; GFX89-LABEL: min_max_ValK0_K1_i32:
26; GFX89:       ; %bb.0:
27; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
29; GFX89-NEXT:    s_setpc_b64 s[30:31]
30;
31; GFX10-LABEL: min_max_ValK0_K1_i32:
32; GFX10:       ; %bb.0:
33; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
35; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
36; GFX10-NEXT:    s_setpc_b64 s[30:31]
37  %umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
38  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
39  ret i32 %umed
40}
41
; Pattern under test is umin(17, umax(%a, 12)), i.e. the outer umin has the
; constant as its first operand. The checks expect the same v_med3_u32 with
; operands v0, 12, 17 as the other operand orderings in this file.
42define i32 @test_min_K1max_ValK0__u32(i32 %a) {
43; GFX89-LABEL: test_min_K1max_ValK0__u32:
44; GFX89:       ; %bb.0:
45; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
47; GFX89-NEXT:    s_setpc_b64 s[30:31]
48;
49; GFX10-LABEL: test_min_K1max_ValK0__u32:
50; GFX10:       ; %bb.0:
51; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
53; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
54; GFX10-NEXT:    s_setpc_b64 s[30:31]
55  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
56  %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
57  ret i32 %umed
58}
59
; Pattern under test is umin(17, umax(12, %a)), with the constant as the first
; operand of both intrinsics. The checks expect a single v_med3_u32 with
; operands v0, 12, 17.
60define i32 @test_min_K1max_K0Val__u32(i32 %a) {
61; GFX89-LABEL: test_min_K1max_K0Val__u32:
62; GFX89:       ; %bb.0:
63; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
65; GFX89-NEXT:    s_setpc_b64 s[30:31]
66;
67; GFX10-LABEL: test_min_K1max_K0Val__u32:
68; GFX10:       ; %bb.0:
69; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
71; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
72; GFX10-NEXT:    s_setpc_b64 s[30:31]
73  %umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
74  %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
75  ret i32 %umed
76}
77
; Pattern under test is umax(umin(%a, 17), 12), i.e. the min is applied first
; and the max second, with the variable as the first operand of the inner
; call. The checks expect a single v_med3_u32 with operands v0, 12, 17.
78define i32 @test_max_min_ValK1_K0_u32(i32 %a) {
79; GFX89-LABEL: test_max_min_ValK1_K0_u32:
80; GFX89:       ; %bb.0:
81; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
83; GFX89-NEXT:    s_setpc_b64 s[30:31]
84;
85; GFX10-LABEL: test_max_min_ValK1_K0_u32:
86; GFX10:       ; %bb.0:
87; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
89; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
90; GFX10-NEXT:    s_setpc_b64 s[30:31]
91  %umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
92  %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
93  ret i32 %umed
94}
95
; Pattern under test is umax(umin(17, %a), 12), i.e. the inner umin has the
; constant as its first operand. The checks expect a single v_med3_u32 with
; operands v0, 12, 17.
96define i32 @test_max_min_K1Val_K0_u32(i32 %a) {
97; GFX89-LABEL: test_max_min_K1Val_K0_u32:
98; GFX89:       ; %bb.0:
99; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
101; GFX89-NEXT:    s_setpc_b64 s[30:31]
102;
103; GFX10-LABEL: test_max_min_K1Val_K0_u32:
104; GFX10:       ; %bb.0:
105; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
107; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
108; GFX10-NEXT:    s_setpc_b64 s[30:31]
109  %umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
110  %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
111  ret i32 %umed
112}
113
; Pattern under test is umax(12, umin(%a, 17)), i.e. the outer umax has the
; constant as its first operand. The checks expect a single v_med3_u32 with
; operands v0, 12, 17.
114define i32 @test_max_K0min_ValK1__u32(i32 %a) {
115; GFX89-LABEL: test_max_K0min_ValK1__u32:
116; GFX89:       ; %bb.0:
117; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
119; GFX89-NEXT:    s_setpc_b64 s[30:31]
120;
121; GFX10-LABEL: test_max_K0min_ValK1__u32:
122; GFX10:       ; %bb.0:
123; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
125; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
126; GFX10-NEXT:    s_setpc_b64 s[30:31]
127  %umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
128  %umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
129  ret i32 %umed
130}
131
; Pattern under test is umax(12, umin(17, %a)), with the constant as the first
; operand of both intrinsics. The checks expect a single v_med3_u32 with
; operands v0, 12, 17.
132define i32 @test_max_K0min_K1Val__u32(i32 %a) {
133; GFX89-LABEL: test_max_K0min_K1Val__u32:
134; GFX89:       ; %bb.0:
135; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
137; GFX89-NEXT:    s_setpc_b64 s[30:31]
138;
139; GFX10-LABEL: test_max_K0min_K1Val__u32:
140; GFX10:       ; %bb.0:
141; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
143; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
144; GFX10-NEXT:    s_setpc_b64 s[30:31]
145  %umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
146  %umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
147  ret i32 %umed
148}
149
; Vector variant of umax(12, umin(17, %a)) on <2 x i16> with splat constants.
; No med3 instruction is expected here; the checks keep a separate min and max.
;  - gfx802 checks expect the two 16-bit halves to be handled separately
;    (v_min_u16 / v_max_u16 in e32 and SDWA forms) and merged with v_or_b32.
;  - gfx900 and gfx1010 checks expect v_pk_min_u16 followed by v_pk_max_u16.
150define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) {
151; GFX8-LABEL: test_max_K0min_K1Val__v2u16:
152; GFX8:       ; %bb.0:
153; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154; GFX8-NEXT:    v_mov_b32_e32 v2, 17
155; GFX8-NEXT:    v_min_u16_e32 v1, 17, v0
156; GFX8-NEXT:    v_min_u16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
157; GFX8-NEXT:    v_mov_b32_e32 v2, 12
158; GFX8-NEXT:    v_max_u16_e32 v1, 12, v1
159; GFX8-NEXT:    v_max_u16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
160; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
161; GFX8-NEXT:    s_setpc_b64 s[30:31]
162;
163; GFX9-LABEL: test_max_K0min_K1Val__v2u16:
164; GFX9:       ; %bb.0:
165; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166; GFX9-NEXT:    v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
167; GFX9-NEXT:    v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
168; GFX9-NEXT:    s_setpc_b64 s[30:31]
169;
170; GFX10-LABEL: test_max_K0min_K1Val__v2u16:
171; GFX10:       ; %bb.0:
172; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
174; GFX10-NEXT:    v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
175; GFX10-NEXT:    v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
176; GFX10-NEXT:    s_setpc_b64 s[30:31]
177  %umin = call <2 x i16> @llvm.umin.v2i16(<2 x i16> <i16 17, i16 17>, <2 x i16> %a)
178  %umed = call <2 x i16> @llvm.umax.v2i16(<2 x i16> <i16 12, i16 12>, <2 x i16> %umin)
179  ret <2 x i16> %umed
180}
181
; Same umin(umax(%a, 12), 17) pattern, but in an amdgpu_ps function whose
; argument is marked inreg. The checks expect the scalar pair s_max_u32 then
; s_min_u32 on SGPRs, with no med3 instruction, on all three targets.
182define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
183; GFX89-LABEL: test_uniform_min_max:
184; GFX89:       ; %bb.0:
185; GFX89-NEXT:    s_max_u32 s0, s2, 12
186; GFX89-NEXT:    s_min_u32 s0, s0, 17
187; GFX89-NEXT:    ; return to shader part epilog
188;
189; GFX10-LABEL: test_uniform_min_max:
190; GFX10:       ; %bb.0:
191; GFX10-NEXT:    s_max_u32 s0, s2, 12
192; GFX10-NEXT:    s_min_u32 s0, s0, 17
193; GFX10-NEXT:    ; return to shader part epilog
194  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
195  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
196  ret i32 %umed
197}
198
; umin(umax(%a, 12), 65) - same shape as the first test but with an upper
; bound of 65 (0x41). The expected operand form differs per target.
;  - gfx802 / gfx900 checks expect 0x41 to be moved into v1 first and then
;    passed to v_med3_u32 as a register operand.
;  - gfx1010 checks expect 0x41 to appear directly as an operand of
;    v_med3_u32.
; Per the test name, 65 is presumably not encodable as an inline constant.
199define i32 @test_non_inline_constant_u32(i32 %a) {
200; GFX89-LABEL: test_non_inline_constant_u32:
201; GFX89:       ; %bb.0:
202; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203; GFX89-NEXT:    v_mov_b32_e32 v1, 0x41
204; GFX89-NEXT:    v_med3_u32 v0, v0, 12, v1
205; GFX89-NEXT:    s_setpc_b64 s[30:31]
206;
207; GFX10-LABEL: test_non_inline_constant_u32:
208; GFX10:       ; %bb.0:
209; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
211; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 0x41
212; GFX10-NEXT:    s_setpc_b64 s[30:31]
213  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
214  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 65)
215  ret i32 %umed
216}
217
; Declarations of the unsigned min/max intrinsics called by the tests above,
; in the scalar i32 and the <2 x i16> vector forms.
218declare i32 @llvm.umin.i32(i32, i32)
219declare i32 @llvm.umax.i32(i32, i32)
220declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
221declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>)
222