; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s

; Scalar i8 unsigned saturating add through llvm.uadd.sat.i8.
; Expected code per the checks in this function:
;   tahiti          - mask both operands to 8 bits, 32-bit add, v_min_u32 with 0xff.
;   fiji and gfx900 - SDWA byte add followed by v_min_u16 with 0xff.
;   gfx1010         - mask both operands, v_add_nc_u16, v_min_u16 with 0xff.
define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
; GFX6-LABEL: v_uaddsat_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
; GFX6-NEXT:    v_and_b32_e32 v0, 0xff, v0
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT:    v_min_u32_e32 v0, 0xff, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX8-NEXT:    v_min_u16_e32 v0, 0xff, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_min_u16_e32 v0, 0xff, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_i8:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xff, v0
; GFX10-NEXT:    v_add_nc_u16 v0, v0, v1
; GFX10-NEXT:    v_min_u16 v0, 0xff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i8 @llvm.uadd.sat.i8(i8 %lhs, i8 %rhs)
  ret i8 %result
}

; Scalar i16 unsigned saturating add through llvm.uadd.sat.i16.
; Expected code per the checks in this function:
;   tahiti                   - mask both operands to 16 bits, 32-bit add,
;                              v_min_u32 with 0xffff.
;   fiji, gfx900 and gfx1010 - a single 16-bit add carrying the clamp modifier.
define i16 @v_uaddsat_i16(i16 %lhs, i16 %rhs) {
; GFX6-LABEL: v_uaddsat_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT:    v_min_u32_e32 v0, 0xffff, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_e64 v0, v0, v1 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u16_e64 v0, v0, v1 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_nc_u16 v0, v0, v1 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs)
  ret i16 %result
}

; Scalar i32 unsigned saturating add through llvm.uadd.sat.i32.
; Expected code per the checks in this function:
;   tahiti                   - v_not of rhs, v_min_u32 of lhs against that,
;                              then a plain add of rhs (min(lhs, ~rhs) + rhs).
;   fiji, gfx900 and gfx1010 - a single 32-bit add carrying the clamp modifier
;                              (the fiji form also names s[4:5] as an operand).
define i32 @v_uaddsat_i32(i32 %lhs, i32 %rhs) {
; GFX6-LABEL: v_uaddsat_i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v2, v1
; GFX6-NEXT:    v_min_u32_e32 v0, v0, v2
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v0, v1 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v0, v0, v1 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v1 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs)
  ret i32 %result
}

; <2 x i16> unsigned saturating add through llvm.uadd.sat.v2i16.
; Expected code per the checks in this function:
;   tahiti             - each element is masked, added and clamped with
;                        v_min_u32 separately; the halves are then combined
;                        with a shift and an or, and v1 is rebuilt by shifting
;                        the combined value right by 16.
;   fiji               - an SDWA clamped add on the WORD_1 halves, a clamped
;                        add on the low halves, and an or of the two.
;   gfx900 and gfx1010 - a single v_pk_add_u16 with the clamp modifier.
define <2 x i16> @v_uaddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX6-LABEL: v_uaddsat_v2i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT:    v_min_u32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_min_u32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_sdwa v2, v0, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_add_u16_e64 v0, v0, v1 clamp
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v2i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_add_u16 v0, v0, v1 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
  ret <2 x i16> %result
}

; <3 x i16> unsigned saturating add through llvm.uadd.sat.v3i16.
; Expected code per the checks in this function:
;   tahiti             - each of the three elements is masked, added and
;                        clamped with v_min_u32, then the results are
;                        recombined with shift, or and v_alignbit_b32.
;   fiji               - three clamped 16-bit adds (one in SDWA form on the
;                        WORD_1 halves) plus one or.
;   gfx900 and gfx1010 - two v_pk_add_u16 with the clamp modifier; the two
;                        targets list them in opposite order.
define <3 x i16> @v_uaddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
; GFX6-LABEL: v_uaddsat_v3i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v5
; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
; GFX6-NEXT:    v_min_u32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; GFX6-NEXT:    v_min_u32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_min_u32_e32 v3, 0xffff, v2
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_or_b32_e32 v2, 0xffff0000, v3
; GFX6-NEXT:    v_alignbit_b32 v1, v3, v1, 16
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v3i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_sdwa v4, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_add_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT:    v_add_u16_e64 v1, v1, v3 clamp
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v3i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_add_u16 v1, v1, v3 clamp
; GFX9-NEXT:    v_pk_add_u16 v0, v0, v2 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v3i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_add_u16 v0, v0, v2 clamp
; GFX10-NEXT:    v_pk_add_u16 v1, v1, v3 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs)
  ret <3 x i16> %result
}

; <4 x i16> unsigned saturating add through llvm.uadd.sat.v4i16. The result is
; bitcast to <2 x float> before being returned, so the function's return type
; differs from its operand type.
; Expected code per the checks in this function:
;   tahiti             - each of the four elements is masked, added and
;                        clamped with v_min_u32; pairs are packed with shift
;                        and or into v0 and v1.
;   fiji               - four clamped 16-bit adds (two in SDWA form on the
;                        WORD_1 halves) plus two ors.
;   gfx900 and gfx1010 - two v_pk_add_u16 with the clamp modifier.
define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX6-LABEL: v_uaddsat_v4i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v5
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT:    v_min_u32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_and_b32_e32 v7, 0xffff, v7
; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT:    v_and_b32_e32 v6, 0xffff, v6
; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT:    v_min_u32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v6
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v7
; GFX6-NEXT:    v_min_u32_e32 v2, 0xffff, v2
; GFX6-NEXT:    v_min_u32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v4i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_sdwa v4, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_add_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT:    v_add_u16_sdwa v2, v1, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_add_u16_e64 v1, v1, v3 clamp
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v4i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_add_u16 v0, v0, v2 clamp
; GFX9-NEXT:    v_pk_add_u16 v1, v1, v3 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v4i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_add_u16 v0, v0, v2 clamp
; GFX10-NEXT:    v_pk_add_u16 v1, v1, v3 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %cast = bitcast <4 x i16> %result to <2 x float>
  ret <2 x float> %cast
}

; <2 x i32> unsigned saturating add through llvm.uadd.sat.v2i32.
; Expected code per the checks in this function, repeated once per element:
;   tahiti                   - v_not of the rhs element, v_min_u32 of the lhs
;                              element against it, then a plain add.
;   fiji, gfx900 and gfx1010 - one 32-bit add carrying the clamp modifier.
define <2 x i32> @v_uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX6-LABEL: v_uaddsat_v2i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v4, v2
; GFX6-NEXT:    v_min_u32_e32 v0, v0, v4
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT:    v_not_b32_e32 v2, v3
; GFX6-NEXT:    v_min_u32_e32 v1, v1, v2
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v2i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v0, v2 clamp
; GFX8-NEXT:    v_add_u32_e64 v1, s[4:5], v1, v3 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v2i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v0, v0, v2 clamp
; GFX9-NEXT:    v_add_u32_e64 v1, v1, v3 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v2i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v2 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v3 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %result
}

; <3 x i32> unsigned saturating add through llvm.uadd.sat.v3i32.
; Expected code per the checks in this function, repeated once per element:
;   tahiti                   - v_not of the rhs element, v_min_u32 of the lhs
;                              element against it, then a plain add.
;   fiji, gfx900 and gfx1010 - one 32-bit add carrying the clamp modifier.
define <3 x i32> @v_uaddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
; GFX6-LABEL: v_uaddsat_v3i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v6, v3
; GFX6-NEXT:    v_min_u32_e32 v0, v0, v6
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
; GFX6-NEXT:    v_not_b32_e32 v3, v4
; GFX6-NEXT:    v_min_u32_e32 v1, v1, v3
; GFX6-NEXT:    v_not_b32_e32 v3, v5
; GFX6-NEXT:    v_min_u32_e32 v2, v2, v3
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v3i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v0, v3 clamp
; GFX8-NEXT:    v_add_u32_e64 v1, s[4:5], v1, v4 clamp
; GFX8-NEXT:    v_add_u32_e64 v2, s[4:5], v2, v5 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v3i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v0, v0, v3 clamp
; GFX9-NEXT:    v_add_u32_e64 v1, v1, v4 clamp
; GFX9-NEXT:    v_add_u32_e64 v2, v2, v5 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v3i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v3 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v4 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v2, v2, v5 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x i32> @llvm.uadd.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs)
  ret <3 x i32> %result
}

; <4 x i32> unsigned saturating add through llvm.uadd.sat.v4i32.
; Expected code per the checks in this function, repeated once per element:
;   tahiti                   - v_not of the rhs element, v_min_u32 of the lhs
;                              element against it, then a plain add.
;   fiji, gfx900 and gfx1010 - one 32-bit add carrying the clamp modifier.
define <4 x i32> @v_uaddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; GFX6-LABEL: v_uaddsat_v4i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v8, v4
; GFX6-NEXT:    v_min_u32_e32 v0, v0, v8
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT:    v_not_b32_e32 v4, v5
; GFX6-NEXT:    v_min_u32_e32 v1, v1, v4
; GFX6-NEXT:    v_not_b32_e32 v4, v6
; GFX6-NEXT:    v_min_u32_e32 v2, v2, v4
; GFX6-NEXT:    v_not_b32_e32 v4, v7
; GFX6-NEXT:    v_min_u32_e32 v3, v3, v4
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v4i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v0, v4 clamp
; GFX8-NEXT:    v_add_u32_e64 v1, s[4:5], v1, v5 clamp
; GFX8-NEXT:    v_add_u32_e64 v2, s[4:5], v2, v6 clamp
; GFX8-NEXT:    v_add_u32_e64 v3, s[4:5], v3, v7 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v4i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v0, v0, v4 clamp
; GFX9-NEXT:    v_add_u32_e64 v1, v1, v5 clamp
; GFX9-NEXT:    v_add_u32_e64 v2, v2, v6 clamp
; GFX9-NEXT:    v_add_u32_e64 v3, v3, v7 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v4i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v4 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v5 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v2, v2, v6 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v3, v3, v7 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %result
}

; <8 x i32> unsigned saturating add through llvm.uadd.sat.v8i32.
; Expected code per the checks in this function, repeated once per element:
;   tahiti                   - v_not of the rhs element, v_min_u32 of the lhs
;                              element against it, then a plain add.
;   fiji, gfx900 and gfx1010 - one 32-bit add carrying the clamp modifier.
define <8 x i32> @v_uaddsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
; GFX6-LABEL: v_uaddsat_v8i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v16, v8
; GFX6-NEXT:    v_min_u32_e32 v0, v0, v16
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v9
; GFX6-NEXT:    v_min_u32_e32 v1, v1, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v10
; GFX6-NEXT:    v_min_u32_e32 v2, v2, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v11
; GFX6-NEXT:    v_min_u32_e32 v3, v3, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v12
; GFX6-NEXT:    v_min_u32_e32 v4, v4, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v13
; GFX6-NEXT:    v_min_u32_e32 v5, v5, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v14
; GFX6-NEXT:    v_min_u32_e32 v6, v6, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v15
; GFX6-NEXT:    v_min_u32_e32 v7, v7, v8
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v11
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
; GFX6-NEXT:    v_add_i32_e32 v7, vcc, v7, v15
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v8i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v0, v8 clamp
; GFX8-NEXT:    v_add_u32_e64 v1, s[4:5], v1, v9 clamp
; GFX8-NEXT:    v_add_u32_e64 v2, s[4:5], v2, v10 clamp
; GFX8-NEXT:    v_add_u32_e64 v3, s[4:5], v3, v11 clamp
; GFX8-NEXT:    v_add_u32_e64 v4, s[4:5], v4, v12 clamp
; GFX8-NEXT:    v_add_u32_e64 v5, s[4:5], v5, v13 clamp
; GFX8-NEXT:    v_add_u32_e64 v6, s[4:5], v6, v14 clamp
; GFX8-NEXT:    v_add_u32_e64 v7, s[4:5], v7, v15 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v8i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v0, v0, v8 clamp
; GFX9-NEXT:    v_add_u32_e64 v1, v1, v9 clamp
; GFX9-NEXT:    v_add_u32_e64 v2, v2, v10 clamp
; GFX9-NEXT:    v_add_u32_e64 v3, v3, v11 clamp
; GFX9-NEXT:    v_add_u32_e64 v4, v4, v12 clamp
; GFX9-NEXT:    v_add_u32_e64 v5, v5, v13 clamp
; GFX9-NEXT:    v_add_u32_e64 v6, v6, v14 clamp
; GFX9-NEXT:    v_add_u32_e64 v7, v7, v15 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v8i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v8 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v9 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v2, v2, v10 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v3, v3, v11 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v4, v4, v12 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v5, v5, v13 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v6, v6, v14 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v7, v7, v15 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %lhs, <8 x i32> %rhs)
  ret <8 x i32> %result
}

; <16 x i32> unsigned saturating add through llvm.uadd.sat.v16i32.
; The per-element pattern matches the smaller i32 vector cases: on tahiti a
; v_not / v_min_u32 / add triple, on fiji, gfx900 and gfx1010 one clamped add.
; Unlike those cases, every target also emits a buffer_load_dword addressed by
; s32, and the loaded register is the addend for the last element (v15) after
; an s_waitcnt vmcnt(0).
; NOTE(review): presumably the final rhs element no longer fits in the argument
; VGPRs and is passed on the stack - confirm against the AMDGPU calling
; convention.
define <16 x i32> @v_uaddsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
; GFX6-LABEL: v_uaddsat_v16i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v31, v16
; GFX6-NEXT:    v_min_u32_e32 v0, v0, v31
; GFX6-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v17
; GFX6-NEXT:    v_min_u32_e32 v1, v1, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v18
; GFX6-NEXT:    v_min_u32_e32 v2, v2, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v19
; GFX6-NEXT:    v_min_u32_e32 v3, v3, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v20
; GFX6-NEXT:    v_min_u32_e32 v4, v4, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v21
; GFX6-NEXT:    v_min_u32_e32 v5, v5, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v22
; GFX6-NEXT:    v_min_u32_e32 v6, v6, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v23
; GFX6-NEXT:    v_min_u32_e32 v7, v7, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v24
; GFX6-NEXT:    v_min_u32_e32 v8, v8, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v25
; GFX6-NEXT:    v_min_u32_e32 v9, v9, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v26
; GFX6-NEXT:    v_min_u32_e32 v10, v10, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v27
; GFX6-NEXT:    v_min_u32_e32 v11, v11, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v28
; GFX6-NEXT:    v_min_u32_e32 v12, v12, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v29
; GFX6-NEXT:    v_min_u32_e32 v13, v13, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v30
; GFX6-NEXT:    v_min_u32_e32 v14, v14, v16
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v17
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v18
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v19
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v20
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v21
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v22
; GFX6-NEXT:    v_add_i32_e32 v7, vcc, v7, v23
; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v8, v24
; GFX6-NEXT:    v_add_i32_e32 v9, vcc, v9, v25
; GFX6-NEXT:    v_add_i32_e32 v10, vcc, v10, v26
; GFX6-NEXT:    v_add_i32_e32 v11, vcc, v11, v27
; GFX6-NEXT:    v_add_i32_e32 v12, vcc, v12, v28
; GFX6-NEXT:    v_add_i32_e32 v13, vcc, v13, v29
; GFX6-NEXT:    v_add_i32_e32 v14, vcc, v14, v30
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v16, v31
; GFX6-NEXT:    v_min_u32_e32 v15, v15, v16
; GFX6-NEXT:    v_add_i32_e32 v15, vcc, v15, v31
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v16i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v0, v16 clamp
; GFX8-NEXT:    buffer_load_dword v16, off, s[0:3], s32
; GFX8-NEXT:    v_add_u32_e64 v1, s[4:5], v1, v17 clamp
; GFX8-NEXT:    v_add_u32_e64 v2, s[4:5], v2, v18 clamp
; GFX8-NEXT:    v_add_u32_e64 v3, s[4:5], v3, v19 clamp
; GFX8-NEXT:    v_add_u32_e64 v4, s[4:5], v4, v20 clamp
; GFX8-NEXT:    v_add_u32_e64 v5, s[4:5], v5, v21 clamp
; GFX8-NEXT:    v_add_u32_e64 v6, s[4:5], v6, v22 clamp
; GFX8-NEXT:    v_add_u32_e64 v7, s[4:5], v7, v23 clamp
; GFX8-NEXT:    v_add_u32_e64 v8, s[4:5], v8, v24 clamp
; GFX8-NEXT:    v_add_u32_e64 v9, s[4:5], v9, v25 clamp
; GFX8-NEXT:    v_add_u32_e64 v10, s[4:5], v10, v26 clamp
; GFX8-NEXT:    v_add_u32_e64 v11, s[4:5], v11, v27 clamp
; GFX8-NEXT:    v_add_u32_e64 v12, s[4:5], v12, v28 clamp
; GFX8-NEXT:    v_add_u32_e64 v13, s[4:5], v13, v29 clamp
; GFX8-NEXT:    v_add_u32_e64 v14, s[4:5], v14, v30 clamp
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v15, s[4:5], v15, v16 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v16i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v0, v0, v16 clamp
; GFX9-NEXT:    buffer_load_dword v16, off, s[0:3], s32
; GFX9-NEXT:    v_add_u32_e64 v1, v1, v17 clamp
; GFX9-NEXT:    v_add_u32_e64 v2, v2, v18 clamp
; GFX9-NEXT:    v_add_u32_e64 v3, v3, v19 clamp
; GFX9-NEXT:    v_add_u32_e64 v4, v4, v20 clamp
; GFX9-NEXT:    v_add_u32_e64 v5, v5, v21 clamp
; GFX9-NEXT:    v_add_u32_e64 v6, v6, v22 clamp
; GFX9-NEXT:    v_add_u32_e64 v7, v7, v23 clamp
; GFX9-NEXT:    v_add_u32_e64 v8, v8, v24 clamp
; GFX9-NEXT:    v_add_u32_e64 v9, v9, v25 clamp
; GFX9-NEXT:    v_add_u32_e64 v10, v10, v26 clamp
; GFX9-NEXT:    v_add_u32_e64 v11, v11, v27 clamp
; GFX9-NEXT:    v_add_u32_e64 v12, v12, v28 clamp
; GFX9-NEXT:    v_add_u32_e64 v13, v13, v29 clamp
; GFX9-NEXT:    v_add_u32_e64 v14, v14, v30 clamp
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v15, v15, v16 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v16i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v16 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v17 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v2, v2, v18 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v3, v3, v19 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v4, v4, v20 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v5, v5, v21 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v6, v6, v22 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v7, v7, v23 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v8, v8, v24 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v9, v9, v25 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v10, v10, v26 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v11, v11, v27 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v12, v12, v28 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v13, v13, v29 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v14, v14, v30 clamp
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_add_nc_u32_e64 v15, v15, v31 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs)
  ret <16 x i32> %result
}


; Scalar i64 unsigned saturating add through llvm.uadd.sat.i64.
; None of the four targets is expected to use a clamp modifier here. The checks
; show the same shape everywhere: a 32-bit add plus add-with-carry pair forming
; the 64-bit sum in v[2:3], an unsigned 64-bit compare of that sum against the
; original lhs in v[0:1], and two v_cndmask selects that write -1 into each
; result half when the compare is true. Only the add mnemonics and the
; condition register name (vcc, or vcc_lo on gfx1010) differ.
define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
; GFX6-LABEL: v_uaddsat_i64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v0, v2
; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v1, v3, vcc
; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
; GFX6-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
; GFX6-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v0, v2
; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v1, v3, vcc
; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
; GFX8-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_i64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v0, v2
; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT:    v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs)
  ret i64 %result
}

; Declarations of the llvm.uadd.sat overloads called by the test functions in
; this file, one per operand type. All of them share attribute group #0.
declare i8 @llvm.uadd.sat.i8(i8, i8) #0
declare i16 @llvm.uadd.sat.i16(i16, i16) #0
declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>) #0
declare <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16>, <3 x i16>) #0
declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>) #0
declare i32 @llvm.uadd.sat.i32(i32, i32) #0
declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>) #0
declare <3 x i32> @llvm.uadd.sat.v3i32(<3 x i32>, <3 x i32>) #0
declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) #0
declare <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32>, <8 x i32>) #0
declare <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32>, <16 x i32>) #0
declare i64 @llvm.uadd.sat.i64(i64, i64) #0

; Attribute group applied to every intrinsic declaration above.
attributes #0 = { nounwind readnone speculatable willreturn }
