1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -slp-vectorizer -instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
3; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer -instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer -instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
5
; Unsigned saturating add on both lanes of a <2 x i16>, written in scalar form:
; extract each lane, call @llvm.uadd.sat.i16 per lane, re-insert the results.
; The GFX7 checks expect the scalar sequence to be left as is; the GFX8 checks
; expect a single @llvm.uadd.sat.v2i16 call on the original vector arguments.
6define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
7; GFX7-LABEL: @uadd_sat_v2i16(
8; GFX7-NEXT:  bb:
9; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
10; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
11; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
12; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
13; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
14; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
15; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> undef, i16 [[ADD_0]], i64 0
16; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
17; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
18;
19; GFX8-LABEL: @uadd_sat_v2i16(
20; GFX8-NEXT:  bb:
21; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
22; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
23;
24bb:
25  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
26  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
27  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
28  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
29  %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
30  %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
31  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
32  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
33  ret <2 x i16> %ins.1
34}
35
; Unsigned saturating subtract on both lanes of a <2 x i16>, written in scalar
; form (extract, per-lane @llvm.usub.sat.i16 call, insert). The per-lane results
; are named %add.N even though the operation here is usub.sat.
; The GFX7 checks expect the scalar sequence to be left as is; the GFX8 checks
; expect a single @llvm.usub.sat.v2i16 call on the original vector arguments.
36define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
37; GFX7-LABEL: @usub_sat_v2i16(
38; GFX7-NEXT:  bb:
39; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
40; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
41; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
42; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
43; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
44; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
45; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> undef, i16 [[ADD_0]], i64 0
46; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
47; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
48;
49; GFX8-LABEL: @usub_sat_v2i16(
50; GFX8-NEXT:  bb:
51; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
52; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
53;
54bb:
55  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
56  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
57  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
58  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
59  %add.0 = call i16 @llvm.usub.sat.i16(i16 %arg0.0, i16 %arg1.0)
60  %add.1 = call i16 @llvm.usub.sat.i16(i16 %arg0.1, i16 %arg1.1)
61  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
62  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
63  ret <2 x i16> %ins.1
64}
65
; Signed saturating add on both lanes of a <2 x i16>, written in scalar form
; (extract, per-lane @llvm.sadd.sat.i16 call, insert).
; The GFX7 checks expect the scalar sequence to be left as is; the GFX8 checks
; expect a single @llvm.sadd.sat.v2i16 call on the original vector arguments.
66define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
67; GFX7-LABEL: @sadd_sat_v2i16(
68; GFX7-NEXT:  bb:
69; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
70; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
71; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
72; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
73; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
74; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
75; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> undef, i16 [[ADD_0]], i64 0
76; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
77; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
78;
79; GFX8-LABEL: @sadd_sat_v2i16(
80; GFX8-NEXT:  bb:
81; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
82; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
83;
84bb:
85  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
86  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
87  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
88  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
89  %add.0 = call i16 @llvm.sadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
90  %add.1 = call i16 @llvm.sadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
91  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
92  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
93  ret <2 x i16> %ins.1
94}
95
; Signed saturating subtract on both lanes of a <2 x i16>, written in scalar
; form (extract, per-lane @llvm.ssub.sat.i16 call, insert). The per-lane results
; are named %add.N even though the operation here is ssub.sat.
; The GFX7 checks expect the scalar sequence to be left as is; the GFX8 checks
; expect a single @llvm.ssub.sat.v2i16 call on the original vector arguments.
96define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
97; GFX7-LABEL: @ssub_sat_v2i16(
98; GFX7-NEXT:  bb:
99; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
100; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
101; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
102; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
103; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
104; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
105; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> undef, i16 [[ADD_0]], i64 0
106; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
107; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
108;
109; GFX8-LABEL: @ssub_sat_v2i16(
110; GFX8-NEXT:  bb:
111; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
112; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
113;
114bb:
115  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
116  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
117  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
118  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
119  %add.0 = call i16 @llvm.ssub.sat.i16(i16 %arg0.0, i16 %arg1.0)
120  %add.1 = call i16 @llvm.ssub.sat.i16(i16 %arg0.1, i16 %arg1.1)
121  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
122  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
123  ret <2 x i16> %ins.1
124}
125
; Unsigned saturating add on both lanes of a <2 x i32>, written in scalar form
; (extract, per-lane @llvm.uadd.sat.i32 call, insert).
; The checks use the common GCN prefix and expect the scalar sequence to come
; out unchanged, i.e. no vector intrinsic call is formed for 32-bit lanes.
126define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
127; GCN-LABEL: @uadd_sat_v2i32(
128; GCN-NEXT:  bb:
129; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
130; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
131; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
132; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
133; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
134; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
135; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> undef, i32 [[ADD_0]], i64 0
136; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
137; GCN-NEXT:    ret <2 x i32> [[INS_1]]
138;
139bb:
140  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
141  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
142  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
143  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
144  %add.0 = call i32 @llvm.uadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
145  %add.1 = call i32 @llvm.uadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
146  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
147  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
148  ret <2 x i32> %ins.1
149}
150
; Unsigned saturating subtract on both lanes of a <2 x i32>, written in scalar
; form (extract, per-lane @llvm.usub.sat.i32 call, insert). The per-lane results
; are named %add.N even though the operation here is usub.sat.
; The checks use the common GCN prefix and expect the scalar sequence to come
; out unchanged, i.e. no vector intrinsic call is formed for 32-bit lanes.
151define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
152; GCN-LABEL: @usub_sat_v2i32(
153; GCN-NEXT:  bb:
154; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
155; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
156; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
157; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
158; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
159; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
160; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> undef, i32 [[ADD_0]], i64 0
161; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
162; GCN-NEXT:    ret <2 x i32> [[INS_1]]
163;
164bb:
165  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
166  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
167  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
168  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
169  %add.0 = call i32 @llvm.usub.sat.i32(i32 %arg0.0, i32 %arg1.0)
170  %add.1 = call i32 @llvm.usub.sat.i32(i32 %arg0.1, i32 %arg1.1)
171  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
172  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
173  ret <2 x i32> %ins.1
174}
175
; Signed saturating add on both lanes of a <2 x i32>, written in scalar form
; (extract, per-lane @llvm.sadd.sat.i32 call, insert).
; The checks use the common GCN prefix and expect the scalar sequence to come
; out unchanged, i.e. no vector intrinsic call is formed for 32-bit lanes.
176define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
177; GCN-LABEL: @sadd_sat_v2i32(
178; GCN-NEXT:  bb:
179; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
180; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
181; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
182; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
183; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
184; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
185; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> undef, i32 [[ADD_0]], i64 0
186; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
187; GCN-NEXT:    ret <2 x i32> [[INS_1]]
188;
189bb:
190  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
191  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
192  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
193  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
194  %add.0 = call i32 @llvm.sadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
195  %add.1 = call i32 @llvm.sadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
196  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
197  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
198  ret <2 x i32> %ins.1
199}
200
; Signed saturating subtract on both lanes of a <2 x i32>, written in scalar
; form (extract, per-lane @llvm.ssub.sat.i32 call, insert). The per-lane results
; are named %add.N even though the operation here is ssub.sat.
; The checks use the common GCN prefix and expect the scalar sequence to come
; out unchanged, i.e. no vector intrinsic call is formed for 32-bit lanes.
201define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
202; GCN-LABEL: @ssub_sat_v2i32(
203; GCN-NEXT:  bb:
204; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
205; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
206; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
207; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
208; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
209; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
210; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> undef, i32 [[ADD_0]], i64 0
211; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
212; GCN-NEXT:    ret <2 x i32> [[INS_1]]
213;
214bb:
215  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
216  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
217  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
218  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
219  %add.0 = call i32 @llvm.ssub.sat.i32(i32 %arg0.0, i32 %arg1.0)
220  %add.1 = call i32 @llvm.ssub.sat.i32(i32 %arg0.1, i32 %arg1.1)
221  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
222  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
223  ret <2 x i32> %ins.1
224}
225
; Unsigned saturating add on all three lanes of a <3 x i16>, written in scalar
; form (extract, per-lane @llvm.uadd.sat.i16 call, insert).
; The GFX7 checks expect the scalar sequence to be left as is. The GFX8 checks
; expect lanes 0 and 1 to be handled by one @llvm.uadd.sat.v2i16 call whose
; operands are shufflevector slices of the arguments, while lane 2 keeps its
; scalar i16 call and is inserted at index 2 of the widened result.
226define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
227; GFX7-LABEL: @uadd_sat_v3i16(
228; GFX7-NEXT:  bb:
229; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
230; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
231; GFX7-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
232; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
233; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
234; GFX7-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
235; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
236; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
237; GFX7-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
238; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <3 x i16> undef, i16 [[ADD_0]], i64 0
239; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
240; GFX7-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
241; GFX7-NEXT:    ret <3 x i16> [[INS_2]]
242;
243; GFX8-LABEL: @uadd_sat_v3i16(
244; GFX8-NEXT:  bb:
245; GFX8-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
246; GFX8-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
247; GFX8-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> undef, <2 x i32> <i32 0, i32 1>
248; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> <i32 0, i32 1>
249; GFX8-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
250; GFX8-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
251; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 undef>
252; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
253; GFX8-NEXT:    ret <3 x i16> [[INS_2]]
254;
255bb:
256  %arg0.0 = extractelement <3 x i16> %arg0, i64 0
257  %arg0.1 = extractelement <3 x i16> %arg0, i64 1
258  %arg0.2 = extractelement <3 x i16> %arg0, i64 2
259  %arg1.0 = extractelement <3 x i16> %arg1, i64 0
260  %arg1.1 = extractelement <3 x i16> %arg1, i64 1
261  %arg1.2 = extractelement <3 x i16> %arg1, i64 2
262  %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
263  %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
264  %add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
265  %ins.0 = insertelement <3 x i16> undef, i16 %add.0, i64 0
266  %ins.1 = insertelement <3 x i16> %ins.0, i16 %add.1, i64 1
267  %ins.2 = insertelement <3 x i16> %ins.1, i16 %add.2, i64 2
268  ret <3 x i16> %ins.2
269}
270
; Unsigned saturating add on all four lanes of a <4 x i16>, written in scalar
; form (extract, per-lane @llvm.uadd.sat.i16 call, insert).
; The GFX7 checks expect the scalar sequence to be left as is. The GFX8 checks
; expect two @llvm.uadd.sat.v2i16 calls, one on lanes 0-1 and one on lanes 2-3
; (each fed by shufflevector slices of the arguments), with the two <2 x i16>
; results concatenated into the <4 x i16> return value by a final shufflevector.
271define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
272; GFX7-LABEL: @uadd_sat_v4i16(
273; GFX7-NEXT:  bb:
274; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
275; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
276; GFX7-NEXT:    [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0]], i64 2
277; GFX7-NEXT:    [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
278; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
279; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
280; GFX7-NEXT:    [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1]], i64 2
281; GFX7-NEXT:    [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
282; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
283; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
284; GFX7-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
285; GFX7-NEXT:    [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
286; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <4 x i16> undef, i16 [[ADD_0]], i64 0
287; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
288; GFX7-NEXT:    [[INS_2:%.*]] = insertelement <4 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
289; GFX7-NEXT:    [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
290; GFX7-NEXT:    ret <4 x i16> [[INS_3]]
291;
292; GFX8-LABEL: @uadd_sat_v4i16(
293; GFX8-NEXT:  bb:
294; GFX8-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> undef, <2 x i32> <i32 0, i32 1>
295; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> undef, <2 x i32> <i32 0, i32 1>
296; GFX8-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
297; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
298; GFX8-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
299; GFX8-NEXT:    [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
300; GFX8-NEXT:    [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
301; GFX8-NEXT:    ret <4 x i16> [[INS_31]]
302;
303bb:
304  %arg0.0 = extractelement <4 x i16> %arg0, i64 0
305  %arg0.1 = extractelement <4 x i16> %arg0, i64 1
306  %arg0.2 = extractelement <4 x i16> %arg0, i64 2
307  %arg0.3 = extractelement <4 x i16> %arg0, i64 3
308  %arg1.0 = extractelement <4 x i16> %arg1, i64 0
309  %arg1.1 = extractelement <4 x i16> %arg1, i64 1
310  %arg1.2 = extractelement <4 x i16> %arg1, i64 2
311  %arg1.3 = extractelement <4 x i16> %arg1, i64 3
312  %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
313  %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
314  %add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
315  %add.3 = call i16 @llvm.uadd.sat.i16(i16 %arg0.3, i16 %arg1.3)
316  %ins.0 = insertelement <4 x i16> undef, i16 %add.0, i64 0
317  %ins.1 = insertelement <4 x i16> %ins.0, i16 %add.1, i64 1
318  %ins.2 = insertelement <4 x i16> %ins.1, i16 %add.2, i64 2
319  %ins.3 = insertelement <4 x i16> %ins.2, i16 %add.3, i64 3
320  ret <4 x i16> %ins.3
321}
322
; Scalar i16 saturating add/sub intrinsics called by the test functions in this
; file (unsigned and signed variants). Only the scalar forms are declared here;
; the vector forms appear only in the expected output.
323declare i16 @llvm.uadd.sat.i16(i16, i16) #0
324declare i16 @llvm.usub.sat.i16(i16, i16) #0
325declare i16 @llvm.sadd.sat.i16(i16, i16) #0
326declare i16 @llvm.ssub.sat.i16(i16, i16) #0
327
; Scalar i32 counterparts of the declarations above.
328declare i32 @llvm.uadd.sat.i32(i32, i32) #0
329declare i32 @llvm.usub.sat.i32(i32, i32) #0
330declare i32 @llvm.sadd.sat.i32(i32, i32) #0
331declare i32 @llvm.ssub.sat.i32(i32, i32) #0
332
; Attribute group shared by every intrinsic declaration above.
333attributes #0 = { nounwind readnone speculatable willreturn }
334