1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s
3; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
4; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
5; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX10 %s
6; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX11 %s
7
; Signed saturating subtraction of two i8 values (%lhs - %rhs) through the
; llvm.ssub.sat.i8 intrinsic.
; Expected code per the checks below: GFX6 sign-extends both operands with
; v_bfe_i32, subtracts in 32 bits and clamps with v_min/v_max against
; 0x7f / 0xffffff80; GFX8 uses an SDWA byte subtract followed by 16-bit
; min/max; GFX9 and GFX10+ shift both operands left by 8 so a clamped 16-bit
; subtract can be used, then shift the result back arithmetically.
8define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
9; GFX6-LABEL: v_ssubsat_i8:
10; GFX6:       ; %bb.0:
11; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 8
13; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
14; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
15; GFX6-NEXT:    v_min_i32_e32 v0, 0x7f, v0
16; GFX6-NEXT:    v_max_i32_e32 v0, 0xffffff80, v0
17; GFX6-NEXT:    s_setpc_b64 s[30:31]
18;
19; GFX8-LABEL: v_ssubsat_i8:
20; GFX8:       ; %bb.0:
21; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22; GFX8-NEXT:    v_sub_u16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
23; GFX8-NEXT:    v_min_i16_e32 v0, 0x7f, v0
24; GFX8-NEXT:    v_max_i16_e32 v0, 0xff80, v0
25; GFX8-NEXT:    s_setpc_b64 s[30:31]
26;
27; GFX9-LABEL: v_ssubsat_i8:
28; GFX9:       ; %bb.0:
29; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
31; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
32; GFX9-NEXT:    v_sub_i16 v0, v0, v1 clamp
33; GFX9-NEXT:    v_ashrrev_i16_e32 v0, 8, v0
34; GFX9-NEXT:    s_setpc_b64 s[30:31]
35;
36; GFX10PLUS-LABEL: v_ssubsat_i8:
37; GFX10PLUS:       ; %bb.0:
38; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
40; GFX10PLUS-NEXT:    v_lshlrev_b16 v1, 8, v1
41; GFX10PLUS-NEXT:    v_lshlrev_b16 v0, 8, v0
42; GFX10PLUS-NEXT:    v_sub_nc_i16 v0, v0, v1 clamp
43; GFX10PLUS-NEXT:    v_ashrrev_i16 v0, 8, v0
44; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
45  %result = call i8 @llvm.ssub.sat.i8(i8 %lhs, i8 %rhs)
46  ret i8 %result
47}
48
; Signed saturating subtraction of two i16 values (%lhs - %rhs) through the
; llvm.ssub.sat.i16 intrinsic.
; Expected code per the checks below: GFX6 sign-extends to 32 bits, subtracts
; and clamps with v_min/v_max against 0x7fff / 0xffff8000; GFX8 uses a
; compare / xor / v_cndmask select sequence around a 16-bit subtract; GFX9 and
; GFX10+ use a single 16-bit subtract with the clamp modifier.
49define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
50; GFX6-LABEL: v_ssubsat_i16:
51; GFX6:       ; %bb.0:
52; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
54; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
55; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
56; GFX6-NEXT:    v_min_i32_e32 v0, 0x7fff, v0
57; GFX6-NEXT:    v_max_i32_e32 v0, 0xffff8000, v0
58; GFX6-NEXT:    s_setpc_b64 s[30:31]
59;
60; GFX8-LABEL: v_ssubsat_i16:
61; GFX8:       ; %bb.0:
62; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, 0, v1
64; GFX8-NEXT:    v_sub_u16_e32 v1, v0, v1
65; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v1, v0
66; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 15, v1
67; GFX8-NEXT:    v_xor_b32_e32 v0, 0xffff8000, v0
68; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
69; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
70; GFX8-NEXT:    s_setpc_b64 s[30:31]
71;
72; GFX9-LABEL: v_ssubsat_i16:
73; GFX9:       ; %bb.0:
74; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75; GFX9-NEXT:    v_sub_i16 v0, v0, v1 clamp
76; GFX9-NEXT:    s_setpc_b64 s[30:31]
77;
78; GFX10PLUS-LABEL: v_ssubsat_i16:
79; GFX10PLUS:       ; %bb.0:
80; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
82; GFX10PLUS-NEXT:    v_sub_nc_i16 v0, v0, v1 clamp
83; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
84  %result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs)
85  ret i16 %result
86}
87
; Signed saturating subtraction of two i32 values (%lhs - %rhs) through the
; llvm.ssub.sat.i32 intrinsic.
; Expected code per the checks below: GFX6 and GFX8 subtract, derive the
; select condition from two signed compares combined with s_xor_b64, and pick
; between the raw difference and (sign of difference) xor 0x80000000 with
; v_cndmask; GFX9 and GFX10+ use a single 32-bit subtract with the clamp
; modifier.
88define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
89; GFX6-LABEL: v_ssubsat_i32:
90; GFX6:       ; %bb.0:
91; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v1
93; GFX6-NEXT:    v_sub_i32_e64 v1, s[4:5], v0, v1
94; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v1, v0
95; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v1
96; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
97; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
98; GFX6-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
99; GFX6-NEXT:    s_setpc_b64 s[30:31]
100;
101; GFX8-LABEL: v_ssubsat_i32:
102; GFX8:       ; %bb.0:
103; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v1
105; GFX8-NEXT:    v_sub_u32_e64 v1, s[4:5], v0, v1
106; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v1, v0
107; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v1
108; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
109; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
110; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
111; GFX8-NEXT:    s_setpc_b64 s[30:31]
112;
113; GFX9-LABEL: v_ssubsat_i32:
114; GFX9:       ; %bb.0:
115; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116; GFX9-NEXT:    v_sub_i32 v0, v0, v1 clamp
117; GFX9-NEXT:    s_setpc_b64 s[30:31]
118;
119; GFX10PLUS-LABEL: v_ssubsat_i32:
120; GFX10PLUS:       ; %bb.0:
121; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
123; GFX10PLUS-NEXT:    v_sub_nc_i32 v0, v0, v1 clamp
124; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
125  %result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs)
126  ret i32 %result
127}
128
; Element-wise signed saturating subtraction of two <2 x i16> vectors through
; the llvm.ssub.sat.v2i16 intrinsic.
; Expected code per the checks below: GFX6 sign-extends each element from its
; own register, clamps with v_min/v_max, then combines the two results with
; shift / and / or; GFX8 splits the high half out with a 16-bit right shift,
; runs the compare / xor / v_cndmask sequence per half and recombines with
; v_or_b32_sdwa; GFX9 and GFX10+ use one v_pk_sub_i16 with the clamp modifier.
129define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
130; GFX6-LABEL: v_ssubsat_v2i16:
131; GFX6:       ; %bb.0:
132; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
134; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
135; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
136; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
137; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
138; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
139; GFX6-NEXT:    v_min_i32_e32 v1, 0x7fff, v1
140; GFX6-NEXT:    v_min_i32_e32 v0, 0x7fff, v0
141; GFX6-NEXT:    v_max_i32_e32 v1, 0xffff8000, v1
142; GFX6-NEXT:    v_max_i32_e32 v0, 0xffff8000, v0
143; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
144; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
145; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
146; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
147; GFX6-NEXT:    s_setpc_b64 s[30:31]
148;
149; GFX8-LABEL: v_ssubsat_v2i16:
150; GFX8:       ; %bb.0:
151; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
153; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
154; GFX8-NEXT:    v_sub_u16_e32 v4, v3, v2
155; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, v4, v3
156; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], 0, v2
157; GFX8-NEXT:    v_ashrrev_i16_e32 v2, 15, v4
158; GFX8-NEXT:    v_xor_b32_e32 v2, 0xffff8000, v2
159; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
160; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
161; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, 0, v1
162; GFX8-NEXT:    v_sub_u16_e32 v1, v0, v1
163; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v1, v0
164; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 15, v1
165; GFX8-NEXT:    v_xor_b32_e32 v0, 0xffff8000, v0
166; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
167; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
168; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
169; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
170; GFX8-NEXT:    s_setpc_b64 s[30:31]
171;
172; GFX9-LABEL: v_ssubsat_v2i16:
173; GFX9:       ; %bb.0:
174; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v1 clamp
176; GFX9-NEXT:    s_setpc_b64 s[30:31]
177;
178; GFX10PLUS-LABEL: v_ssubsat_v2i16:
179; GFX10PLUS:       ; %bb.0:
180; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
182; GFX10PLUS-NEXT:    v_pk_sub_i16 v0, v0, v1 clamp
183; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
184  %result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
185  ret <2 x i16> %result
186}
187
; Element-wise signed saturating subtraction of two <3 x i16> vectors through
; the llvm.ssub.sat.v3i16 intrinsic (an odd element count).
; Expected code per the checks below: GFX6 sign-extends and clamps each of the
; three elements in 32 bits before repacking; GFX8 runs the compare / xor /
; v_cndmask sequence three times and recombines the first register with
; v_or_b32_sdwa; GFX9 and GFX10+ use two v_pk_sub_i16 instructions with the
; clamp modifier, one per 32-bit register of the operands.
188define <3 x i16> @v_ssubsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
189; GFX6-LABEL: v_ssubsat_v3i16:
190; GFX6:       ; %bb.0:
191; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
192; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
193; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
194; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 16
195; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
196; GFX6-NEXT:    v_bfe_i32 v5, v5, 0, 16
197; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
198; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
199; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
200; GFX6-NEXT:    v_min_i32_e32 v1, 0x7fff, v1
201; GFX6-NEXT:    v_min_i32_e32 v0, 0x7fff, v0
202; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
203; GFX6-NEXT:    v_max_i32_e32 v1, 0xffff8000, v1
204; GFX6-NEXT:    v_max_i32_e32 v0, 0xffff8000, v0
205; GFX6-NEXT:    v_min_i32_e32 v2, 0x7fff, v2
206; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
207; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
208; GFX6-NEXT:    v_max_i32_e32 v3, 0xffff8000, v2
209; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
210; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v3
211; GFX6-NEXT:    v_alignbit_b32 v1, v3, v1, 16
212; GFX6-NEXT:    s_setpc_b64 s[30:31]
213;
214; GFX8-LABEL: v_ssubsat_v3i16:
215; GFX8:       ; %bb.0:
216; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
218; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
219; GFX8-NEXT:    v_sub_u16_e32 v6, v5, v4
220; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, v6, v5
221; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], 0, v4
222; GFX8-NEXT:    v_ashrrev_i16_e32 v4, 15, v6
223; GFX8-NEXT:    v_xor_b32_e32 v4, 0xffff8000, v4
224; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
225; GFX8-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
226; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, 0, v3
227; GFX8-NEXT:    v_sub_u16_e32 v3, v1, v3
228; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v3, v1
229; GFX8-NEXT:    v_ashrrev_i16_e32 v1, 15, v3
230; GFX8-NEXT:    v_xor_b32_e32 v1, 0xffff8000, v1
231; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
232; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
233; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, 0, v2
234; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v2
235; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v2, v0
236; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 15, v2
237; GFX8-NEXT:    v_xor_b32_e32 v0, 0xffff8000, v0
238; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
239; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
240; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
241; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
242; GFX8-NEXT:    s_setpc_b64 s[30:31]
243;
244; GFX9-LABEL: v_ssubsat_v3i16:
245; GFX9:       ; %bb.0:
246; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
247; GFX9-NEXT:    v_pk_sub_i16 v1, v1, v3 clamp
248; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v2 clamp
249; GFX9-NEXT:    s_setpc_b64 s[30:31]
250;
251; GFX10PLUS-LABEL: v_ssubsat_v3i16:
252; GFX10PLUS:       ; %bb.0:
253; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
255; GFX10PLUS-NEXT:    v_pk_sub_i16 v0, v0, v2 clamp
256; GFX10PLUS-NEXT:    v_pk_sub_i16 v1, v1, v3 clamp
257; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
258  %result = call <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs)
259  ret <3 x i16> %result
260}
261
; Element-wise signed saturating subtraction of two <4 x i16> vectors through
; the llvm.ssub.sat.v4i16 intrinsic. The <4 x i16> result is bitcast to
; <2 x float> before being returned, so the function's return type differs
; from its operand type.
; Expected code per the checks below: GFX6 sign-extends and clamps each
; element in 32 bits, then packs pairs with shift / and / or; GFX8 runs the
; compare / xor / v_cndmask sequence four times and recombines each pair with
; v_or_b32_sdwa; GFX9 and GFX10+ use two v_pk_sub_i16 instructions with the
; clamp modifier.
262define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
263; GFX6-LABEL: v_ssubsat_v4i16:
264; GFX6:       ; %bb.0:
265; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 16
267; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
268; GFX6-NEXT:    v_bfe_i32 v5, v5, 0, 16
269; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
270; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
271; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
272; GFX6-NEXT:    v_min_i32_e32 v1, 0x7fff, v1
273; GFX6-NEXT:    v_min_i32_e32 v0, 0x7fff, v0
274; GFX6-NEXT:    v_max_i32_e32 v1, 0xffff8000, v1
275; GFX6-NEXT:    v_max_i32_e32 v0, 0xffff8000, v0
276; GFX6-NEXT:    v_bfe_i32 v6, v6, 0, 16
277; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
278; GFX6-NEXT:    v_bfe_i32 v7, v7, 0, 16
279; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
280; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
281; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
282; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
283; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v3, v7
284; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v6
285; GFX6-NEXT:    v_min_i32_e32 v1, 0x7fff, v1
286; GFX6-NEXT:    v_min_i32_e32 v2, 0x7fff, v2
287; GFX6-NEXT:    v_max_i32_e32 v1, 0xffff8000, v1
288; GFX6-NEXT:    v_max_i32_e32 v2, 0xffff8000, v2
289; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
290; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
291; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
292; GFX6-NEXT:    s_setpc_b64 s[30:31]
293;
294; GFX8-LABEL: v_ssubsat_v4i16:
295; GFX8:       ; %bb.0:
296; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
297; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
298; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
299; GFX8-NEXT:    v_sub_u16_e32 v6, v5, v4
300; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, v6, v5
301; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], 0, v4
302; GFX8-NEXT:    v_ashrrev_i16_e32 v4, 15, v6
303; GFX8-NEXT:    v_xor_b32_e32 v4, 0xffff8000, v4
304; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
305; GFX8-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
306; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, 0, v2
307; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v2
308; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v2, v0
309; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 15, v2
310; GFX8-NEXT:    v_xor_b32_e32 v0, 0xffff8000, v0
311; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
312; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
313; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
314; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
315; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
316; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
317; GFX8-NEXT:    v_sub_u16_e32 v5, v4, v2
318; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, v5, v4
319; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], 0, v2
320; GFX8-NEXT:    v_ashrrev_i16_e32 v2, 15, v5
321; GFX8-NEXT:    v_xor_b32_e32 v2, 0xffff8000, v2
322; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
323; GFX8-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
324; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, 0, v3
325; GFX8-NEXT:    v_sub_u16_e32 v3, v1, v3
326; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v3, v1
327; GFX8-NEXT:    v_ashrrev_i16_e32 v1, 15, v3
328; GFX8-NEXT:    v_xor_b32_e32 v1, 0xffff8000, v1
329; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
330; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
331; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
332; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
333; GFX8-NEXT:    s_setpc_b64 s[30:31]
334;
335; GFX9-LABEL: v_ssubsat_v4i16:
336; GFX9:       ; %bb.0:
337; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
338; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v2 clamp
339; GFX9-NEXT:    v_pk_sub_i16 v1, v1, v3 clamp
340; GFX9-NEXT:    s_setpc_b64 s[30:31]
341;
342; GFX10PLUS-LABEL: v_ssubsat_v4i16:
343; GFX10PLUS:       ; %bb.0:
344; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
346; GFX10PLUS-NEXT:    v_pk_sub_i16 v0, v0, v2 clamp
347; GFX10PLUS-NEXT:    v_pk_sub_i16 v1, v1, v3 clamp
348; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
349  %result = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
350  %cast = bitcast <4 x i16> %result to <2 x float>
351  ret <2 x float> %cast
352}
353
; Element-wise signed saturating subtraction of two <2 x i32> vectors through
; the llvm.ssub.sat.v2i32 intrinsic.
; Expected code per the checks below: GFX6 and GFX8 repeat the subtract /
; compare / xor / v_cndmask sequence once per element; GFX9 and GFX10+ emit
; one 32-bit subtract with the clamp modifier per element.
354define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
355; GFX6-LABEL: v_ssubsat_v2i32:
356; GFX6:       ; %bb.0:
357; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
358; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v2
359; GFX6-NEXT:    v_sub_i32_e64 v2, s[4:5], v0, v2
360; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v0
361; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v2
362; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
363; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
364; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
365; GFX6-NEXT:    v_sub_i32_e64 v2, s[4:5], v1, v3
366; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v3
367; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v1
368; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v2
369; GFX6-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
370; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
371; GFX6-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
372; GFX6-NEXT:    s_setpc_b64 s[30:31]
373;
374; GFX8-LABEL: v_ssubsat_v2i32:
375; GFX8:       ; %bb.0:
376; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v2
378; GFX8-NEXT:    v_sub_u32_e64 v2, s[4:5], v0, v2
379; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v0
380; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v2
381; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
382; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
383; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
384; GFX8-NEXT:    v_sub_u32_e64 v2, s[4:5], v1, v3
385; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v3
386; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v1
387; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v2
388; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
389; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
390; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
391; GFX8-NEXT:    s_setpc_b64 s[30:31]
392;
393; GFX9-LABEL: v_ssubsat_v2i32:
394; GFX9:       ; %bb.0:
395; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396; GFX9-NEXT:    v_sub_i32 v0, v0, v2 clamp
397; GFX9-NEXT:    v_sub_i32 v1, v1, v3 clamp
398; GFX9-NEXT:    s_setpc_b64 s[30:31]
399;
400; GFX10PLUS-LABEL: v_ssubsat_v2i32:
401; GFX10PLUS:       ; %bb.0:
402; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
403; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
404; GFX10PLUS-NEXT:    v_sub_nc_i32 v0, v0, v2 clamp
405; GFX10PLUS-NEXT:    v_sub_nc_i32 v1, v1, v3 clamp
406; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
407  %result = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
408  ret <2 x i32> %result
409}
410
; Element-wise signed saturating subtraction of two <3 x i32> vectors through
; the llvm.ssub.sat.v3i32 intrinsic (an odd element count).
; Expected code per the checks below: GFX6 and GFX8 repeat the subtract /
; compare / xor / v_cndmask sequence three times; GFX9 and GFX10+ emit three
; 32-bit subtracts with the clamp modifier.
411define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
412; GFX6-LABEL: v_ssubsat_v3i32:
413; GFX6:       ; %bb.0:
414; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v3
416; GFX6-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v3
417; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v0
418; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
419; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
420; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
421; GFX6-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
422; GFX6-NEXT:    v_sub_i32_e64 v3, s[4:5], v1, v4
423; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v4
424; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v1
425; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v3
426; GFX6-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
427; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
428; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
429; GFX6-NEXT:    v_sub_i32_e64 v3, s[4:5], v2, v5
430; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v5
431; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v2
432; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v3
433; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
434; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
435; GFX6-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
436; GFX6-NEXT:    s_setpc_b64 s[30:31]
437;
438; GFX8-LABEL: v_ssubsat_v3i32:
439; GFX8:       ; %bb.0:
440; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
441; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v3
442; GFX8-NEXT:    v_sub_u32_e64 v3, s[4:5], v0, v3
443; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v0
444; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
445; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
446; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
447; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
448; GFX8-NEXT:    v_sub_u32_e64 v3, s[4:5], v1, v4
449; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v4
450; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v1
451; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v3
452; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
453; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
454; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
455; GFX8-NEXT:    v_sub_u32_e64 v3, s[4:5], v2, v5
456; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v5
457; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v2
458; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v3
459; GFX8-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
460; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
461; GFX8-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
462; GFX8-NEXT:    s_setpc_b64 s[30:31]
463;
464; GFX9-LABEL: v_ssubsat_v3i32:
465; GFX9:       ; %bb.0:
466; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
467; GFX9-NEXT:    v_sub_i32 v0, v0, v3 clamp
468; GFX9-NEXT:    v_sub_i32 v1, v1, v4 clamp
469; GFX9-NEXT:    v_sub_i32 v2, v2, v5 clamp
470; GFX9-NEXT:    s_setpc_b64 s[30:31]
471;
472; GFX10PLUS-LABEL: v_ssubsat_v3i32:
473; GFX10PLUS:       ; %bb.0:
474; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
476; GFX10PLUS-NEXT:    v_sub_nc_i32 v0, v0, v3 clamp
477; GFX10PLUS-NEXT:    v_sub_nc_i32 v1, v1, v4 clamp
478; GFX10PLUS-NEXT:    v_sub_nc_i32 v2, v2, v5 clamp
479; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
480  %result = call <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs)
481  ret <3 x i32> %result
482}
483
; Element-wise signed saturating subtraction of two <4 x i32> vectors through
; the llvm.ssub.sat.v4i32 intrinsic.
; Expected code per the checks below: GFX6 and GFX8 repeat the subtract /
; compare / xor / v_cndmask sequence four times; GFX9 and GFX10+ emit four
; 32-bit subtracts with the clamp modifier.
484define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
485; GFX6-LABEL: v_ssubsat_v4i32:
486; GFX6:       ; %bb.0:
487; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v4
489; GFX6-NEXT:    v_sub_i32_e64 v4, s[4:5], v0, v4
490; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v0
491; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v4
492; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
493; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
494; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
495; GFX6-NEXT:    v_sub_i32_e64 v4, s[4:5], v1, v5
496; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v5
497; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v1
498; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v4
499; GFX6-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
500; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
501; GFX6-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
502; GFX6-NEXT:    v_sub_i32_e64 v4, s[4:5], v2, v6
503; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v6
504; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v2
505; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v4
506; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
507; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
508; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
509; GFX6-NEXT:    v_sub_i32_e64 v4, s[4:5], v3, v7
510; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v7
511; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v3
512; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v4
513; GFX6-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
514; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
515; GFX6-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
516; GFX6-NEXT:    s_setpc_b64 s[30:31]
517;
518; GFX8-LABEL: v_ssubsat_v4i32:
519; GFX8:       ; %bb.0:
520; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v4
522; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v0, v4
523; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v0
524; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v4
525; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
526; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
527; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
528; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v1, v5
529; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v5
530; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v1
531; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v4
532; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
533; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
534; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
535; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v2, v6
536; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v6
537; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v2
538; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v4
539; GFX8-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
540; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
541; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
542; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v3, v7
543; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v7
544; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v3
545; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v4
546; GFX8-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
547; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
548; GFX8-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
549; GFX8-NEXT:    s_setpc_b64 s[30:31]
550;
551; GFX9-LABEL: v_ssubsat_v4i32:
552; GFX9:       ; %bb.0:
553; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554; GFX9-NEXT:    v_sub_i32 v0, v0, v4 clamp
555; GFX9-NEXT:    v_sub_i32 v1, v1, v5 clamp
556; GFX9-NEXT:    v_sub_i32 v2, v2, v6 clamp
557; GFX9-NEXT:    v_sub_i32 v3, v3, v7 clamp
558; GFX9-NEXT:    s_setpc_b64 s[30:31]
559;
560; GFX10PLUS-LABEL: v_ssubsat_v4i32:
561; GFX10PLUS:       ; %bb.0:
562; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
563; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
564; GFX10PLUS-NEXT:    v_sub_nc_i32 v0, v0, v4 clamp
565; GFX10PLUS-NEXT:    v_sub_nc_i32 v1, v1, v5 clamp
566; GFX10PLUS-NEXT:    v_sub_nc_i32 v2, v2, v6 clamp
567; GFX10PLUS-NEXT:    v_sub_nc_i32 v3, v3, v7 clamp
568; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
569  %result = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
570  ret <4 x i32> %result
571}
572
; Element-wise signed saturating subtraction of two <8 x i32> vectors through
; the llvm.ssub.sat.v8i32 intrinsic.
; Expected code per the checks below: GFX6 and GFX8 repeat the subtract /
; compare / xor / v_cndmask sequence eight times, reusing v8 as the temporary
; for each difference; GFX9 and GFX10+ emit eight 32-bit subtracts with the
; clamp modifier.
573define <8 x i32> @v_ssubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
574; GFX6-LABEL: v_ssubsat_v8i32:
575; GFX6:       ; %bb.0:
576; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
577; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v8
578; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v0, v8
579; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v0
580; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v8
581; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
582; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
583; GFX6-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
584; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v1, v9
585; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v9
586; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v1
587; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v8
588; GFX6-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
589; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
590; GFX6-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
591; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v2, v10
592; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v10
593; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v2
594; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v8
595; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
596; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
597; GFX6-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
598; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v3, v11
599; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v11
600; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v3
601; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v8
602; GFX6-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
603; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
604; GFX6-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
605; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v4, v12
606; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v12
607; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v4
608; GFX6-NEXT:    v_ashrrev_i32_e32 v4, 31, v8
609; GFX6-NEXT:    v_xor_b32_e32 v4, 0x80000000, v4
610; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
611; GFX6-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
612; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v5, v13
613; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v13
614; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v5
615; GFX6-NEXT:    v_ashrrev_i32_e32 v5, 31, v8
616; GFX6-NEXT:    v_xor_b32_e32 v5, 0x80000000, v5
617; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
618; GFX6-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
619; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v6, v14
620; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v14
621; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v6
622; GFX6-NEXT:    v_ashrrev_i32_e32 v6, 31, v8
623; GFX6-NEXT:    v_xor_b32_e32 v6, 0x80000000, v6
624; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
625; GFX6-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
626; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v7, v15
627; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v15
628; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v7
629; GFX6-NEXT:    v_ashrrev_i32_e32 v7, 31, v8
630; GFX6-NEXT:    v_xor_b32_e32 v7, 0x80000000, v7
631; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
632; GFX6-NEXT:    v_cndmask_b32_e32 v7, v8, v7, vcc
633; GFX6-NEXT:    s_setpc_b64 s[30:31]
634;
635; GFX8-LABEL: v_ssubsat_v8i32:
636; GFX8:       ; %bb.0:
637; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
638; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v8
639; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v0, v8
640; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v0
641; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v8
642; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
643; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
644; GFX8-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
645; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v1, v9
646; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v9
647; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v1
648; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v8
649; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
650; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
651; GFX8-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
652; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v2, v10
653; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v10
654; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v2
655; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v8
656; GFX8-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
657; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
658; GFX8-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
659; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v3, v11
660; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v11
661; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v3
662; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v8
663; GFX8-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
664; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
665; GFX8-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
666; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v4, v12
667; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v12
668; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v4
669; GFX8-NEXT:    v_ashrrev_i32_e32 v4, 31, v8
670; GFX8-NEXT:    v_xor_b32_e32 v4, 0x80000000, v4
671; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
672; GFX8-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
673; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v5, v13
674; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v13
675; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v5
676; GFX8-NEXT:    v_ashrrev_i32_e32 v5, 31, v8
677; GFX8-NEXT:    v_xor_b32_e32 v5, 0x80000000, v5
678; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
679; GFX8-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
680; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v6, v14
681; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v14
682; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v6
683; GFX8-NEXT:    v_ashrrev_i32_e32 v6, 31, v8
684; GFX8-NEXT:    v_xor_b32_e32 v6, 0x80000000, v6
685; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
686; GFX8-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
687; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v7, v15
688; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v15
689; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v7
690; GFX8-NEXT:    v_ashrrev_i32_e32 v7, 31, v8
691; GFX8-NEXT:    v_xor_b32_e32 v7, 0x80000000, v7
692; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
693; GFX8-NEXT:    v_cndmask_b32_e32 v7, v8, v7, vcc
694; GFX8-NEXT:    s_setpc_b64 s[30:31]
695;
696; GFX9-LABEL: v_ssubsat_v8i32:
697; GFX9:       ; %bb.0:
698; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
699; GFX9-NEXT:    v_sub_i32 v0, v0, v8 clamp
700; GFX9-NEXT:    v_sub_i32 v1, v1, v9 clamp
701; GFX9-NEXT:    v_sub_i32 v2, v2, v10 clamp
702; GFX9-NEXT:    v_sub_i32 v3, v3, v11 clamp
703; GFX9-NEXT:    v_sub_i32 v4, v4, v12 clamp
704; GFX9-NEXT:    v_sub_i32 v5, v5, v13 clamp
705; GFX9-NEXT:    v_sub_i32 v6, v6, v14 clamp
706; GFX9-NEXT:    v_sub_i32 v7, v7, v15 clamp
707; GFX9-NEXT:    s_setpc_b64 s[30:31]
708;
709; GFX10PLUS-LABEL: v_ssubsat_v8i32:
710; GFX10PLUS:       ; %bb.0:
711; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
712; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
713; GFX10PLUS-NEXT:    v_sub_nc_i32 v0, v0, v8 clamp
714; GFX10PLUS-NEXT:    v_sub_nc_i32 v1, v1, v9 clamp
715; GFX10PLUS-NEXT:    v_sub_nc_i32 v2, v2, v10 clamp
716; GFX10PLUS-NEXT:    v_sub_nc_i32 v3, v3, v11 clamp
717; GFX10PLUS-NEXT:    v_sub_nc_i32 v4, v4, v12 clamp
718; GFX10PLUS-NEXT:    v_sub_nc_i32 v5, v5, v13 clamp
719; GFX10PLUS-NEXT:    v_sub_nc_i32 v6, v6, v14 clamp
720; GFX10PLUS-NEXT:    v_sub_nc_i32 v7, v7, v15 clamp
721; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
722  %result = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %lhs, <8 x i32> %rhs)
723  ret <8 x i32> %result
724}
725
; Saturating signed subtract of two <16 x i32> vectors via llvm.ssub.sat.v16i32.
; The assertion lines inside this function are generated by
; utils/update_llc_test_checks.py (see the note on the first line of this
; file); regenerate them instead of editing them by hand.
; Expected code shape per element:
;   - tahiti and fiji runs: a plain subtract, two signed compares
;     (0 < rhs, difference < lhs) combined with s_xor_b64 into the select
;     mask, and a v_cndmask_b32 choosing between the raw difference and
;     (difference arithmetically shifted right by 31) xor 0x80000000.
;   - gfx900 run: a single v_sub_i32 with the clamp modifier.
;   - gfx1010 and gfx1100 runs: a single v_sub_nc_i32 with the clamp modifier.
; Every run also expects exactly one dword load addressed relative to s32,
; whose result is the subtrahend for v15. Presumably this is the last element
; of %rhs being passed in memory -- TODO confirm against the calling convention.
726define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
727; GFX6-LABEL: v_ssubsat_v16i32:
728; GFX6:       ; %bb.0:
729; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
730; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v16
731; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v0, v16
732; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v0
733; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v16
734; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
735; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
736; GFX6-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc
737; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v1, v17
738; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v17
739; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v1
740; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v16
741; GFX6-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
742; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
743; GFX6-NEXT:    v_cndmask_b32_e32 v1, v16, v1, vcc
744; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v2, v18
745; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v18
746; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v2
747; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v16
748; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
749; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
750; GFX6-NEXT:    v_cndmask_b32_e32 v2, v16, v2, vcc
751; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v3, v19
752; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v19
753; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v3
754; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v16
755; GFX6-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
756; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
757; GFX6-NEXT:    v_cndmask_b32_e32 v3, v16, v3, vcc
758; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v4, v20
759; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v20
760; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v4
761; GFX6-NEXT:    v_ashrrev_i32_e32 v4, 31, v16
762; GFX6-NEXT:    v_xor_b32_e32 v4, 0x80000000, v4
763; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
764; GFX6-NEXT:    v_cndmask_b32_e32 v4, v16, v4, vcc
765; GFX6-NEXT:    buffer_load_dword v16, off, s[0:3], s32
766; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v5, v21
767; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v21
768; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v5
769; GFX6-NEXT:    v_ashrrev_i32_e32 v5, 31, v17
770; GFX6-NEXT:    v_xor_b32_e32 v5, 0x80000000, v5
771; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
772; GFX6-NEXT:    v_cndmask_b32_e32 v5, v17, v5, vcc
773; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v6, v22
774; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v22
775; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v6
776; GFX6-NEXT:    v_ashrrev_i32_e32 v6, 31, v17
777; GFX6-NEXT:    v_xor_b32_e32 v6, 0x80000000, v6
778; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
779; GFX6-NEXT:    v_cndmask_b32_e32 v6, v17, v6, vcc
780; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v7, v23
781; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v23
782; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v7
783; GFX6-NEXT:    v_ashrrev_i32_e32 v7, 31, v17
784; GFX6-NEXT:    v_xor_b32_e32 v7, 0x80000000, v7
785; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
786; GFX6-NEXT:    v_cndmask_b32_e32 v7, v17, v7, vcc
787; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v8, v24
788; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v24
789; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v8
790; GFX6-NEXT:    v_ashrrev_i32_e32 v8, 31, v17
791; GFX6-NEXT:    v_xor_b32_e32 v8, 0x80000000, v8
792; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
793; GFX6-NEXT:    v_cndmask_b32_e32 v8, v17, v8, vcc
794; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v9, v25
795; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v25
796; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v9
797; GFX6-NEXT:    v_ashrrev_i32_e32 v9, 31, v17
798; GFX6-NEXT:    v_xor_b32_e32 v9, 0x80000000, v9
799; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
800; GFX6-NEXT:    v_cndmask_b32_e32 v9, v17, v9, vcc
801; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v10, v26
802; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v26
803; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v10
804; GFX6-NEXT:    v_ashrrev_i32_e32 v10, 31, v17
805; GFX6-NEXT:    v_xor_b32_e32 v10, 0x80000000, v10
806; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
807; GFX6-NEXT:    v_cndmask_b32_e32 v10, v17, v10, vcc
808; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v11, v27
809; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v27
810; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v11
811; GFX6-NEXT:    v_ashrrev_i32_e32 v11, 31, v17
812; GFX6-NEXT:    v_xor_b32_e32 v11, 0x80000000, v11
813; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
814; GFX6-NEXT:    v_cndmask_b32_e32 v11, v17, v11, vcc
815; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v12, v28
816; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v28
817; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v12
818; GFX6-NEXT:    v_ashrrev_i32_e32 v12, 31, v17
819; GFX6-NEXT:    v_xor_b32_e32 v12, 0x80000000, v12
820; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
821; GFX6-NEXT:    v_cndmask_b32_e32 v12, v17, v12, vcc
822; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v13, v29
823; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v29
824; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v13
825; GFX6-NEXT:    v_ashrrev_i32_e32 v13, 31, v17
826; GFX6-NEXT:    v_xor_b32_e32 v13, 0x80000000, v13
827; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
828; GFX6-NEXT:    v_cndmask_b32_e32 v13, v17, v13, vcc
829; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v14, v30
830; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v30
831; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v14
832; GFX6-NEXT:    v_ashrrev_i32_e32 v14, 31, v17
833; GFX6-NEXT:    v_xor_b32_e32 v14, 0x80000000, v14
834; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
835; GFX6-NEXT:    v_cndmask_b32_e32 v14, v17, v14, vcc
836; GFX6-NEXT:    s_waitcnt vmcnt(0)
837; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v16
838; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v15, v16
839; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v15
840; GFX6-NEXT:    v_ashrrev_i32_e32 v15, 31, v16
841; GFX6-NEXT:    v_xor_b32_e32 v15, 0x80000000, v15
842; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
843; GFX6-NEXT:    v_cndmask_b32_e32 v15, v16, v15, vcc
844; GFX6-NEXT:    s_setpc_b64 s[30:31]
845;
846; GFX8-LABEL: v_ssubsat_v16i32:
847; GFX8:       ; %bb.0:
848; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
849; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v16
850; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v0, v16
851; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v0
852; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v16
853; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
854; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
855; GFX8-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc
856; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v1, v17
857; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v17
858; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v1
859; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v16
860; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
861; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
862; GFX8-NEXT:    v_cndmask_b32_e32 v1, v16, v1, vcc
863; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v2, v18
864; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v18
865; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v2
866; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v16
867; GFX8-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
868; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
869; GFX8-NEXT:    v_cndmask_b32_e32 v2, v16, v2, vcc
870; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v3, v19
871; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v19
872; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v3
873; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v16
874; GFX8-NEXT:    v_xor_b32_e32 v3, 0x80000000, v3
875; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
876; GFX8-NEXT:    v_cndmask_b32_e32 v3, v16, v3, vcc
877; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v4, v20
878; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v20
879; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v4
880; GFX8-NEXT:    v_ashrrev_i32_e32 v4, 31, v16
881; GFX8-NEXT:    v_xor_b32_e32 v4, 0x80000000, v4
882; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
883; GFX8-NEXT:    v_cndmask_b32_e32 v4, v16, v4, vcc
884; GFX8-NEXT:    buffer_load_dword v16, off, s[0:3], s32
885; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v5, v21
886; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v21
887; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v5
888; GFX8-NEXT:    v_ashrrev_i32_e32 v5, 31, v17
889; GFX8-NEXT:    v_xor_b32_e32 v5, 0x80000000, v5
890; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
891; GFX8-NEXT:    v_cndmask_b32_e32 v5, v17, v5, vcc
892; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v6, v22
893; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v22
894; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v6
895; GFX8-NEXT:    v_ashrrev_i32_e32 v6, 31, v17
896; GFX8-NEXT:    v_xor_b32_e32 v6, 0x80000000, v6
897; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
898; GFX8-NEXT:    v_cndmask_b32_e32 v6, v17, v6, vcc
899; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v7, v23
900; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v23
901; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v7
902; GFX8-NEXT:    v_ashrrev_i32_e32 v7, 31, v17
903; GFX8-NEXT:    v_xor_b32_e32 v7, 0x80000000, v7
904; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
905; GFX8-NEXT:    v_cndmask_b32_e32 v7, v17, v7, vcc
906; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v8, v24
907; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v24
908; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v8
909; GFX8-NEXT:    v_ashrrev_i32_e32 v8, 31, v17
910; GFX8-NEXT:    v_xor_b32_e32 v8, 0x80000000, v8
911; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
912; GFX8-NEXT:    v_cndmask_b32_e32 v8, v17, v8, vcc
913; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v9, v25
914; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v25
915; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v9
916; GFX8-NEXT:    v_ashrrev_i32_e32 v9, 31, v17
917; GFX8-NEXT:    v_xor_b32_e32 v9, 0x80000000, v9
918; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
919; GFX8-NEXT:    v_cndmask_b32_e32 v9, v17, v9, vcc
920; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v10, v26
921; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v26
922; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v10
923; GFX8-NEXT:    v_ashrrev_i32_e32 v10, 31, v17
924; GFX8-NEXT:    v_xor_b32_e32 v10, 0x80000000, v10
925; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
926; GFX8-NEXT:    v_cndmask_b32_e32 v10, v17, v10, vcc
927; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v11, v27
928; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v27
929; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v11
930; GFX8-NEXT:    v_ashrrev_i32_e32 v11, 31, v17
931; GFX8-NEXT:    v_xor_b32_e32 v11, 0x80000000, v11
932; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
933; GFX8-NEXT:    v_cndmask_b32_e32 v11, v17, v11, vcc
934; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v12, v28
935; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v28
936; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v12
937; GFX8-NEXT:    v_ashrrev_i32_e32 v12, 31, v17
938; GFX8-NEXT:    v_xor_b32_e32 v12, 0x80000000, v12
939; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
940; GFX8-NEXT:    v_cndmask_b32_e32 v12, v17, v12, vcc
941; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v13, v29
942; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v29
943; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v13
944; GFX8-NEXT:    v_ashrrev_i32_e32 v13, 31, v17
945; GFX8-NEXT:    v_xor_b32_e32 v13, 0x80000000, v13
946; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
947; GFX8-NEXT:    v_cndmask_b32_e32 v13, v17, v13, vcc
948; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v14, v30
949; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v30
950; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v14
951; GFX8-NEXT:    v_ashrrev_i32_e32 v14, 31, v17
952; GFX8-NEXT:    v_xor_b32_e32 v14, 0x80000000, v14
953; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
954; GFX8-NEXT:    v_cndmask_b32_e32 v14, v17, v14, vcc
955; GFX8-NEXT:    s_waitcnt vmcnt(0)
956; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v16
957; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v15, v16
958; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v15
959; GFX8-NEXT:    v_ashrrev_i32_e32 v15, 31, v16
960; GFX8-NEXT:    v_xor_b32_e32 v15, 0x80000000, v15
961; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
962; GFX8-NEXT:    v_cndmask_b32_e32 v15, v16, v15, vcc
963; GFX8-NEXT:    s_setpc_b64 s[30:31]
964;
965; GFX9-LABEL: v_ssubsat_v16i32:
966; GFX9:       ; %bb.0:
967; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
968; GFX9-NEXT:    v_sub_i32 v0, v0, v16 clamp
969; GFX9-NEXT:    buffer_load_dword v16, off, s[0:3], s32
970; GFX9-NEXT:    v_sub_i32 v1, v1, v17 clamp
971; GFX9-NEXT:    v_sub_i32 v2, v2, v18 clamp
972; GFX9-NEXT:    v_sub_i32 v3, v3, v19 clamp
973; GFX9-NEXT:    v_sub_i32 v4, v4, v20 clamp
974; GFX9-NEXT:    v_sub_i32 v5, v5, v21 clamp
975; GFX9-NEXT:    v_sub_i32 v6, v6, v22 clamp
976; GFX9-NEXT:    v_sub_i32 v7, v7, v23 clamp
977; GFX9-NEXT:    v_sub_i32 v8, v8, v24 clamp
978; GFX9-NEXT:    v_sub_i32 v9, v9, v25 clamp
979; GFX9-NEXT:    v_sub_i32 v10, v10, v26 clamp
980; GFX9-NEXT:    v_sub_i32 v11, v11, v27 clamp
981; GFX9-NEXT:    v_sub_i32 v12, v12, v28 clamp
982; GFX9-NEXT:    v_sub_i32 v13, v13, v29 clamp
983; GFX9-NEXT:    v_sub_i32 v14, v14, v30 clamp
984; GFX9-NEXT:    s_waitcnt vmcnt(0)
985; GFX9-NEXT:    v_sub_i32 v15, v15, v16 clamp
986; GFX9-NEXT:    s_setpc_b64 s[30:31]
987;
988; GFX10-LABEL: v_ssubsat_v16i32:
989; GFX10:       ; %bb.0:
990; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
991; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
992; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
993; GFX10-NEXT:    v_sub_nc_i32 v0, v0, v16 clamp
994; GFX10-NEXT:    v_sub_nc_i32 v1, v1, v17 clamp
995; GFX10-NEXT:    v_sub_nc_i32 v2, v2, v18 clamp
996; GFX10-NEXT:    v_sub_nc_i32 v3, v3, v19 clamp
997; GFX10-NEXT:    v_sub_nc_i32 v4, v4, v20 clamp
998; GFX10-NEXT:    v_sub_nc_i32 v5, v5, v21 clamp
999; GFX10-NEXT:    v_sub_nc_i32 v6, v6, v22 clamp
1000; GFX10-NEXT:    v_sub_nc_i32 v7, v7, v23 clamp
1001; GFX10-NEXT:    v_sub_nc_i32 v8, v8, v24 clamp
1002; GFX10-NEXT:    v_sub_nc_i32 v9, v9, v25 clamp
1003; GFX10-NEXT:    v_sub_nc_i32 v10, v10, v26 clamp
1004; GFX10-NEXT:    v_sub_nc_i32 v11, v11, v27 clamp
1005; GFX10-NEXT:    v_sub_nc_i32 v12, v12, v28 clamp
1006; GFX10-NEXT:    v_sub_nc_i32 v13, v13, v29 clamp
1007; GFX10-NEXT:    v_sub_nc_i32 v14, v14, v30 clamp
1008; GFX10-NEXT:    s_waitcnt vmcnt(0)
1009; GFX10-NEXT:    v_sub_nc_i32 v15, v15, v31 clamp
1010; GFX10-NEXT:    s_setpc_b64 s[30:31]
1011;
1012; GFX11-LABEL: v_ssubsat_v16i32:
1013; GFX11:       ; %bb.0:
1014; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1015; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1016; GFX11-NEXT:    scratch_load_b32 v31, off, s32
1017; GFX11-NEXT:    v_sub_nc_i32 v0, v0, v16 clamp
1018; GFX11-NEXT:    v_sub_nc_i32 v1, v1, v17 clamp
1019; GFX11-NEXT:    v_sub_nc_i32 v2, v2, v18 clamp
1020; GFX11-NEXT:    v_sub_nc_i32 v3, v3, v19 clamp
1021; GFX11-NEXT:    v_sub_nc_i32 v4, v4, v20 clamp
1022; GFX11-NEXT:    v_sub_nc_i32 v5, v5, v21 clamp
1023; GFX11-NEXT:    v_sub_nc_i32 v6, v6, v22 clamp
1024; GFX11-NEXT:    v_sub_nc_i32 v7, v7, v23 clamp
1025; GFX11-NEXT:    v_sub_nc_i32 v8, v8, v24 clamp
1026; GFX11-NEXT:    v_sub_nc_i32 v9, v9, v25 clamp
1027; GFX11-NEXT:    v_sub_nc_i32 v10, v10, v26 clamp
1028; GFX11-NEXT:    v_sub_nc_i32 v11, v11, v27 clamp
1029; GFX11-NEXT:    v_sub_nc_i32 v12, v12, v28 clamp
1030; GFX11-NEXT:    v_sub_nc_i32 v13, v13, v29 clamp
1031; GFX11-NEXT:    v_sub_nc_i32 v14, v14, v30 clamp
1032; GFX11-NEXT:    s_waitcnt vmcnt(0)
1033; GFX11-NEXT:    v_sub_nc_i32 v15, v15, v31 clamp
1034; GFX11-NEXT:    s_setpc_b64 s[30:31]
1035  %result = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs)
1036  ret <16 x i32> %result
1037}
1038
1039
; Saturating signed subtract of two i64 values via llvm.ssub.sat.i64.
; No run expects a clamp-modifier instruction for this width. Each one expects:
;   - the 64-bit difference built from a low-half subtract followed by a
;     subtract-with-borrow on the high half;
;   - two v_cmp_lt_i64 compares (0 < rhs, difference < lhs) whose results are
;     xor'ed together to form the select mask;
;   - a replacement value derived from the high half of the difference:
;     low dword = high half arithmetically shifted right by 31,
;     high dword = that same value xor 0x80000000;
;   - two conditional selects between the raw difference and the replacement.
;     The gfx1100 run expects both selects fused into one v_dual_cndmask_b32.
; The assertion lines are generated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
1040define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
1041; GFX6-LABEL: v_ssubsat_i64:
1042; GFX6:       ; %bb.0:
1043; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1044; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
1045; GFX6-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
1046; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1047; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1048; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
1049; GFX6-NEXT:    s_xor_b64 vcc, s[4:5], vcc
1050; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1051; GFX6-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1052; GFX6-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
1053; GFX6-NEXT:    s_setpc_b64 s[30:31]
1054;
1055; GFX8-LABEL: v_ssubsat_i64:
1056; GFX8:       ; %bb.0:
1057; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1058; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v0, v2
1059; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
1060; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1061; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1062; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
1063; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
1064; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1065; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1066; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
1067; GFX8-NEXT:    s_setpc_b64 s[30:31]
1068;
1069; GFX9-LABEL: v_ssubsat_i64:
1070; GFX9:       ; %bb.0:
1071; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1072; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v0, v2
1073; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
1074; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1075; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1076; GFX9-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
1077; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
1078; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1079; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1080; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
1081; GFX9-NEXT:    s_setpc_b64 s[30:31]
1082;
1083; GFX10-LABEL: v_ssubsat_i64:
1084; GFX10:       ; %bb.0:
1085; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1086; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1087; GFX10-NEXT:    v_sub_co_u32 v4, vcc_lo, v0, v2
1088; GFX10-NEXT:    v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
1089; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, 0, v[2:3]
1090; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
1091; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
1092; GFX10-NEXT:    v_xor_b32_e32 v1, 0x80000000, v6
1093; GFX10-NEXT:    s_xor_b32 vcc_lo, s4, vcc_lo
1094; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc_lo
1095; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
1096; GFX10-NEXT:    s_setpc_b64 s[30:31]
1097;
1098; GFX11-LABEL: v_ssubsat_i64:
1099; GFX11:       ; %bb.0:
1100; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1101; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1102; GFX11-NEXT:    v_sub_co_u32 v4, vcc_lo, v0, v2
1103; GFX11-NEXT:    v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
1104; GFX11-NEXT:    v_cmp_lt_i64_e64 s0, 0, v[2:3]
1105; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
1106; GFX11-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
1107; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v6
1108; GFX11-NEXT:    s_xor_b32 vcc_lo, s0, vcc_lo
1109; GFX11-NEXT:    v_dual_cndmask_b32 v0, v4, v6 :: v_dual_cndmask_b32 v1, v5, v1
1110; GFX11-NEXT:    s_setpc_b64 s[30:31]
1111  %result = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs)
1112  ret i64 %result
1113}
1114
; Declarations of the llvm.ssub.sat intrinsic, one per scalar or vector
; overload listed here. Every declaration refers to attribute group #0.
1115declare i8 @llvm.ssub.sat.i8(i8, i8) #0
1116declare i16 @llvm.ssub.sat.i16(i16, i16) #0
1117declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) #0
1118declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) #0
1119declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) #0
1120declare i32 @llvm.ssub.sat.i32(i32, i32) #0
1121declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) #0
1122declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) #0
1123declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) #0
1124declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>) #0
1125declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) #0
1126declare i64 @llvm.ssub.sat.i64(i64, i64) #0
1127
; Attribute group #0, referenced by each intrinsic declaration above.
1128attributes #0 = { nounwind readnone speculatable willreturn }
1129