1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s
3; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
4; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
5; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
6
; Signed saturating subtraction of two scalar i8 values through
; @llvm.ssub.sat.i8. The autogenerated checks pin the expected code for each
; llc configuration in the file header:
;   - tahiti checks: both operands are sign-extended from 8 bits (v_bfe_i32),
;     subtracted as 32-bit values and clamped with v_min_i32 / v_max_i32
;     against 0x7f and 0xffffff80.
;   - fiji checks: an SDWA 16-bit subtract on the sign-extended low bytes,
;     followed by a 16-bit min/max clamp against 0x7f and 0xff80.
;   - gfx900 / gfx1010 checks: both operands are shifted left by 8, subtracted
;     with the 16-bit clamp modifier, then shifted right arithmetically by 8.
; Do not edit the check lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
7define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) {
8; GFX6-LABEL: v_ssubsat_i8:
9; GFX6:       ; %bb.0:
10; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 8
12; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
13; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
14; GFX6-NEXT:    v_min_i32_e32 v0, 0x7f, v0
15; GFX6-NEXT:    v_max_i32_e32 v0, 0xffffff80, v0
16; GFX6-NEXT:    s_setpc_b64 s[30:31]
17;
18; GFX8-LABEL: v_ssubsat_i8:
19; GFX8:       ; %bb.0:
20; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21; GFX8-NEXT:    v_sub_u16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
22; GFX8-NEXT:    v_min_i16_e32 v0, 0x7f, v0
23; GFX8-NEXT:    v_max_i16_e32 v0, 0xff80, v0
24; GFX8-NEXT:    s_setpc_b64 s[30:31]
25;
26; GFX9-LABEL: v_ssubsat_i8:
27; GFX9:       ; %bb.0:
28; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
30; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
31; GFX9-NEXT:    v_sub_i16 v0, v0, v1 clamp
32; GFX9-NEXT:    v_ashrrev_i16_e32 v0, 8, v0
33; GFX9-NEXT:    s_setpc_b64 s[30:31]
34;
35; GFX10-LABEL: v_ssubsat_i8:
36; GFX10:       ; %bb.0:
37; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
39; GFX10-NEXT:    v_lshlrev_b16 v1, 8, v1
40; GFX10-NEXT:    v_lshlrev_b16 v0, 8, v0
41; GFX10-NEXT:    v_sub_nc_i16 v0, v0, v1 clamp
42; GFX10-NEXT:    v_ashrrev_i16 v0, 8, v0
43; GFX10-NEXT:    s_setpc_b64 s[30:31]
44  %result = call i8 @llvm.ssub.sat.i8(i8 %lhs, i8 %rhs)
45  ret i8 %result
46}
47
; Signed saturating subtraction of two scalar i16 values through
; @llvm.ssub.sat.i16. Expected code per llc configuration:
;   - tahiti checks: operands are sign-extended from 16 bits, subtracted as
;     32-bit values and clamped with min/max against 0x7fff and 0xffff8000.
;   - fiji checks: a 16-bit subtract plus two signed compares; the XOR of the
;     two compare results selects (v_cndmask) between the plain difference
;     and a value built from the difference's sign XORed with 0xffff8000.
;   - gfx900 / gfx1010 checks: one subtract carrying the clamp modifier
;     (v_sub_i16 and v_sub_nc_i16 respectively).
; Check lines are autogenerated; regenerate rather than hand-edit.
48define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) {
49; GFX6-LABEL: v_ssubsat_i16:
50; GFX6:       ; %bb.0:
51; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
53; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
54; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
55; GFX6-NEXT:    v_min_i32_e32 v0, 0x7fff, v0
56; GFX6-NEXT:    v_max_i32_e32 v0, 0xffff8000, v0
57; GFX6-NEXT:    s_setpc_b64 s[30:31]
58;
59; GFX8-LABEL: v_ssubsat_i16:
60; GFX8:       ; %bb.0:
61; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, 0, v1
63; GFX8-NEXT:    v_sub_u16_e32 v1, v0, v1
64; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v1, v0
65; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 15, v1
66; GFX8-NEXT:    v_xor_b32_e32 v0, 0xffff8000, v0
67; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
68; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
69; GFX8-NEXT:    s_setpc_b64 s[30:31]
70;
71; GFX9-LABEL: v_ssubsat_i16:
72; GFX9:       ; %bb.0:
73; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74; GFX9-NEXT:    v_sub_i16 v0, v0, v1 clamp
75; GFX9-NEXT:    s_setpc_b64 s[30:31]
76;
77; GFX10-LABEL: v_ssubsat_i16:
78; GFX10:       ; %bb.0:
79; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
81; GFX10-NEXT:    v_sub_nc_i16 v0, v0, v1 clamp
82; GFX10-NEXT:    s_setpc_b64 s[30:31]
83  %result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs)
84  ret i16 %result
85}
86
; Signed saturating subtraction of two scalar i32 values through
; @llvm.ssub.sat.i32. Expected code per llc configuration:
;   - tahiti and fiji checks: a 32-bit subtract plus two signed compares; the
;     XOR of the compare results selects (v_cndmask) between the plain
;     difference and the difference's sign XORed with 0x80000000. The two
;     targets differ only in the subtract mnemonic (v_sub_i32 vs v_sub_u32).
;   - gfx900 / gfx1010 checks: one subtract carrying the clamp modifier
;     (v_sub_i32 and v_sub_nc_i32 respectively).
; Check lines are autogenerated; regenerate rather than hand-edit.
87define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) {
88; GFX6-LABEL: v_ssubsat_i32:
89; GFX6:       ; %bb.0:
90; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v1
92; GFX6-NEXT:    v_sub_i32_e64 v1, s[4:5], v0, v1
93; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v1, v0
94; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v1
95; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
96; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
97; GFX6-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
98; GFX6-NEXT:    s_setpc_b64 s[30:31]
99;
100; GFX8-LABEL: v_ssubsat_i32:
101; GFX8:       ; %bb.0:
102; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v1
104; GFX8-NEXT:    v_sub_u32_e64 v1, s[4:5], v0, v1
105; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v1, v0
106; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v1
107; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
108; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
109; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
110; GFX8-NEXT:    s_setpc_b64 s[30:31]
111;
112; GFX9-LABEL: v_ssubsat_i32:
113; GFX9:       ; %bb.0:
114; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115; GFX9-NEXT:    v_sub_i32 v0, v0, v1 clamp
116; GFX9-NEXT:    s_setpc_b64 s[30:31]
117;
118; GFX10-LABEL: v_ssubsat_i32:
119; GFX10:       ; %bb.0:
120; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
122; GFX10-NEXT:    v_sub_nc_i32 v0, v0, v1 clamp
123; GFX10-NEXT:    s_setpc_b64 s[30:31]
124  %result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs)
125  ret i32 %result
126}
127
; Signed saturating subtraction of two <2 x i16> vectors through
; @llvm.ssub.sat.v2i16. Expected code per llc configuration:
;   - tahiti checks: the four input elements are read from v0..v3, each is
;     sign-extended from 16 bits, subtracted as a 32-bit value and clamped
;     with min/max against s4 = 0x7fff and s5 (s_movk_i32 0x8000); the two
;     results are then combined with shift/and/or and the high half is
;     shifted back out into v1.
;   - fiji checks: the high halves of v0 / v1 are extracted with 16-bit right
;     shifts, each half goes through the compare/subtract/select sequence,
;     and the halves are merged again with v_or_b32_sdwa.
;   - gfx900 / gfx1010 checks: a single v_pk_sub_i16 with the clamp modifier.
; Check lines are autogenerated; regenerate rather than hand-edit.
128define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
129; GFX6-LABEL: v_ssubsat_v2i16:
130; GFX6:       ; %bb.0:
131; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
133; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
134; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
135; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
136; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
137; GFX6-NEXT:    s_movk_i32 s4, 0x7fff
138; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
139; GFX6-NEXT:    v_min_i32_e32 v1, s4, v1
140; GFX6-NEXT:    s_movk_i32 s5, 0x8000
141; GFX6-NEXT:    v_min_i32_e32 v0, s4, v0
142; GFX6-NEXT:    v_max_i32_e32 v1, s5, v1
143; GFX6-NEXT:    v_max_i32_e32 v0, s5, v0
144; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
145; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
146; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
147; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
148; GFX6-NEXT:    s_setpc_b64 s[30:31]
149;
150; GFX8-LABEL: v_ssubsat_v2i16:
151; GFX8:       ; %bb.0:
152; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
154; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
155; GFX8-NEXT:    v_sub_u16_e32 v4, v3, v2
156; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, v4, v3
157; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], 0, v2
158; GFX8-NEXT:    v_ashrrev_i16_e32 v2, 15, v4
159; GFX8-NEXT:    s_movk_i32 s6, 0x8000
160; GFX8-NEXT:    v_xor_b32_e32 v2, s6, v2
161; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
162; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
163; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, 0, v1
164; GFX8-NEXT:    v_sub_u16_e32 v1, v0, v1
165; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v1, v0
166; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 15, v1
167; GFX8-NEXT:    v_xor_b32_e32 v0, s6, v0
168; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
169; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
170; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
171; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
172; GFX8-NEXT:    s_setpc_b64 s[30:31]
173;
174; GFX9-LABEL: v_ssubsat_v2i16:
175; GFX9:       ; %bb.0:
176; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v1 clamp
178; GFX9-NEXT:    s_setpc_b64 s[30:31]
179;
180; GFX10-LABEL: v_ssubsat_v2i16:
181; GFX10:       ; %bb.0:
182; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
184; GFX10-NEXT:    v_pk_sub_i16 v0, v0, v1 clamp
185; GFX10-NEXT:    s_setpc_b64 s[30:31]
186  %result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
187  ret <2 x i16> %result
188}
189
; Signed saturating subtraction of two <3 x i16> vectors (an odd element
; count) through @llvm.ssub.sat.v3i16. Expected code per llc configuration:
;   - tahiti checks: six inputs in v0..v5 are sign-extended from 16 bits; each
;     pair is subtracted as a 32-bit value and clamped with min/max against
;     s4 = 0x7fff and s5 (s_movk_i32 0x8000), then the results are combined
;     with shift/and/or and v_alignbit_b32.
;   - fiji checks: three compare/subtract/select sequences (high half of
;     v0/v2, v1/v3, low half of v0/v2); the v0 halves are merged with
;     v_or_b32_sdwa.
;   - gfx900 / gfx1010 checks: two v_pk_sub_i16 with the clamp modifier; the
;     two targets list the instructions in opposite order.
; Check lines are autogenerated; regenerate rather than hand-edit.
190define <3 x i16> @v_ssubsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
191; GFX6-LABEL: v_ssubsat_v3i16:
192; GFX6:       ; %bb.0:
193; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
195; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
196; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 16
197; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
198; GFX6-NEXT:    v_bfe_i32 v5, v5, 0, 16
199; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
200; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
201; GFX6-NEXT:    s_movk_i32 s4, 0x7fff
202; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
203; GFX6-NEXT:    v_min_i32_e32 v1, s4, v1
204; GFX6-NEXT:    s_movk_i32 s5, 0x8000
205; GFX6-NEXT:    v_min_i32_e32 v0, s4, v0
206; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
207; GFX6-NEXT:    v_max_i32_e32 v1, s5, v1
208; GFX6-NEXT:    v_max_i32_e32 v0, s5, v0
209; GFX6-NEXT:    s_mov_b32 s6, 0xffff
210; GFX6-NEXT:    v_min_i32_e32 v2, s4, v2
211; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
212; GFX6-NEXT:    v_and_b32_e32 v0, s6, v0
213; GFX6-NEXT:    v_max_i32_e32 v3, s5, v2
214; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
215; GFX6-NEXT:    v_and_b32_e32 v2, s6, v3
216; GFX6-NEXT:    v_alignbit_b32 v1, v3, v1, 16
217; GFX6-NEXT:    s_setpc_b64 s[30:31]
218;
219; GFX8-LABEL: v_ssubsat_v3i16:
220; GFX8:       ; %bb.0:
221; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
223; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
224; GFX8-NEXT:    v_sub_u16_e32 v6, v5, v4
225; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, v6, v5
226; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], 0, v4
227; GFX8-NEXT:    v_ashrrev_i16_e32 v4, 15, v6
228; GFX8-NEXT:    s_movk_i32 s6, 0x8000
229; GFX8-NEXT:    v_xor_b32_e32 v4, s6, v4
230; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
231; GFX8-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
232; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, 0, v3
233; GFX8-NEXT:    v_sub_u16_e32 v3, v1, v3
234; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v3, v1
235; GFX8-NEXT:    v_ashrrev_i16_e32 v1, 15, v3
236; GFX8-NEXT:    v_xor_b32_e32 v1, s6, v1
237; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
238; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
239; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, 0, v2
240; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v2
241; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v2, v0
242; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 15, v2
243; GFX8-NEXT:    v_xor_b32_e32 v0, s6, v0
244; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
245; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
246; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
247; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
248; GFX8-NEXT:    s_setpc_b64 s[30:31]
249;
250; GFX9-LABEL: v_ssubsat_v3i16:
251; GFX9:       ; %bb.0:
252; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253; GFX9-NEXT:    v_pk_sub_i16 v1, v1, v3 clamp
254; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v2 clamp
255; GFX9-NEXT:    s_setpc_b64 s[30:31]
256;
257; GFX10-LABEL: v_ssubsat_v3i16:
258; GFX10:       ; %bb.0:
259; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
260; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
261; GFX10-NEXT:    v_pk_sub_i16 v0, v0, v2 clamp
262; GFX10-NEXT:    v_pk_sub_i16 v1, v1, v3 clamp
263; GFX10-NEXT:    s_setpc_b64 s[30:31]
264  %result = call <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs)
265  ret <3 x i16> %result
266}
267
; Signed saturating subtraction of two <4 x i16> vectors through
; @llvm.ssub.sat.v4i16. The <4 x i16> result is bitcast to <2 x float> and
; returned in that type.
; NOTE(review): presumably the bitcast is there to control how the result is
; returned in registers; the reason is not visible in this file -- confirm.
; Expected code per llc configuration:
;   - tahiti checks: eight inputs in v0..v7 are sign-extended from 16 bits;
;     each pair is subtracted as a 32-bit value and clamped with min/max
;     against s4 = 0x7fff and s5 (s_movk_i32 0x8000), then pairs of results
;     are combined into v0 and v1 with shift/and/or.
;   - fiji checks: four compare/subtract/select sequences, with each pair of
;     halves merged through v_or_b32_sdwa.
;   - gfx900 / gfx1010 checks: two v_pk_sub_i16 with the clamp modifier.
; Check lines are autogenerated; regenerate rather than hand-edit.
268define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
269; GFX6-LABEL: v_ssubsat_v4i16:
270; GFX6:       ; %bb.0:
271; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 16
273; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
274; GFX6-NEXT:    v_bfe_i32 v5, v5, 0, 16
275; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
276; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
277; GFX6-NEXT:    s_movk_i32 s4, 0x7fff
278; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
279; GFX6-NEXT:    v_min_i32_e32 v1, s4, v1
280; GFX6-NEXT:    s_movk_i32 s5, 0x8000
281; GFX6-NEXT:    v_min_i32_e32 v0, s4, v0
282; GFX6-NEXT:    v_max_i32_e32 v1, s5, v1
283; GFX6-NEXT:    v_max_i32_e32 v0, s5, v0
284; GFX6-NEXT:    s_mov_b32 s6, 0xffff
285; GFX6-NEXT:    v_bfe_i32 v6, v6, 0, 16
286; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
287; GFX6-NEXT:    v_bfe_i32 v7, v7, 0, 16
288; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
289; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
290; GFX6-NEXT:    v_and_b32_e32 v0, s6, v0
291; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
292; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v3, v7
293; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v6
294; GFX6-NEXT:    v_min_i32_e32 v1, s4, v1
295; GFX6-NEXT:    v_min_i32_e32 v2, s4, v2
296; GFX6-NEXT:    v_max_i32_e32 v1, s5, v1
297; GFX6-NEXT:    v_max_i32_e32 v2, s5, v2
298; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
299; GFX6-NEXT:    v_and_b32_e32 v2, s6, v2
300; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
301; GFX6-NEXT:    s_setpc_b64 s[30:31]
302;
303; GFX8-LABEL: v_ssubsat_v4i16:
304; GFX8:       ; %bb.0:
305; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
307; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
308; GFX8-NEXT:    v_sub_u16_e32 v6, v5, v4
309; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, v6, v5
310; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], 0, v4
311; GFX8-NEXT:    v_ashrrev_i16_e32 v4, 15, v6
312; GFX8-NEXT:    s_movk_i32 s6, 0x8000
313; GFX8-NEXT:    v_xor_b32_e32 v4, s6, v4
314; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
315; GFX8-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
316; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, 0, v2
317; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v2
318; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v2, v0
319; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 15, v2
320; GFX8-NEXT:    v_xor_b32_e32 v0, s6, v0
321; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
322; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
323; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
324; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
325; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
326; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
327; GFX8-NEXT:    v_sub_u16_e32 v5, v4, v2
328; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, v5, v4
329; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], 0, v2
330; GFX8-NEXT:    v_ashrrev_i16_e32 v2, 15, v5
331; GFX8-NEXT:    v_xor_b32_e32 v2, s6, v2
332; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
333; GFX8-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
334; GFX8-NEXT:    v_cmp_lt_i16_e32 vcc, 0, v3
335; GFX8-NEXT:    v_sub_u16_e32 v3, v1, v3
336; GFX8-NEXT:    v_cmp_lt_i16_e64 s[4:5], v3, v1
337; GFX8-NEXT:    v_ashrrev_i16_e32 v1, 15, v3
338; GFX8-NEXT:    v_xor_b32_e32 v1, s6, v1
339; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
340; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
341; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
342; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
343; GFX8-NEXT:    s_setpc_b64 s[30:31]
344;
345; GFX9-LABEL: v_ssubsat_v4i16:
346; GFX9:       ; %bb.0:
347; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
348; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v2 clamp
349; GFX9-NEXT:    v_pk_sub_i16 v1, v1, v3 clamp
350; GFX9-NEXT:    s_setpc_b64 s[30:31]
351;
352; GFX10-LABEL: v_ssubsat_v4i16:
353; GFX10:       ; %bb.0:
354; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
356; GFX10-NEXT:    v_pk_sub_i16 v0, v0, v2 clamp
357; GFX10-NEXT:    v_pk_sub_i16 v1, v1, v3 clamp
358; GFX10-NEXT:    s_setpc_b64 s[30:31]
359  %result = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
360  %cast = bitcast <4 x i16> %result to <2 x float>
361  ret <2 x float> %cast
362}
363
; Signed saturating subtraction of two <2 x i32> vectors through
; @llvm.ssub.sat.v2i32. Expected code per llc configuration:
;   - tahiti and fiji checks: the compare/subtract/compare/select sequence is
;     emitted once per element (v0 with v2, v1 with v3), XORing each
;     difference's sign against s6, which is set up by "s_brev_b32 s6, 1".
;   - gfx900 / gfx1010 checks: one clamped subtract per element (v_sub_i32
;     and v_sub_nc_i32 respectively).
; Check lines are autogenerated; regenerate rather than hand-edit.
364define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
365; GFX6-LABEL: v_ssubsat_v2i32:
366; GFX6:       ; %bb.0:
367; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
368; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v2
369; GFX6-NEXT:    v_sub_i32_e64 v2, s[4:5], v0, v2
370; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v0
371; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v2
372; GFX6-NEXT:    s_brev_b32 s6, 1
373; GFX6-NEXT:    v_xor_b32_e32 v0, s6, v0
374; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
375; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
376; GFX6-NEXT:    v_sub_i32_e64 v2, s[4:5], v1, v3
377; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v3
378; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v1
379; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v2
380; GFX6-NEXT:    v_xor_b32_e32 v1, s6, v1
381; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
382; GFX6-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
383; GFX6-NEXT:    s_setpc_b64 s[30:31]
384;
385; GFX8-LABEL: v_ssubsat_v2i32:
386; GFX8:       ; %bb.0:
387; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
388; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v2
389; GFX8-NEXT:    v_sub_u32_e64 v2, s[4:5], v0, v2
390; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v0
391; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v2
392; GFX8-NEXT:    s_brev_b32 s6, 1
393; GFX8-NEXT:    v_xor_b32_e32 v0, s6, v0
394; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
395; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
396; GFX8-NEXT:    v_sub_u32_e64 v2, s[4:5], v1, v3
397; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v3
398; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v2, v1
399; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v2
400; GFX8-NEXT:    v_xor_b32_e32 v1, s6, v1
401; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
402; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
403; GFX8-NEXT:    s_setpc_b64 s[30:31]
404;
405; GFX9-LABEL: v_ssubsat_v2i32:
406; GFX9:       ; %bb.0:
407; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408; GFX9-NEXT:    v_sub_i32 v0, v0, v2 clamp
409; GFX9-NEXT:    v_sub_i32 v1, v1, v3 clamp
410; GFX9-NEXT:    s_setpc_b64 s[30:31]
411;
412; GFX10-LABEL: v_ssubsat_v2i32:
413; GFX10:       ; %bb.0:
414; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
416; GFX10-NEXT:    v_sub_nc_i32 v0, v0, v2 clamp
417; GFX10-NEXT:    v_sub_nc_i32 v1, v1, v3 clamp
418; GFX10-NEXT:    s_setpc_b64 s[30:31]
419  %result = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
420  ret <2 x i32> %result
421}
422
; Signed saturating subtraction of two <3 x i32> vectors through
; @llvm.ssub.sat.v3i32. Expected code per llc configuration:
;   - tahiti and fiji checks: the compare/subtract/compare/select sequence is
;     emitted once per element (v0..v2 against v3..v5), XORing each
;     difference's sign against s6, which is set up by "s_brev_b32 s6, 1".
;   - gfx900 / gfx1010 checks: three clamped subtracts (v_sub_i32 and
;     v_sub_nc_i32 respectively).
; Check lines are autogenerated; regenerate rather than hand-edit.
423define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
424; GFX6-LABEL: v_ssubsat_v3i32:
425; GFX6:       ; %bb.0:
426; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
427; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v3
428; GFX6-NEXT:    v_sub_i32_e64 v3, s[4:5], v0, v3
429; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v0
430; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
431; GFX6-NEXT:    s_brev_b32 s6, 1
432; GFX6-NEXT:    v_xor_b32_e32 v0, s6, v0
433; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
434; GFX6-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
435; GFX6-NEXT:    v_sub_i32_e64 v3, s[4:5], v1, v4
436; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v4
437; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v1
438; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v3
439; GFX6-NEXT:    v_xor_b32_e32 v1, s6, v1
440; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
441; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
442; GFX6-NEXT:    v_sub_i32_e64 v3, s[4:5], v2, v5
443; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v5
444; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v2
445; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v3
446; GFX6-NEXT:    v_xor_b32_e32 v2, s6, v2
447; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
448; GFX6-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
449; GFX6-NEXT:    s_setpc_b64 s[30:31]
450;
451; GFX8-LABEL: v_ssubsat_v3i32:
452; GFX8:       ; %bb.0:
453; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v3
455; GFX8-NEXT:    v_sub_u32_e64 v3, s[4:5], v0, v3
456; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v0
457; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
458; GFX8-NEXT:    s_brev_b32 s6, 1
459; GFX8-NEXT:    v_xor_b32_e32 v0, s6, v0
460; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
461; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
462; GFX8-NEXT:    v_sub_u32_e64 v3, s[4:5], v1, v4
463; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v4
464; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v1
465; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v3
466; GFX8-NEXT:    v_xor_b32_e32 v1, s6, v1
467; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
468; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
469; GFX8-NEXT:    v_sub_u32_e64 v3, s[4:5], v2, v5
470; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v5
471; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v2
472; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v3
473; GFX8-NEXT:    v_xor_b32_e32 v2, s6, v2
474; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
475; GFX8-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
476; GFX8-NEXT:    s_setpc_b64 s[30:31]
477;
478; GFX9-LABEL: v_ssubsat_v3i32:
479; GFX9:       ; %bb.0:
480; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481; GFX9-NEXT:    v_sub_i32 v0, v0, v3 clamp
482; GFX9-NEXT:    v_sub_i32 v1, v1, v4 clamp
483; GFX9-NEXT:    v_sub_i32 v2, v2, v5 clamp
484; GFX9-NEXT:    s_setpc_b64 s[30:31]
485;
486; GFX10-LABEL: v_ssubsat_v3i32:
487; GFX10:       ; %bb.0:
488; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
489; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
490; GFX10-NEXT:    v_sub_nc_i32 v0, v0, v3 clamp
491; GFX10-NEXT:    v_sub_nc_i32 v1, v1, v4 clamp
492; GFX10-NEXT:    v_sub_nc_i32 v2, v2, v5 clamp
493; GFX10-NEXT:    s_setpc_b64 s[30:31]
494  %result = call <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs)
495  ret <3 x i32> %result
496}
497
; Signed saturating subtraction of two <4 x i32> vectors through
; @llvm.ssub.sat.v4i32. Expected code per llc configuration:
;   - tahiti and fiji checks: the compare/subtract/compare/select sequence is
;     emitted once per element (v0..v3 against v4..v7), XORing each
;     difference's sign against s6, which is set up by "s_brev_b32 s6, 1".
;   - gfx900 / gfx1010 checks: four clamped subtracts (v_sub_i32 and
;     v_sub_nc_i32 respectively).
; Check lines are autogenerated; regenerate rather than hand-edit.
498define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
499; GFX6-LABEL: v_ssubsat_v4i32:
500; GFX6:       ; %bb.0:
501; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v4
503; GFX6-NEXT:    v_sub_i32_e64 v4, s[4:5], v0, v4
504; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v0
505; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v4
506; GFX6-NEXT:    s_brev_b32 s6, 1
507; GFX6-NEXT:    v_xor_b32_e32 v0, s6, v0
508; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
509; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
510; GFX6-NEXT:    v_sub_i32_e64 v4, s[4:5], v1, v5
511; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v5
512; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v1
513; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v4
514; GFX6-NEXT:    v_xor_b32_e32 v1, s6, v1
515; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
516; GFX6-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
517; GFX6-NEXT:    v_sub_i32_e64 v4, s[4:5], v2, v6
518; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v6
519; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v2
520; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v4
521; GFX6-NEXT:    v_xor_b32_e32 v2, s6, v2
522; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
523; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
524; GFX6-NEXT:    v_sub_i32_e64 v4, s[4:5], v3, v7
525; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v7
526; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v3
527; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v4
528; GFX6-NEXT:    v_xor_b32_e32 v3, s6, v3
529; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
530; GFX6-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
531; GFX6-NEXT:    s_setpc_b64 s[30:31]
532;
533; GFX8-LABEL: v_ssubsat_v4i32:
534; GFX8:       ; %bb.0:
535; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
536; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v4
537; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v0, v4
538; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v0
539; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v4
540; GFX8-NEXT:    s_brev_b32 s6, 1
541; GFX8-NEXT:    v_xor_b32_e32 v0, s6, v0
542; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
543; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
544; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v1, v5
545; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v5
546; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v1
547; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v4
548; GFX8-NEXT:    v_xor_b32_e32 v1, s6, v1
549; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
550; GFX8-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
551; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v2, v6
552; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v6
553; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v2
554; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v4
555; GFX8-NEXT:    v_xor_b32_e32 v2, s6, v2
556; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
557; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
558; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v3, v7
559; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v7
560; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v4, v3
561; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v4
562; GFX8-NEXT:    v_xor_b32_e32 v3, s6, v3
563; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
564; GFX8-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
565; GFX8-NEXT:    s_setpc_b64 s[30:31]
566;
567; GFX9-LABEL: v_ssubsat_v4i32:
568; GFX9:       ; %bb.0:
569; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
570; GFX9-NEXT:    v_sub_i32 v0, v0, v4 clamp
571; GFX9-NEXT:    v_sub_i32 v1, v1, v5 clamp
572; GFX9-NEXT:    v_sub_i32 v2, v2, v6 clamp
573; GFX9-NEXT:    v_sub_i32 v3, v3, v7 clamp
574; GFX9-NEXT:    s_setpc_b64 s[30:31]
575;
576; GFX10-LABEL: v_ssubsat_v4i32:
577; GFX10:       ; %bb.0:
578; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
580; GFX10-NEXT:    v_sub_nc_i32 v0, v0, v4 clamp
581; GFX10-NEXT:    v_sub_nc_i32 v1, v1, v5 clamp
582; GFX10-NEXT:    v_sub_nc_i32 v2, v2, v6 clamp
583; GFX10-NEXT:    v_sub_nc_i32 v3, v3, v7 clamp
584; GFX10-NEXT:    s_setpc_b64 s[30:31]
585  %result = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
586  ret <4 x i32> %result
587}
588
; Signed saturating subtraction of two <8 x i32> vectors through
; @llvm.ssub.sat.v8i32. Expected code per llc configuration:
;   - tahiti and fiji checks: the compare/subtract/compare/select sequence is
;     emitted once per element (v0..v7 against v8..v15), XORing each
;     difference's sign against s6, which is set up by "s_brev_b32 s6, 1".
;   - gfx900 / gfx1010 checks: eight clamped subtracts (v_sub_i32 and
;     v_sub_nc_i32 respectively).
; Check lines are autogenerated; regenerate rather than hand-edit.
589define <8 x i32> @v_ssubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
590; GFX6-LABEL: v_ssubsat_v8i32:
591; GFX6:       ; %bb.0:
592; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
593; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v8
594; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v0, v8
595; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v0
596; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v8
597; GFX6-NEXT:    s_brev_b32 s6, 1
598; GFX6-NEXT:    v_xor_b32_e32 v0, s6, v0
599; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
600; GFX6-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
601; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v1, v9
602; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v9
603; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v1
604; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v8
605; GFX6-NEXT:    v_xor_b32_e32 v1, s6, v1
606; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
607; GFX6-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
608; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v2, v10
609; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v10
610; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v2
611; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v8
612; GFX6-NEXT:    v_xor_b32_e32 v2, s6, v2
613; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
614; GFX6-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
615; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v3, v11
616; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v11
617; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v3
618; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v8
619; GFX6-NEXT:    v_xor_b32_e32 v3, s6, v3
620; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
621; GFX6-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
622; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v4, v12
623; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v12
624; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v4
625; GFX6-NEXT:    v_ashrrev_i32_e32 v4, 31, v8
626; GFX6-NEXT:    v_xor_b32_e32 v4, s6, v4
627; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
628; GFX6-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
629; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v5, v13
630; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v13
631; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v5
632; GFX6-NEXT:    v_ashrrev_i32_e32 v5, 31, v8
633; GFX6-NEXT:    v_xor_b32_e32 v5, s6, v5
634; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
635; GFX6-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
636; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v6, v14
637; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v14
638; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v6
639; GFX6-NEXT:    v_ashrrev_i32_e32 v6, 31, v8
640; GFX6-NEXT:    v_xor_b32_e32 v6, s6, v6
641; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
642; GFX6-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
643; GFX6-NEXT:    v_sub_i32_e64 v8, s[4:5], v7, v15
644; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v15
645; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v7
646; GFX6-NEXT:    v_ashrrev_i32_e32 v7, 31, v8
647; GFX6-NEXT:    v_xor_b32_e32 v7, s6, v7
648; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
649; GFX6-NEXT:    v_cndmask_b32_e32 v7, v8, v7, vcc
650; GFX6-NEXT:    s_setpc_b64 s[30:31]
651;
652; GFX8-LABEL: v_ssubsat_v8i32:
653; GFX8:       ; %bb.0:
654; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
655; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v8
656; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v0, v8
657; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v0
658; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v8
659; GFX8-NEXT:    s_brev_b32 s6, 1
660; GFX8-NEXT:    v_xor_b32_e32 v0, s6, v0
661; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
662; GFX8-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
663; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v1, v9
664; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v9
665; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v1
666; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v8
667; GFX8-NEXT:    v_xor_b32_e32 v1, s6, v1
668; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
669; GFX8-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
670; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v2, v10
671; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v10
672; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v2
673; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v8
674; GFX8-NEXT:    v_xor_b32_e32 v2, s6, v2
675; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
676; GFX8-NEXT:    v_cndmask_b32_e32 v2, v8, v2, vcc
677; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v3, v11
678; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v11
679; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v3
680; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v8
681; GFX8-NEXT:    v_xor_b32_e32 v3, s6, v3
682; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
683; GFX8-NEXT:    v_cndmask_b32_e32 v3, v8, v3, vcc
684; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v4, v12
685; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v12
686; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v4
687; GFX8-NEXT:    v_ashrrev_i32_e32 v4, 31, v8
688; GFX8-NEXT:    v_xor_b32_e32 v4, s6, v4
689; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
690; GFX8-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
691; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v5, v13
692; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v13
693; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v5
694; GFX8-NEXT:    v_ashrrev_i32_e32 v5, 31, v8
695; GFX8-NEXT:    v_xor_b32_e32 v5, s6, v5
696; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
697; GFX8-NEXT:    v_cndmask_b32_e32 v5, v8, v5, vcc
698; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v6, v14
699; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v14
700; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v6
701; GFX8-NEXT:    v_ashrrev_i32_e32 v6, 31, v8
702; GFX8-NEXT:    v_xor_b32_e32 v6, s6, v6
703; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
704; GFX8-NEXT:    v_cndmask_b32_e32 v6, v8, v6, vcc
705; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v7, v15
706; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v15
707; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v8, v7
708; GFX8-NEXT:    v_ashrrev_i32_e32 v7, 31, v8
709; GFX8-NEXT:    v_xor_b32_e32 v7, s6, v7
710; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
711; GFX8-NEXT:    v_cndmask_b32_e32 v7, v8, v7, vcc
712; GFX8-NEXT:    s_setpc_b64 s[30:31]
713;
714; GFX9-LABEL: v_ssubsat_v8i32:
715; GFX9:       ; %bb.0:
716; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
717; GFX9-NEXT:    v_sub_i32 v0, v0, v8 clamp
718; GFX9-NEXT:    v_sub_i32 v1, v1, v9 clamp
719; GFX9-NEXT:    v_sub_i32 v2, v2, v10 clamp
720; GFX9-NEXT:    v_sub_i32 v3, v3, v11 clamp
721; GFX9-NEXT:    v_sub_i32 v4, v4, v12 clamp
722; GFX9-NEXT:    v_sub_i32 v5, v5, v13 clamp
723; GFX9-NEXT:    v_sub_i32 v6, v6, v14 clamp
724; GFX9-NEXT:    v_sub_i32 v7, v7, v15 clamp
725; GFX9-NEXT:    s_setpc_b64 s[30:31]
726;
727; GFX10-LABEL: v_ssubsat_v8i32:
728; GFX10:       ; %bb.0:
729; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
730; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
731; GFX10-NEXT:    v_sub_nc_i32 v0, v0, v8 clamp
732; GFX10-NEXT:    v_sub_nc_i32 v1, v1, v9 clamp
733; GFX10-NEXT:    v_sub_nc_i32 v2, v2, v10 clamp
734; GFX10-NEXT:    v_sub_nc_i32 v3, v3, v11 clamp
735; GFX10-NEXT:    v_sub_nc_i32 v4, v4, v12 clamp
736; GFX10-NEXT:    v_sub_nc_i32 v5, v5, v13 clamp
737; GFX10-NEXT:    v_sub_nc_i32 v6, v6, v14 clamp
738; GFX10-NEXT:    v_sub_nc_i32 v7, v7, v15 clamp
739; GFX10-NEXT:    s_setpc_b64 s[30:31]
740  %result = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %lhs, <8 x i32> %rhs)
741  ret <8 x i32> %result
742}
743
; Test: signed saturating subtract of two <16 x i32> vectors through
; @llvm.ssub.sat.v16i32. The assertions below are autogenerated (see the
; first line of this file); regenerate them instead of editing by hand.
;
; Expected code per run line:
;  * tahiti and fiji runs: every lane is expanded into a subtract, a compare
;    (0 < rhs), a compare (difference < lhs), an s_xor_b64 of the two
;    conditions, and a v_cndmask_b32 that picks between the raw difference
;    and (difference >> 31) ^ s6. s6 is set once per function by
;    `s_brev_b32 s6, 1`.
;  * gfx900 and gfx1010 runs: every lane is a single subtract carrying the
;    `clamp` modifier.
;  * In all four runs the rhs operand of lane 15 is fetched with
;    `buffer_load_dword ..., off, s[0:3], s32` and awaited with
;    `s_waitcnt vmcnt(0)` before it is used; the other operands come from
;    v0-v30. Presumably this is the one argument dword that does not fit in
;    registers -- confirm against the calling convention.
744define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
745; GFX6-LABEL: v_ssubsat_v16i32:
746; GFX6:       ; %bb.0:
747; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
748; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v16
749; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v0, v16
750; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v0
751; GFX6-NEXT:    s_brev_b32 s6, 1
752; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 31, v16
753; GFX6-NEXT:    v_xor_b32_e32 v0, s6, v0
754; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
755; GFX6-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc
756; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v1, v17
757; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v17
758; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v1
759; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v16
760; GFX6-NEXT:    v_xor_b32_e32 v1, s6, v1
761; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
762; GFX6-NEXT:    v_cndmask_b32_e32 v1, v16, v1, vcc
763; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v2, v18
764; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v18
765; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v2
766; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v16
767; GFX6-NEXT:    v_xor_b32_e32 v2, s6, v2
768; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
769; GFX6-NEXT:    v_cndmask_b32_e32 v2, v16, v2, vcc
770; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v3, v19
771; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v19
772; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v3
773; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v16
774; GFX6-NEXT:    v_xor_b32_e32 v3, s6, v3
775; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
776; GFX6-NEXT:    v_cndmask_b32_e32 v3, v16, v3, vcc
777; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v4, v20
778; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v20
779; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v4
780; GFX6-NEXT:    v_ashrrev_i32_e32 v4, 31, v16
781; GFX6-NEXT:    v_xor_b32_e32 v4, s6, v4
782; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
783; GFX6-NEXT:    v_cndmask_b32_e32 v4, v16, v4, vcc
784; GFX6-NEXT:    buffer_load_dword v16, off, s[0:3], s32
785; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v5, v21
786; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v21
787; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v5
788; GFX6-NEXT:    v_ashrrev_i32_e32 v5, 31, v17
789; GFX6-NEXT:    v_xor_b32_e32 v5, s6, v5
790; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
791; GFX6-NEXT:    v_cndmask_b32_e32 v5, v17, v5, vcc
792; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v6, v22
793; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v22
794; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v6
795; GFX6-NEXT:    v_ashrrev_i32_e32 v6, 31, v17
796; GFX6-NEXT:    v_xor_b32_e32 v6, s6, v6
797; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
798; GFX6-NEXT:    v_cndmask_b32_e32 v6, v17, v6, vcc
799; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v7, v23
800; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v23
801; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v7
802; GFX6-NEXT:    v_ashrrev_i32_e32 v7, 31, v17
803; GFX6-NEXT:    v_xor_b32_e32 v7, s6, v7
804; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
805; GFX6-NEXT:    v_cndmask_b32_e32 v7, v17, v7, vcc
806; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v8, v24
807; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v24
808; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v8
809; GFX6-NEXT:    v_ashrrev_i32_e32 v8, 31, v17
810; GFX6-NEXT:    v_xor_b32_e32 v8, s6, v8
811; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
812; GFX6-NEXT:    v_cndmask_b32_e32 v8, v17, v8, vcc
813; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v9, v25
814; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v25
815; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v9
816; GFX6-NEXT:    v_ashrrev_i32_e32 v9, 31, v17
817; GFX6-NEXT:    v_xor_b32_e32 v9, s6, v9
818; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
819; GFX6-NEXT:    v_cndmask_b32_e32 v9, v17, v9, vcc
820; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v10, v26
821; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v26
822; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v10
823; GFX6-NEXT:    v_ashrrev_i32_e32 v10, 31, v17
824; GFX6-NEXT:    v_xor_b32_e32 v10, s6, v10
825; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
826; GFX6-NEXT:    v_cndmask_b32_e32 v10, v17, v10, vcc
827; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v11, v27
828; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v27
829; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v11
830; GFX6-NEXT:    v_ashrrev_i32_e32 v11, 31, v17
831; GFX6-NEXT:    v_xor_b32_e32 v11, s6, v11
832; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
833; GFX6-NEXT:    v_cndmask_b32_e32 v11, v17, v11, vcc
834; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v12, v28
835; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v28
836; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v12
837; GFX6-NEXT:    v_ashrrev_i32_e32 v12, 31, v17
838; GFX6-NEXT:    v_xor_b32_e32 v12, s6, v12
839; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
840; GFX6-NEXT:    v_cndmask_b32_e32 v12, v17, v12, vcc
841; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v13, v29
842; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v29
843; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v13
844; GFX6-NEXT:    v_ashrrev_i32_e32 v13, 31, v17
845; GFX6-NEXT:    v_xor_b32_e32 v13, s6, v13
846; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
847; GFX6-NEXT:    v_cndmask_b32_e32 v13, v17, v13, vcc
848; GFX6-NEXT:    v_sub_i32_e64 v17, s[4:5], v14, v30
849; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v30
850; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v14
851; GFX6-NEXT:    v_ashrrev_i32_e32 v14, 31, v17
852; GFX6-NEXT:    v_xor_b32_e32 v14, s6, v14
853; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
854; GFX6-NEXT:    v_cndmask_b32_e32 v14, v17, v14, vcc
855; GFX6-NEXT:    s_waitcnt vmcnt(0)
856; GFX6-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v16
857; GFX6-NEXT:    v_sub_i32_e64 v16, s[4:5], v15, v16
858; GFX6-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v15
859; GFX6-NEXT:    v_ashrrev_i32_e32 v15, 31, v16
860; GFX6-NEXT:    v_xor_b32_e32 v15, s6, v15
861; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
862; GFX6-NEXT:    v_cndmask_b32_e32 v15, v16, v15, vcc
863; GFX6-NEXT:    s_setpc_b64 s[30:31]
864;
865; GFX8-LABEL: v_ssubsat_v16i32:
866; GFX8:       ; %bb.0:
867; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
868; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v16
869; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v0, v16
870; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v0
871; GFX8-NEXT:    s_brev_b32 s6, 1
872; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 31, v16
873; GFX8-NEXT:    v_xor_b32_e32 v0, s6, v0
874; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
875; GFX8-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc
876; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v1, v17
877; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v17
878; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v1
879; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v16
880; GFX8-NEXT:    v_xor_b32_e32 v1, s6, v1
881; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
882; GFX8-NEXT:    v_cndmask_b32_e32 v1, v16, v1, vcc
883; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v2, v18
884; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v18
885; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v2
886; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v16
887; GFX8-NEXT:    v_xor_b32_e32 v2, s6, v2
888; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
889; GFX8-NEXT:    v_cndmask_b32_e32 v2, v16, v2, vcc
890; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v3, v19
891; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v19
892; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v3
893; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v16
894; GFX8-NEXT:    v_xor_b32_e32 v3, s6, v3
895; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
896; GFX8-NEXT:    v_cndmask_b32_e32 v3, v16, v3, vcc
897; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v4, v20
898; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v20
899; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v4
900; GFX8-NEXT:    v_ashrrev_i32_e32 v4, 31, v16
901; GFX8-NEXT:    v_xor_b32_e32 v4, s6, v4
902; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
903; GFX8-NEXT:    v_cndmask_b32_e32 v4, v16, v4, vcc
904; GFX8-NEXT:    buffer_load_dword v16, off, s[0:3], s32
905; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v5, v21
906; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v21
907; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v5
908; GFX8-NEXT:    v_ashrrev_i32_e32 v5, 31, v17
909; GFX8-NEXT:    v_xor_b32_e32 v5, s6, v5
910; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
911; GFX8-NEXT:    v_cndmask_b32_e32 v5, v17, v5, vcc
912; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v6, v22
913; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v22
914; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v6
915; GFX8-NEXT:    v_ashrrev_i32_e32 v6, 31, v17
916; GFX8-NEXT:    v_xor_b32_e32 v6, s6, v6
917; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
918; GFX8-NEXT:    v_cndmask_b32_e32 v6, v17, v6, vcc
919; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v7, v23
920; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v23
921; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v7
922; GFX8-NEXT:    v_ashrrev_i32_e32 v7, 31, v17
923; GFX8-NEXT:    v_xor_b32_e32 v7, s6, v7
924; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
925; GFX8-NEXT:    v_cndmask_b32_e32 v7, v17, v7, vcc
926; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v8, v24
927; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v24
928; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v8
929; GFX8-NEXT:    v_ashrrev_i32_e32 v8, 31, v17
930; GFX8-NEXT:    v_xor_b32_e32 v8, s6, v8
931; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
932; GFX8-NEXT:    v_cndmask_b32_e32 v8, v17, v8, vcc
933; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v9, v25
934; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v25
935; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v9
936; GFX8-NEXT:    v_ashrrev_i32_e32 v9, 31, v17
937; GFX8-NEXT:    v_xor_b32_e32 v9, s6, v9
938; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
939; GFX8-NEXT:    v_cndmask_b32_e32 v9, v17, v9, vcc
940; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v10, v26
941; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v26
942; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v10
943; GFX8-NEXT:    v_ashrrev_i32_e32 v10, 31, v17
944; GFX8-NEXT:    v_xor_b32_e32 v10, s6, v10
945; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
946; GFX8-NEXT:    v_cndmask_b32_e32 v10, v17, v10, vcc
947; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v11, v27
948; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v27
949; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v11
950; GFX8-NEXT:    v_ashrrev_i32_e32 v11, 31, v17
951; GFX8-NEXT:    v_xor_b32_e32 v11, s6, v11
952; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
953; GFX8-NEXT:    v_cndmask_b32_e32 v11, v17, v11, vcc
954; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v12, v28
955; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v28
956; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v12
957; GFX8-NEXT:    v_ashrrev_i32_e32 v12, 31, v17
958; GFX8-NEXT:    v_xor_b32_e32 v12, s6, v12
959; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
960; GFX8-NEXT:    v_cndmask_b32_e32 v12, v17, v12, vcc
961; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v13, v29
962; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v29
963; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v13
964; GFX8-NEXT:    v_ashrrev_i32_e32 v13, 31, v17
965; GFX8-NEXT:    v_xor_b32_e32 v13, s6, v13
966; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
967; GFX8-NEXT:    v_cndmask_b32_e32 v13, v17, v13, vcc
968; GFX8-NEXT:    v_sub_u32_e64 v17, s[4:5], v14, v30
969; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v30
970; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v17, v14
971; GFX8-NEXT:    v_ashrrev_i32_e32 v14, 31, v17
972; GFX8-NEXT:    v_xor_b32_e32 v14, s6, v14
973; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
974; GFX8-NEXT:    v_cndmask_b32_e32 v14, v17, v14, vcc
975; GFX8-NEXT:    s_waitcnt vmcnt(0)
976; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v16
977; GFX8-NEXT:    v_sub_u32_e64 v16, s[4:5], v15, v16
978; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v16, v15
979; GFX8-NEXT:    v_ashrrev_i32_e32 v15, 31, v16
980; GFX8-NEXT:    v_xor_b32_e32 v15, s6, v15
981; GFX8-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
982; GFX8-NEXT:    v_cndmask_b32_e32 v15, v16, v15, vcc
983; GFX8-NEXT:    s_setpc_b64 s[30:31]
984;
985; GFX9-LABEL: v_ssubsat_v16i32:
986; GFX9:       ; %bb.0:
987; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
988; GFX9-NEXT:    v_sub_i32 v0, v0, v16 clamp
989; GFX9-NEXT:    buffer_load_dword v16, off, s[0:3], s32
990; GFX9-NEXT:    v_sub_i32 v1, v1, v17 clamp
991; GFX9-NEXT:    v_sub_i32 v2, v2, v18 clamp
992; GFX9-NEXT:    v_sub_i32 v3, v3, v19 clamp
993; GFX9-NEXT:    v_sub_i32 v4, v4, v20 clamp
994; GFX9-NEXT:    v_sub_i32 v5, v5, v21 clamp
995; GFX9-NEXT:    v_sub_i32 v6, v6, v22 clamp
996; GFX9-NEXT:    v_sub_i32 v7, v7, v23 clamp
997; GFX9-NEXT:    v_sub_i32 v8, v8, v24 clamp
998; GFX9-NEXT:    v_sub_i32 v9, v9, v25 clamp
999; GFX9-NEXT:    v_sub_i32 v10, v10, v26 clamp
1000; GFX9-NEXT:    v_sub_i32 v11, v11, v27 clamp
1001; GFX9-NEXT:    v_sub_i32 v12, v12, v28 clamp
1002; GFX9-NEXT:    v_sub_i32 v13, v13, v29 clamp
1003; GFX9-NEXT:    v_sub_i32 v14, v14, v30 clamp
1004; GFX9-NEXT:    s_waitcnt vmcnt(0)
1005; GFX9-NEXT:    v_sub_i32 v15, v15, v16 clamp
1006; GFX9-NEXT:    s_setpc_b64 s[30:31]
1007;
1008; GFX10-LABEL: v_ssubsat_v16i32:
1009; GFX10:       ; %bb.0:
1010; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1012; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
1013; GFX10-NEXT:    v_sub_nc_i32 v0, v0, v16 clamp
1014; GFX10-NEXT:    v_sub_nc_i32 v1, v1, v17 clamp
1015; GFX10-NEXT:    v_sub_nc_i32 v2, v2, v18 clamp
1016; GFX10-NEXT:    v_sub_nc_i32 v3, v3, v19 clamp
1017; GFX10-NEXT:    v_sub_nc_i32 v4, v4, v20 clamp
1018; GFX10-NEXT:    v_sub_nc_i32 v5, v5, v21 clamp
1019; GFX10-NEXT:    v_sub_nc_i32 v6, v6, v22 clamp
1020; GFX10-NEXT:    v_sub_nc_i32 v7, v7, v23 clamp
1021; GFX10-NEXT:    v_sub_nc_i32 v8, v8, v24 clamp
1022; GFX10-NEXT:    v_sub_nc_i32 v9, v9, v25 clamp
1023; GFX10-NEXT:    v_sub_nc_i32 v10, v10, v26 clamp
1024; GFX10-NEXT:    v_sub_nc_i32 v11, v11, v27 clamp
1025; GFX10-NEXT:    v_sub_nc_i32 v12, v12, v28 clamp
1026; GFX10-NEXT:    v_sub_nc_i32 v13, v13, v29 clamp
1027; GFX10-NEXT:    v_sub_nc_i32 v14, v14, v30 clamp
1028; GFX10-NEXT:    s_waitcnt vmcnt(0)
1029; GFX10-NEXT:    v_sub_nc_i32 v15, v15, v31 clamp
1030; GFX10-NEXT:    s_setpc_b64 s[30:31]
1031  %result = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs)
1032  ret <16 x i32> %result
1033}
1034
1035
; Test: signed saturating subtract of two i64 values through
; @llvm.ssub.sat.i64. The assertions below are autogenerated (see the first
; line of this file); regenerate them instead of editing by hand.
;
; Expected code, common to all four run lines:
;  * the 64-bit difference is formed in v[4:5] by a low-half subtract that
;    writes a carry/borrow mask, followed by a high-half subtract that
;    consumes it;
;  * the select condition is (difference < lhs) XOR (0 < rhs), both as
;    signed 64-bit compares;
;  * when the condition holds, the low result dword becomes the high dword
;    of the difference shifted right arithmetically by 31, and the high
;    result dword becomes that same value XOR 0x80000000; otherwise the
;    raw difference is returned.
; The gfx1010 run uses single 32-bit condition registers (vcc_lo, s4) where
; the other runs use 64-bit pairs (vcc, s[4:5]).
1036define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
1037; GFX6-LABEL: v_ssubsat_i64:
1038; GFX6:       ; %bb.0:
1039; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1040; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
1041; GFX6-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
1042; GFX6-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1043; GFX6-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1044; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
1045; GFX6-NEXT:    s_xor_b64 vcc, s[4:5], vcc
1046; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1047; GFX6-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1048; GFX6-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
1049; GFX6-NEXT:    s_setpc_b64 s[30:31]
1050;
1051; GFX8-LABEL: v_ssubsat_i64:
1052; GFX8:       ; %bb.0:
1053; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1054; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v0, v2
1055; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
1056; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1057; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1058; GFX8-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
1059; GFX8-NEXT:    s_xor_b64 vcc, s[4:5], vcc
1060; GFX8-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1061; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1062; GFX8-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
1063; GFX8-NEXT:    s_setpc_b64 s[30:31]
1064;
1065; GFX9-LABEL: v_ssubsat_i64:
1066; GFX9:       ; %bb.0:
1067; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1068; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v0, v2
1069; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
1070; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
1071; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
1072; GFX9-NEXT:    v_ashrrev_i32_e32 v1, 31, v5
1073; GFX9-NEXT:    s_xor_b64 vcc, s[4:5], vcc
1074; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v1, vcc
1075; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1076; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
1077; GFX9-NEXT:    s_setpc_b64 s[30:31]
1078;
1079; GFX10-LABEL: v_ssubsat_i64:
1080; GFX10:       ; %bb.0:
1081; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1082; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1083; GFX10-NEXT:    v_sub_co_u32 v4, vcc_lo, v0, v2
1084; GFX10-NEXT:    v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
1085; GFX10-NEXT:    v_cmp_lt_i64_e64 s4, 0, v[2:3]
1086; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v5
1087; GFX10-NEXT:    v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
1088; GFX10-NEXT:    v_xor_b32_e32 v1, 0x80000000, v6
1089; GFX10-NEXT:    s_xor_b32 vcc_lo, s4, vcc_lo
1090; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v6, vcc_lo
1091; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
1092; GFX10-NEXT:    s_setpc_b64 s[30:31]
1093  %result = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs)
1094  ret i64 %result
1095}
1096
; Declarations of the llvm.ssub.sat intrinsic overloads, one per scalar or
; vector type listed. Every declaration references attribute group #0,
; which is defined after the list.
1097declare i8 @llvm.ssub.sat.i8(i8, i8) #0
1098declare i16 @llvm.ssub.sat.i16(i16, i16) #0
1099declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) #0
1100declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) #0
1101declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) #0
1102declare i32 @llvm.ssub.sat.i32(i32, i32) #0
1103declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) #0
1104declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) #0
1105declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) #0
1106declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>) #0
1107declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) #0
1108declare i64 @llvm.ssub.sat.i64(i64, i64) #0
1109
; Attribute group applied to every intrinsic declaration in the list above.
1110attributes #0 = { nounwind readnone speculatable willreturn }
1111