; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel < %s | FileCheck -check-prefix=GISEL %s

; i16 shifts with the amount pre-masked to 4 bits: SDAG drops the mask
; (hardware masks the shift amount), GISEL keeps the v_and_b32.
define i16 @csh_16(i16 %a, i16 %b) {
; CHECK-LABEL: csh_16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b16_e32 v2, v1, v0
; CHECK-NEXT:    v_lshrrev_b16_e32 v3, v1, v0
; CHECK-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; CHECK-NEXT:    v_add_u16_e32 v1, v2, v3
; CHECK-NEXT:    v_add_u16_e32 v0, v1, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csh_16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 15, v1
; GISEL-NEXT:    v_lshlrev_b16_e32 v2, v1, v0
; GISEL-NEXT:    v_lshrrev_b16_e32 v3, v1, v0
; GISEL-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; GISEL-NEXT:    v_add_u16_e32 v1, v2, v3
; GISEL-NEXT:    v_add_u16_e32 v0, v1, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i16 %b, 15
  %shl = shl i16 %a, %and
  %lshr = lshr i16 %a, %and
  %ashr = ashr i16 %a, %and
  %ret.0 = add i16 %shl, %lshr
  %ret = add i16 %ret.0, %ashr
  ret i16 %ret
}
34
; i32 shifts with a 5-bit-masked amount: both selectors fold the three
; shift results into a single v_add3_u32.
define i32 @csh_32(i32 %a, i32 %b) {
; CHECK-LABEL: csh_32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, v1, v0
; CHECK-NEXT:    v_lshrrev_b32_e32 v3, v1, v0
; CHECK-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
; CHECK-NEXT:    v_add3_u32 v0, v2, v3, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csh_32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 31, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, v1, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, v1, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
; GISEL-NEXT:    v_add3_u32 v0, v2, v3, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %lshr = lshr i32 %a, %and
  %ashr = ashr i32 %a, %and
  %ret.0 = add i32 %shl, %lshr
  %ret = add i32 %ret.0, %ashr
  ret i32 %ret
}
62
; Scalar (SGPR) variant: the 5-bit mask is dropped entirely since scalar
; shifts mask the amount in hardware.
define amdgpu_ps i32 @s_csh_32_0(i32 inreg %a, i32 inreg %b) {
; CHECK-LABEL: s_csh_32_0:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b32 s2, s0, s1
; CHECK-NEXT:    s_lshr_b32 s3, s0, s1
; CHECK-NEXT:    s_ashr_i32 s0, s0, s1
; CHECK-NEXT:    s_add_i32 s1, s2, s3
; CHECK-NEXT:    s_add_i32 s0, s1, s0
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_32_0:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_lshl_b32 s2, s0, s1
; GISEL-NEXT:    s_lshr_b32 s3, s0, s1
; GISEL-NEXT:    s_ashr_i32 s0, s0, s1
; GISEL-NEXT:    s_add_i32 s1, s2, s3
; GISEL-NEXT:    s_add_i32 s0, s1, s0
; GISEL-NEXT:    ; return to shader part epilog
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %lshr = lshr i32 %a, %and
  %ashr = ashr i32 %a, %and
  %ret.0 = add i32 %shl, %lshr
  %ret = add i32 %ret.0, %ashr
  ret i32 %ret
}
89
; Same as s_csh_32_0 but with a wider (7-bit) mask; it is still removed
; because shifts by >= 32 are poison in IR.
define amdgpu_ps i32 @s_csh_32_1(i32 inreg %a, i32 inreg %b) {
; CHECK-LABEL: s_csh_32_1:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b32 s2, s0, s1
; CHECK-NEXT:    s_lshr_b32 s3, s0, s1
; CHECK-NEXT:    s_ashr_i32 s0, s0, s1
; CHECK-NEXT:    s_add_i32 s1, s2, s3
; CHECK-NEXT:    s_add_i32 s0, s1, s0
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_32_1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_lshl_b32 s2, s0, s1
; GISEL-NEXT:    s_lshr_b32 s3, s0, s1
; GISEL-NEXT:    s_ashr_i32 s0, s0, s1
; GISEL-NEXT:    s_add_i32 s1, s2, s3
; GISEL-NEXT:    s_add_i32 s0, s1, s0
; GISEL-NEXT:    ; return to shader part epilog
  %and = and i32 %b, 127
  %shl = shl i32 %a, %and
  %lshr = lshr i32 %a, %and
  %ashr = ashr i32 %a, %and
  %ret.0 = add i32 %shl, %lshr
  %ret = add i32 %ret.0, %ashr
  ret i32 %ret
}
116
; Vector (VGPR) variant: per-lane shifts; SDAG drops the per-element
; 5-bit masks, GISEL keeps one v_and_b32 per lane.
define <4 x i32> @csh_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: csh_v4i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b32_e32 v8, v7, v3
; CHECK-NEXT:    v_lshlrev_b32_e32 v9, v6, v2
; CHECK-NEXT:    v_lshlrev_b32_e32 v10, v5, v1
; CHECK-NEXT:    v_lshlrev_b32_e32 v11, v4, v0
; CHECK-NEXT:    v_lshrrev_b32_e32 v12, v7, v3
; CHECK-NEXT:    v_lshrrev_b32_e32 v13, v6, v2
; CHECK-NEXT:    v_lshrrev_b32_e32 v14, v5, v1
; CHECK-NEXT:    v_lshrrev_b32_e32 v15, v4, v0
; CHECK-NEXT:    v_ashrrev_i32_e32 v3, v7, v3
; CHECK-NEXT:    v_ashrrev_i32_e32 v2, v6, v2
; CHECK-NEXT:    v_ashrrev_i32_e32 v1, v5, v1
; CHECK-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
; CHECK-NEXT:    v_add3_u32 v0, v11, v15, v0
; CHECK-NEXT:    v_add3_u32 v1, v10, v14, v1
; CHECK-NEXT:    v_add3_u32 v2, v9, v13, v2
; CHECK-NEXT:    v_add3_u32 v3, v8, v12, v3
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csh_v4i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v4, 31, v4
; GISEL-NEXT:    v_and_b32_e32 v5, 31, v5
; GISEL-NEXT:    v_and_b32_e32 v6, 31, v6
; GISEL-NEXT:    v_and_b32_e32 v7, 31, v7
; GISEL-NEXT:    v_lshlrev_b32_e32 v8, v4, v0
; GISEL-NEXT:    v_lshlrev_b32_e32 v9, v5, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v10, v6, v2
; GISEL-NEXT:    v_lshlrev_b32_e32 v11, v7, v3
; GISEL-NEXT:    v_lshrrev_b32_e32 v12, v4, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v13, v5, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v14, v6, v2
; GISEL-NEXT:    v_lshrrev_b32_e32 v15, v7, v3
; GISEL-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v1, v5, v1
; GISEL-NEXT:    v_ashrrev_i32_e32 v2, v6, v2
; GISEL-NEXT:    v_ashrrev_i32_e32 v3, v7, v3
; GISEL-NEXT:    v_add3_u32 v0, v8, v12, v0
; GISEL-NEXT:    v_add3_u32 v1, v9, v13, v1
; GISEL-NEXT:    v_add3_u32 v2, v10, v14, v2
; GISEL-NEXT:    v_add3_u32 v3, v11, v15, v3
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %shl = shl <4 x i32> %a, %and
  %lshr = lshr <4 x i32> %a, %and
  %ashr = ashr <4 x i32> %a, %and
  %ret.0 = add <4 x i32> %shl, %lshr
  %ret = add <4 x i32> %ret.0, %ashr
  ret <4 x i32> %ret
}
171
; Scalar vector variant: SDAG removes the masks; GISEL materializes the
; splat 31 into an SGPR pair and masks with s_and_b64.
define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg %a, <4 x i32> inreg %b) {
; CHECK-LABEL: s_csh_v4i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b32 s8, s0, s4
; CHECK-NEXT:    s_lshl_b32 s9, s1, s5
; CHECK-NEXT:    s_lshl_b32 s10, s2, s6
; CHECK-NEXT:    s_lshl_b32 s11, s3, s7
; CHECK-NEXT:    s_lshr_b32 s12, s0, s4
; CHECK-NEXT:    s_lshr_b32 s13, s1, s5
; CHECK-NEXT:    s_lshr_b32 s14, s2, s6
; CHECK-NEXT:    s_lshr_b32 s15, s3, s7
; CHECK-NEXT:    s_ashr_i32 s3, s3, s7
; CHECK-NEXT:    s_ashr_i32 s2, s2, s6
; CHECK-NEXT:    s_ashr_i32 s1, s1, s5
; CHECK-NEXT:    s_ashr_i32 s0, s0, s4
; CHECK-NEXT:    s_add_i32 s4, s11, s15
; CHECK-NEXT:    s_add_i32 s5, s10, s14
; CHECK-NEXT:    s_add_i32 s6, s9, s13
; CHECK-NEXT:    s_add_i32 s7, s8, s12
; CHECK-NEXT:    s_add_i32 s0, s7, s0
; CHECK-NEXT:    s_add_i32 s1, s6, s1
; CHECK-NEXT:    s_add_i32 s2, s5, s2
; CHECK-NEXT:    s_add_i32 s3, s4, s3
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_v4i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_mov_b32 s8, 31
; GISEL-NEXT:    s_mov_b32 s9, s8
; GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], s[8:9]
; GISEL-NEXT:    s_and_b64 s[6:7], s[6:7], s[8:9]
; GISEL-NEXT:    s_lshl_b32 s8, s0, s4
; GISEL-NEXT:    s_lshl_b32 s9, s1, s5
; GISEL-NEXT:    s_lshl_b32 s10, s2, s6
; GISEL-NEXT:    s_lshl_b32 s11, s3, s7
; GISEL-NEXT:    s_lshr_b32 s12, s0, s4
; GISEL-NEXT:    s_lshr_b32 s13, s1, s5
; GISEL-NEXT:    s_lshr_b32 s14, s2, s6
; GISEL-NEXT:    s_lshr_b32 s15, s3, s7
; GISEL-NEXT:    s_ashr_i32 s0, s0, s4
; GISEL-NEXT:    s_ashr_i32 s1, s1, s5
; GISEL-NEXT:    s_ashr_i32 s2, s2, s6
; GISEL-NEXT:    s_ashr_i32 s3, s3, s7
; GISEL-NEXT:    s_add_i32 s4, s8, s12
; GISEL-NEXT:    s_add_i32 s5, s9, s13
; GISEL-NEXT:    s_add_i32 s6, s10, s14
; GISEL-NEXT:    s_add_i32 s7, s11, s15
; GISEL-NEXT:    s_add_i32 s0, s4, s0
; GISEL-NEXT:    s_add_i32 s1, s5, s1
; GISEL-NEXT:    s_add_i32 s2, s6, s2
; GISEL-NEXT:    s_add_i32 s3, s7, s3
; GISEL-NEXT:    ; return to shader part epilog
  %and = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %shl = shl <4 x i32> %a, %and
  %lshr = lshr <4 x i32> %a, %and
  %ashr = ashr <4 x i32> %a, %and
  %ret.0 = add <4 x i32> %shl, %lshr
  %ret = add <4 x i32> %ret.0, %ashr
  ret <4 x i32> %ret
}
232
; i64 shifts with a 6-bit-masked amount: 64-bit adds are expanded to
; add/addc carry chains.
define i64 @csh_64(i64 %a, i64 %b) {
; CHECK-LABEL: csh_64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b64 v[3:4], v2, v[0:1]
; CHECK-NEXT:    v_lshrrev_b64 v[5:6], v2, v[0:1]
; CHECK-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
; CHECK-NEXT:    v_add_co_u32_e32 v2, vcc, v3, v5
; CHECK-NEXT:    v_addc_co_u32_e32 v3, vcc, v4, v6, vcc
; CHECK-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
; CHECK-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csh_64:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v6, 63, v2
; GISEL-NEXT:    v_lshlrev_b64 v[2:3], v6, v[0:1]
; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v6, v[0:1]
; GISEL-NEXT:    v_ashrrev_i64 v[0:1], v6, v[0:1]
; GISEL-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
; GISEL-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
; GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i64 %b, 63
  %shl = shl i64 %a, %and
  %lshr = lshr i64 %a, %and
  %ashr = ashr i64 %a, %and
  %ret.0 = add i64 %shl, %lshr
  %ret = add i64 %ret.0, %ashr
  ret i64 %ret
}
266
; Scalar i64 variant: SDAG drops the 6-bit mask; GISEL keeps a 64-bit
; s_and_b64 with an inline 63.
define amdgpu_ps i64 @s_csh_64_0(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: s_csh_64_0:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
; CHECK-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
; CHECK-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; CHECK-NEXT:    s_add_u32 s2, s4, s6
; CHECK-NEXT:    s_addc_u32 s3, s5, s7
; CHECK-NEXT:    s_add_u32 s0, s2, s0
; CHECK-NEXT:    s_addc_u32 s1, s3, s1
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_64_0:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], 63
; GISEL-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
; GISEL-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
; GISEL-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; GISEL-NEXT:    s_add_u32 s2, s4, s6
; GISEL-NEXT:    s_addc_u32 s3, s5, s7
; GISEL-NEXT:    s_add_u32 s0, s2, s0
; GISEL-NEXT:    s_addc_u32 s1, s3, s1
; GISEL-NEXT:    ; return to shader part epilog
  %and = and i64 %b, 63
  %shl = shl i64 %a, %and
  %lshr = lshr i64 %a, %and
  %ashr = ashr i64 %a, %and
  %ret.0 = add i64 %shl, %lshr
  %ret = add i64 %ret.0, %ashr
  ret i64 %ret
}
298
; Scalar i64 variant with an 8-bit mask (255 is not an inline constant,
; so GISEL materializes it with s_mov_b64 first).
define amdgpu_ps i64 @s_csh_64_1(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: s_csh_64_1:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
; CHECK-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
; CHECK-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; CHECK-NEXT:    s_add_u32 s2, s4, s6
; CHECK-NEXT:    s_addc_u32 s3, s5, s7
; CHECK-NEXT:    s_add_u32 s0, s2, s0
; CHECK-NEXT:    s_addc_u32 s1, s3, s1
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_64_1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_mov_b64 s[4:5], 0xff
; GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], s[4:5]
; GISEL-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
; GISEL-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
; GISEL-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; GISEL-NEXT:    s_add_u32 s2, s4, s6
; GISEL-NEXT:    s_addc_u32 s3, s5, s7
; GISEL-NEXT:    s_add_u32 s0, s2, s0
; GISEL-NEXT:    s_addc_u32 s1, s3, s1
; GISEL-NEXT:    ; return to shader part epilog
  %and = and i64 %b, 255
  %shl = shl i64 %a, %and
  %lshr = lshr i64 %a, %and
  %ashr = ashr i64 %a, %and
  %ret.0 = add i64 %shl, %lshr
  %ret = add i64 %ret.0, %ashr
  ret i64 %ret
}
331
; shl + or folds to v_lshl_or_b32; SDAG drops the mask, GISEL keeps it.
define i32 @cshl_or(i32 %a, i32 %b) {
; CHECK-LABEL: cshl_or:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshl_or_b32 v0, v0, v1, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: cshl_or:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 31, v1
; GISEL-NEXT:    v_lshl_or_b32 v0, v0, v1, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %or = or i32 %shl, %a
  ret i32 %or
}
350
; shl + add folds to v_lshl_add_u32; SDAG drops the mask, GISEL keeps it.
define i32 @cshl_add(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: cshl_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshl_add_u32 v0, v0, v1, v2
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: cshl_add:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 31, v1
; GISEL-NEXT:    v_lshl_add_u32 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %add = add i32 %shl, %c
  ret i32 %add
}
369
; add + shl folds to v_add_lshl_u32; GISEL masks the amount into v2 first.
define i32 @add_cshl(i32 %a, i32 %b) {
; CHECK-LABEL: add_cshl:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_add_lshl_u32 v0, v0, v1, v1
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: add_cshl:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v2, 31, v1
; GISEL-NEXT:    v_add_lshl_u32 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %add = add i32 %a, %b
  %and = and i32 %b, 31
  %shl = shl i32 %add, %and
  ret i32 %shl
}
388