1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s
3
; csh_16: i16 shifts whose amount is pre-masked in IR with 15.
; %a is shifted by (%b & 15) using shl, lshr and ashr, and the three results
; are summed. The expected gfx900 assembly below passes v1 straight to all
; three 16-bit shift instructions and contains no separate AND instruction.
; NOTE(review): presumably the mask is dropped because the hardware shift
; already ignores the upper bits of the amount -- confirm against the ISA docs.
4define i16 @csh_16(i16 %a, i16 %b) {
5; CHECK-LABEL: csh_16:
6; CHECK:       ; %bb.0:
7; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8; CHECK-NEXT:    v_lshlrev_b16_e32 v2, v1, v0
9; CHECK-NEXT:    v_lshrrev_b16_e32 v3, v1, v0
10; CHECK-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
11; CHECK-NEXT:    v_add_u16_e32 v1, v2, v3
12; CHECK-NEXT:    v_add_u16_e32 v0, v1, v0
13; CHECK-NEXT:    s_setpc_b64 s[30:31]
14  %and = and i16 %b, 15
15  %shl = shl i16 %a, %and
16  %lshr = lshr i16 %a, %and
17  %ashr = ashr i16 %a, %and
18  %ret.0 = add i16 %shl, %lshr
19  %ret = add i16 %ret.0, %ashr
20  ret i16 %ret
21}
22
; csh_32: i32 shifts whose amount is pre-masked in IR with 31.
; %a is shifted by (%b & 31) using shl, lshr and ashr, and the three results
; are summed. The expected assembly below uses v1 unmasked as the amount for
; each 32-bit shift (no AND instruction) and folds both IR adds into a single
; v_add3_u32.
23define i32 @csh_32(i32 %a, i32 %b) {
24; CHECK-LABEL: csh_32:
25; CHECK:       ; %bb.0:
26; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27; CHECK-NEXT:    v_lshlrev_b32_e32 v2, v1, v0
28; CHECK-NEXT:    v_lshrrev_b32_e32 v3, v1, v0
29; CHECK-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
30; CHECK-NEXT:    v_add3_u32 v0, v2, v3, v0
31; CHECK-NEXT:    s_setpc_b64 s[30:31]
32  %and = and i32 %b, 31
33  %shl = shl i32 %a, %and
34  %lshr = lshr i32 %a, %and
35  %ashr = ashr i32 %a, %and
36  %ret.0 = add i32 %shl, %lshr
37  %ret = add i32 %ret.0, %ashr
38  ret i32 %ret
39}
40
; s_csh_32: same IR pattern as csh_32 (amount masked with 31, then shl, lshr
; and ashr summed), but declared amdgpu_ps with both arguments marked inreg.
; The expected assembly below is entirely scalar: s_lshl_b32, s_lshr_b32 and
; s_ashr_i32 all take s1 unmasked as the amount (no AND instruction), followed
; by two s_add_i32.
41define amdgpu_ps i32 @s_csh_32(i32 inreg %a, i32 inreg %b) {
42; CHECK-LABEL: s_csh_32:
43; CHECK:       ; %bb.0:
44; CHECK-NEXT:    s_lshl_b32 s2, s0, s1
45; CHECK-NEXT:    s_lshr_b32 s3, s0, s1
46; CHECK-NEXT:    s_ashr_i32 s0, s0, s1
47; CHECK-NEXT:    s_add_i32 s1, s2, s3
48; CHECK-NEXT:    s_add_i32 s0, s1, s0
49; CHECK-NEXT:    ; return to shader part epilog
50  %and = and i32 %b, 31
51  %shl = shl i32 %a, %and
52  %lshr = lshr i32 %a, %and
53  %ashr = ashr i32 %a, %and
54  %ret.0 = add i32 %shl, %lshr
55  %ret = add i32 %ret.0, %ashr
56  ret i32 %ret
57}
58
; csh_v4i32: <4 x i32> variant of the masked-shift pattern.
; Every element of %b is masked with 31 and used as the amount for shl, lshr
; and ashr of %a; the three vector results are summed. The expected assembly
; below has one shift instruction per element per shift kind (twelve in
; total), each taking the unmasked amount register v4..v7, no AND
; instruction, and one v_add3_u32 per element.
59define <4 x i32> @csh_v4i32(<4 x i32> %a, <4 x i32> %b) {
60; CHECK-LABEL: csh_v4i32:
61; CHECK:       ; %bb.0:
62; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63; CHECK-NEXT:    v_lshlrev_b32_e32 v8, v7, v3
64; CHECK-NEXT:    v_lshlrev_b32_e32 v9, v6, v2
65; CHECK-NEXT:    v_lshlrev_b32_e32 v10, v5, v1
66; CHECK-NEXT:    v_lshlrev_b32_e32 v11, v4, v0
67; CHECK-NEXT:    v_lshrrev_b32_e32 v12, v7, v3
68; CHECK-NEXT:    v_lshrrev_b32_e32 v13, v6, v2
69; CHECK-NEXT:    v_lshrrev_b32_e32 v14, v5, v1
70; CHECK-NEXT:    v_lshrrev_b32_e32 v15, v4, v0
71; CHECK-NEXT:    v_ashrrev_i32_e32 v3, v7, v3
72; CHECK-NEXT:    v_ashrrev_i32_e32 v2, v6, v2
73; CHECK-NEXT:    v_ashrrev_i32_e32 v1, v5, v1
74; CHECK-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
75; CHECK-NEXT:    v_add3_u32 v0, v11, v15, v0
76; CHECK-NEXT:    v_add3_u32 v1, v10, v14, v1
77; CHECK-NEXT:    v_add3_u32 v2, v9, v13, v2
78; CHECK-NEXT:    v_add3_u32 v3, v8, v12, v3
79; CHECK-NEXT:    s_setpc_b64 s[30:31]
80  %and = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
81  %shl = shl <4 x i32> %a, %and
82  %lshr = lshr <4 x i32> %a, %and
83  %ashr = ashr <4 x i32> %a, %and
84  %ret.0 = add <4 x i32> %shl, %lshr
85  %ret = add <4 x i32> %ret.0, %ashr
86  ret <4 x i32> %ret
87}
88
; s_csh_v4i32: same IR pattern as csh_v4i32, but declared amdgpu_ps with both
; vector arguments marked inreg. The expected assembly below is entirely
; scalar: twelve s_lshl_b32 / s_lshr_b32 / s_ashr_i32 instructions taking the
; unmasked amounts s4..s7 (no AND instruction), followed by eight s_add_i32
; (two per element).
89define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg %a, <4 x i32> inreg %b) {
90; CHECK-LABEL: s_csh_v4i32:
91; CHECK:       ; %bb.0:
92; CHECK-NEXT:    s_lshl_b32 s8, s0, s4
93; CHECK-NEXT:    s_lshl_b32 s9, s1, s5
94; CHECK-NEXT:    s_lshl_b32 s10, s2, s6
95; CHECK-NEXT:    s_lshl_b32 s11, s3, s7
96; CHECK-NEXT:    s_lshr_b32 s12, s0, s4
97; CHECK-NEXT:    s_lshr_b32 s13, s1, s5
98; CHECK-NEXT:    s_lshr_b32 s14, s2, s6
99; CHECK-NEXT:    s_lshr_b32 s15, s3, s7
100; CHECK-NEXT:    s_ashr_i32 s3, s3, s7
101; CHECK-NEXT:    s_ashr_i32 s2, s2, s6
102; CHECK-NEXT:    s_ashr_i32 s1, s1, s5
103; CHECK-NEXT:    s_ashr_i32 s0, s0, s4
104; CHECK-NEXT:    s_add_i32 s4, s11, s15
105; CHECK-NEXT:    s_add_i32 s5, s10, s14
106; CHECK-NEXT:    s_add_i32 s6, s9, s13
107; CHECK-NEXT:    s_add_i32 s7, s8, s12
108; CHECK-NEXT:    s_add_i32 s0, s7, s0
109; CHECK-NEXT:    s_add_i32 s1, s6, s1
110; CHECK-NEXT:    s_add_i32 s2, s5, s2
111; CHECK-NEXT:    s_add_i32 s3, s4, s3
112; CHECK-NEXT:    ; return to shader part epilog
113  %and = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
114  %shl = shl <4 x i32> %a, %and
115  %lshr = lshr <4 x i32> %a, %and
116  %ashr = ashr <4 x i32> %a, %and
117  %ret.0 = add <4 x i32> %shl, %lshr
118  %ret = add <4 x i32> %ret.0, %ashr
119  ret <4 x i32> %ret
120}
121
; csh_64: i64 shifts whose amount is pre-masked in IR with 63.
; %a is shifted by (%b & 63) using shl, lshr and ashr, and the three results
; are summed. The expected assembly below gives each 64-bit shift the single
; register v2 as its amount with no AND instruction, and performs each 64-bit
; add as a v_add_co_u32 / v_addc_co_u32 carry pair through vcc.
122define i64 @csh_64(i64 %a, i64 %b) {
123; CHECK-LABEL: csh_64:
124; CHECK:       ; %bb.0:
125; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126; CHECK-NEXT:    v_lshlrev_b64 v[3:4], v2, v[0:1]
127; CHECK-NEXT:    v_lshrrev_b64 v[5:6], v2, v[0:1]
128; CHECK-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
129; CHECK-NEXT:    v_add_co_u32_e32 v2, vcc, v3, v5
130; CHECK-NEXT:    v_addc_co_u32_e32 v3, vcc, v4, v6, vcc
131; CHECK-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
132; CHECK-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
133; CHECK-NEXT:    s_setpc_b64 s[30:31]
134  %and = and i64 %b, 63
135  %shl = shl i64 %a, %and
136  %lshr = lshr i64 %a, %and
137  %ashr = ashr i64 %a, %and
138  %ret.0 = add i64 %shl, %lshr
139  %ret = add i64 %ret.0, %ashr
140  ret i64 %ret
141}
142
; s_csh_64: same IR pattern as csh_64 (amount masked with 63, then shl, lshr
; and ashr summed), but declared amdgpu_ps with both arguments marked inreg.
; The expected assembly below is entirely scalar: s_lshl_b64, s_lshr_b64 and
; s_ashr_i64 all take s2 unmasked as the amount (no AND instruction), and each
; 64-bit add is an s_add_u32 / s_addc_u32 pair.
143define amdgpu_ps i64 @s_csh_64(i64 inreg %a, i64 inreg %b) {
144; CHECK-LABEL: s_csh_64:
145; CHECK:       ; %bb.0:
146; CHECK-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
147; CHECK-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
148; CHECK-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
149; CHECK-NEXT:    s_add_u32 s2, s4, s6
150; CHECK-NEXT:    s_addc_u32 s3, s5, s7
151; CHECK-NEXT:    s_add_u32 s0, s2, s0
152; CHECK-NEXT:    s_addc_u32 s1, s3, s1
153; CHECK-NEXT:    ; return to shader part epilog
154  %and = and i64 %b, 63
155  %shl = shl i64 %a, %and
156  %lshr = lshr i64 %a, %and
157  %ashr = ashr i64 %a, %and
158  %ret.0 = add i64 %shl, %lshr
159  %ret = add i64 %ret.0, %ashr
160  ret i64 %ret
161}
162
; cshl_or: masked left shift feeding an OR.
; Computes (%a << (%b & 31)) | %a. The expected assembly below is a single
; fused v_lshl_or_b32 whose shift-amount operand is v1 unmasked; no separate
; AND, shift or OR instruction is emitted.
163define i32 @cshl_or(i32 %a, i32 %b) {
164; CHECK-LABEL: cshl_or:
165; CHECK:       ; %bb.0:
166; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167; CHECK-NEXT:    v_lshl_or_b32 v0, v0, v1, v0
168; CHECK-NEXT:    s_setpc_b64 s[30:31]
169  %and = and i32 %b, 31
170  %shl = shl i32 %a, %and
171  %or = or i32 %shl, %a
172  ret i32 %or
173}
174
; cshl_add: masked left shift feeding an ADD.
; Computes (%a << (%b & 31)) + %c. The expected assembly below is a single
; fused v_lshl_add_u32 whose shift-amount operand is v1 unmasked; no separate
; AND, shift or ADD instruction is emitted.
175define i32 @cshl_add(i32 %a, i32 %b, i32 %c) {
176; CHECK-LABEL: cshl_add:
177; CHECK:       ; %bb.0:
178; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179; CHECK-NEXT:    v_lshl_add_u32 v0, v0, v1, v2
180; CHECK-NEXT:    s_setpc_b64 s[30:31]
181  %and = and i32 %b, 31
182  %shl = shl i32 %a, %and
183  %add = add i32 %shl, %c
184  ret i32 %add
185}
186
; add_cshl: ADD feeding a masked left shift.
; Computes (%a + %b) << (%b & 31). The expected assembly below is a single
; fused v_add_lshl_u32 in which v1 serves both as the addend and, unmasked,
; as the shift amount; no separate AND, ADD or shift instruction is emitted.
187define i32 @add_cshl(i32 %a, i32 %b) {
188; CHECK-LABEL: add_cshl:
189; CHECK:       ; %bb.0:
190; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
191; CHECK-NEXT:    v_add_lshl_u32 v0, v0, v1, v1
192; CHECK-NEXT:    s_setpc_b64 s[30:31]
193  %add = add i32 %a, %b
194  %and = and i32 %b, 31
195  %shl = shl i32 %add, %and
196  ret i32 %shl
197}
198