; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

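; Reference note (an informal restatement of the LangRef semantics, for reading
; the checks below): fshl(a, b, z) concatenates a:b, shifts the double-width
; value left by z modulo the bitwidth, and returns the high half; fshr shifts
; it right and returns the low half. When the first two operands are the same
; value, both reduce to a rotate, which is the pattern exercised here.
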
; When first 2 operands match, it's a rotate.

define i8 @rotl_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotl_i8_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 3, 27
; CHECK-NEXT:    rlwimi 4, 3, 3, 0, 28
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i64 @rotl_i64_const_shift(i64 %x) {
; CHECK32-LABEL: rotl_i64_const_shift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 5, 4, 3
; CHECK32-NEXT:    rotlwi 6, 3, 3
; CHECK32-NEXT:    rlwimi 5, 3, 3, 0, 28
; CHECK32-NEXT:    rlwimi 6, 4, 3, 0, 28
; CHECK32-NEXT:    mr 3, 5
; CHECK32-NEXT:    mr 4, 6
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotl_i64_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 3, 3, 3
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}
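
; A sketch of what the 32-bit sequence above computes, assuming the usual
; ppc32 split of an i64 into r3:r4 = hi:lo:
;   new hi = (hi << 3) | (lo >> 29)   (rotlwi 5, 4, 3 + rlwimi 5, 3, 3, 0, 28)
;   new lo = (lo << 3) | (hi >> 29)   (rotlwi 6, 3, 3 + rlwimi 6, 4, 3, 0, 28)
; i.e. a 64-bit rotate left by 3 done as two word-sized rotate/insert pairs.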

; When first 2 operands match, it's a rotate (by variable amount).

define i16 @rotl_i16(i16 %x, i16 %z) {
; CHECK32-LABEL: rotl_i16:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 6, 4, 28
; CHECK32-NEXT:    neg 4, 4
; CHECK32-NEXT:    clrlwi 5, 3, 16
; CHECK32-NEXT:    clrlwi 4, 4, 28
; CHECK32-NEXT:    slw 3, 3, 6
; CHECK32-NEXT:    srw 4, 5, 4
; CHECK32-NEXT:    or 3, 3, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotl_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 5, 4
; CHECK64-NEXT:    clrlwi 6, 3, 16
; CHECK64-NEXT:    clrlwi 4, 4, 28
; CHECK64-NEXT:    clrlwi 5, 5, 28
; CHECK64-NEXT:    slw 3, 3, 4
; CHECK64-NEXT:    srw 4, 6, 5
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotl_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotl_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlw 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK32_32-LABEL: rotl_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    andi. 5, 6, 32
; CHECK32_32-NEXT:    clrlwi 5, 6, 27
; CHECK32_32-NEXT:    subfic 6, 5, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB4_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 7, 3, 0
; CHECK32_32-NEXT:    ori 3, 4, 0
; CHECK32_32-NEXT:    b .LBB4_3
; CHECK32_32-NEXT:  .LBB4_2:
; CHECK32_32-NEXT:    addi 7, 4, 0
; CHECK32_32-NEXT:  .LBB4_3:
; CHECK32_32-NEXT:    srw 4, 7, 6
; CHECK32_32-NEXT:    slw 8, 3, 5
; CHECK32_32-NEXT:    srw 6, 3, 6
; CHECK32_32-NEXT:    slw 5, 7, 5
; CHECK32_32-NEXT:    or 3, 8, 4
; CHECK32_32-NEXT:    or 4, 5, 6
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    andi. 5, 6, 32
; CHECK32_64-NEXT:    clrlwi 5, 6, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB4_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 7, 3, 0
; CHECK32_64-NEXT:    ori 3, 4, 0
; CHECK32_64-NEXT:    b .LBB4_3
; CHECK32_64-NEXT:  .LBB4_2:
; CHECK32_64-NEXT:    addi 7, 4, 0
; CHECK32_64-NEXT:  .LBB4_3:
; CHECK32_64-NEXT:    subfic 6, 5, 32
; CHECK32_64-NEXT:    srw 4, 7, 6
; CHECK32_64-NEXT:    slw 8, 3, 5
; CHECK32_64-NEXT:    srw 6, 3, 6
; CHECK32_64-NEXT:    slw 5, 7, 5
; CHECK32_64-NEXT:    or 3, 8, 4
; CHECK32_64-NEXT:    or 4, 5, 6
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotld 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}
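
; Note on the 32-bit variants above: the amount's low word is in r6, so
; 'andi. 5, 6, 32' tests whether (z & 32) is set; if it is, the high and low
; words are swapped first, and the remaining rotate by z & 31 (clrlwi ..., 27)
; is done with an slw/srw/or pair per word. On ppc64 this is a single rotld.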

; Vector rotate.

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32_32-LABEL: rotl_v4i32:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlw 3, 3, 7
; CHECK32_32-NEXT:    rotlw 4, 4, 8
; CHECK32_32-NEXT:    rotlw 5, 5, 9
; CHECK32_32-NEXT:    rotlw 6, 6, 10
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_v4i32:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_v4i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) {
; CHECK32_32-LABEL: rotl_v4i32_const_shift:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlwi 3, 3, 3
; CHECK32_32-NEXT:    rotlwi 4, 4, 3
; CHECK32_32-NEXT:    rotlwi 5, 5, 3
; CHECK32_32-NEXT:    rotlwi 6, 6, 3
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_v4i32_const_shift:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vspltisw 3, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_v4i32_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vspltisw 3, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Repeat everything for funnel shift right.

define i8 @rotr_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotr_i8_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 3, 29
; CHECK-NEXT:    rlwimi 4, 3, 5, 0, 26
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i32 @rotr_i32_const_shift(i32 %x) {
; CHECK-LABEL: rotr_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 3, 3, 29
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}
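
; A rotate right by a constant is emitted as a rotate left by the complement:
; rotr(x, 3) == rotl(x, 32 - 3), hence the rotlwi by 29 above (and, in the i8
; case, the rlwimi shift of 5, since 8 - 3 = 5).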

; When first 2 operands match, it's a rotate (by variable amount).

define i16 @rotr_i16(i16 %x, i16 %z) {
; CHECK32-LABEL: rotr_i16:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 6, 4, 28
; CHECK32-NEXT:    neg 4, 4
; CHECK32-NEXT:    clrlwi 5, 3, 16
; CHECK32-NEXT:    clrlwi 4, 4, 28
; CHECK32-NEXT:    srw 5, 5, 6
; CHECK32-NEXT:    slw 3, 3, 4
; CHECK32-NEXT:    or 3, 5, 3
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotr_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 5, 4
; CHECK64-NEXT:    clrlwi 6, 3, 16
; CHECK64-NEXT:    clrlwi 4, 4, 28
; CHECK64-NEXT:    clrlwi 5, 5, 28
; CHECK64-NEXT:    srw 4, 6, 4
; CHECK64-NEXT:    slw 3, 3, 5
; CHECK64-NEXT:    or 3, 4, 3
; CHECK64-NEXT:    blr
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotr_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotr_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    neg 4, 4
; CHECK-NEXT:    rotlw 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}
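
; With a variable amount, rotate right becomes rotate left by the negated
; amount: rotlw only uses the low 5 bits of its shift operand, so neg + rotlw
; computes rotl(x, (32 - z) & 31) == rotr(x, z & 31).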

define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK32_32-LABEL: rotr_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    andi. 5, 6, 32
; CHECK32_32-NEXT:    clrlwi 5, 6, 27
; CHECK32_32-NEXT:    subfic 6, 5, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB11_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 7, 4, 0
; CHECK32_32-NEXT:    b .LBB11_3
; CHECK32_32-NEXT:  .LBB11_2:
; CHECK32_32-NEXT:    addi 7, 3, 0
; CHECK32_32-NEXT:    addi 3, 4, 0
; CHECK32_32-NEXT:  .LBB11_3:
; CHECK32_32-NEXT:    srw 4, 7, 5
; CHECK32_32-NEXT:    slw 8, 3, 6
; CHECK32_32-NEXT:    srw 5, 3, 5
; CHECK32_32-NEXT:    slw 6, 7, 6
; CHECK32_32-NEXT:    or 3, 8, 4
; CHECK32_32-NEXT:    or 4, 6, 5
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    andi. 5, 6, 32
; CHECK32_64-NEXT:    clrlwi 5, 6, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB11_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 7, 4, 0
; CHECK32_64-NEXT:    b .LBB11_3
; CHECK32_64-NEXT:  .LBB11_2:
; CHECK32_64-NEXT:    addi 7, 3, 0
; CHECK32_64-NEXT:    addi 3, 4, 0
; CHECK32_64-NEXT:  .LBB11_3:
; CHECK32_64-NEXT:    subfic 6, 5, 32
; CHECK32_64-NEXT:    srw 4, 7, 5
; CHECK32_64-NEXT:    slw 8, 3, 6
; CHECK32_64-NEXT:    srw 5, 3, 5
; CHECK32_64-NEXT:    slw 6, 7, 6
; CHECK32_64-NEXT:    or 3, 8, 4
; CHECK32_64-NEXT:    or 4, 6, 5
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 4, 4
; CHECK64-NEXT:    rotld 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32_32-LABEL: rotr_v4i32:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    neg 7, 7
; CHECK32_32-NEXT:    neg 8, 8
; CHECK32_32-NEXT:    neg 9, 9
; CHECK32_32-NEXT:    neg 10, 10
; CHECK32_32-NEXT:    rotlw 3, 3, 7
; CHECK32_32-NEXT:    rotlw 4, 4, 8
; CHECK32_32-NEXT:    rotlw 5, 5, 9
; CHECK32_32-NEXT:    rotlw 6, 6, 10
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_v4i32:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vxor 4, 4, 4
; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_v4i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    xxlxor 36, 36, 36
; CHECK64-NEXT:    vsubuwm 3, 4, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}
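
; There is no Altivec rotate-right instruction, so the variable rotate right is
; done by negating the amount and rotating left: the scalar ppc32 path negates
; each word with neg, while the vector paths subtract from zero (vxor/xxlxor +
; vsubuwm) and then use vrlw, which also only reads the low 5 bits per element.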

; Vector rotate by constant splat amount.

define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; CHECK32_32-LABEL: rotr_v4i32_const_shift:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlwi 3, 3, 29
; CHECK32_32-NEXT:    rotlwi 4, 4, 29
; CHECK32_32-NEXT:    rotlwi 5, 5, 29
; CHECK32_32-NEXT:    rotlwi 6, 6, 29
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_v4i32_const_shift:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vspltisw 3, -16
; CHECK32_64-NEXT:    vspltisw 4, 13
; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_v4i32_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vspltisw 3, -16
; CHECK64-NEXT:    vspltisw 4, 13
; CHECK64-NEXT:    vsubuwm 3, 4, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}
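
; vspltisw can only materialize splat immediates in the range [-16, 15], so the
; rotate amount of 29 (32 - 3) above is built as 13 - (-16): two splats plus a
; vsubuwm.

; The fsh* shift amount is taken modulo the bitwidth, so shifting by exactly
; the bitwidth is a no-op; the calls below should fold away to a bare blr.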

define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}