; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64

; Declarations of the funnel-shift intrinsics exercised below.
declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare i128 @llvm.fshl.i128(i128, i128, i128)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK32-LABEL: fshl_i32:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 5, 5, 27
; CHECK32-NEXT:    slw 3, 3, 5
; CHECK32-NEXT:    subfic 5, 5, 32
; CHECK32-NEXT:    srw 4, 4, 5
; CHECK32-NEXT:    or 3, 3, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshl_i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 27
; CHECK64-NEXT:    subfic 6, 5, 32
; CHECK64-NEXT:    slw 3, 3, 5
; CHECK64-NEXT:    srw 4, 4, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}
define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32_32-LABEL: fshl_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    andi. 7, 8, 32
; CHECK32_32-NEXT:    clrlwi 7, 8, 27
; CHECK32_32-NEXT:    subfic 8, 7, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB1_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 9, 5, 0
; CHECK32_32-NEXT:    ori 3, 4, 0
; CHECK32_32-NEXT:    ori 4, 6, 0
; CHECK32_32-NEXT:    b .LBB1_3
; CHECK32_32-NEXT:  .LBB1_2:
; CHECK32_32-NEXT:    addi 9, 4, 0
; CHECK32_32-NEXT:    addi 4, 5, 0
; CHECK32_32-NEXT:  .LBB1_3:
; CHECK32_32-NEXT:    srw 5, 9, 8
; CHECK32_32-NEXT:    slw 3, 3, 7
; CHECK32_32-NEXT:    srw 4, 4, 8
; CHECK32_32-NEXT:    slw 6, 9, 7
; CHECK32_32-NEXT:    or 3, 3, 5
; CHECK32_32-NEXT:    or 4, 6, 4
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshl_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    andi. 7, 8, 32
; CHECK32_64-NEXT:    clrlwi 7, 8, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB1_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 9, 5, 0
; CHECK32_64-NEXT:    ori 3, 4, 0
; CHECK32_64-NEXT:    ori 5, 6, 0
; CHECK32_64-NEXT:    b .LBB1_3
; CHECK32_64-NEXT:  .LBB1_2:
; CHECK32_64-NEXT:    addi 9, 4, 0
; CHECK32_64-NEXT:  .LBB1_3:
; CHECK32_64-NEXT:    subfic 8, 7, 32
; CHECK32_64-NEXT:    srw 4, 9, 8
; CHECK32_64-NEXT:    slw 3, 3, 7
; CHECK32_64-NEXT:    srw 5, 5, 8
; CHECK32_64-NEXT:    slw 6, 9, 7
; CHECK32_64-NEXT:    or 3, 3, 4
; CHECK32_64-NEXT:    or 4, 6, 5
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshl_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    sld 3, 3, 5
; CHECK64-NEXT:    srd 4, 4, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}
; i128 requires double-register shifting on both 32- and 64-bit targets.
define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; CHECK32_32-LABEL: fshl_i128:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    lwz 11, 20(1)
; CHECK32_32-NEXT:    andi. 12, 11, 64
; CHECK32_32-NEXT:    mcrf 1, 0
; CHECK32_32-NEXT:    andi. 12, 11, 32
; CHECK32_32-NEXT:    clrlwi 11, 11, 27
; CHECK32_32-NEXT:    bc 12, 6, .LBB2_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 4, 6, 0
; CHECK32_32-NEXT:    ori 12, 7, 0
; CHECK32_32-NEXT:    ori 3, 5, 0
; CHECK32_32-NEXT:    ori 5, 8, 0
; CHECK32_32-NEXT:    ori 6, 9, 0
; CHECK32_32-NEXT:    ori 7, 10, 0
; CHECK32_32-NEXT:    b .LBB2_3
; CHECK32_32-NEXT:  .LBB2_2:
; CHECK32_32-NEXT:    addi 12, 5, 0
; CHECK32_32-NEXT:    addi 5, 6, 0
; CHECK32_32-NEXT:    addi 6, 7, 0
; CHECK32_32-NEXT:    addi 7, 8, 0
; CHECK32_32-NEXT:  .LBB2_3:
; CHECK32_32-NEXT:    subfic 8, 11, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB2_5
; CHECK32_32-NEXT:  # %bb.4:
; CHECK32_32-NEXT:    ori 9, 12, 0
; CHECK32_32-NEXT:    ori 3, 4, 0
; CHECK32_32-NEXT:    ori 4, 5, 0
; CHECK32_32-NEXT:    ori 5, 6, 0
; CHECK32_32-NEXT:    ori 6, 7, 0
; CHECK32_32-NEXT:    b .LBB2_6
; CHECK32_32-NEXT:  .LBB2_5:
; CHECK32_32-NEXT:    addi 9, 4, 0
; CHECK32_32-NEXT:    addi 4, 12, 0
; CHECK32_32-NEXT:  .LBB2_6:
; CHECK32_32-NEXT:    srw 7, 9, 8
; CHECK32_32-NEXT:    slw 3, 3, 11
; CHECK32_32-NEXT:    srw 10, 4, 8
; CHECK32_32-NEXT:    slw 9, 9, 11
; CHECK32_32-NEXT:    srw 12, 5, 8
; CHECK32_32-NEXT:    slw 0, 4, 11
; CHECK32_32-NEXT:    srw 6, 6, 8
; CHECK32_32-NEXT:    slw 8, 5, 11
; CHECK32_32-NEXT:    or 3, 3, 7
; CHECK32_32-NEXT:    or 4, 9, 10
; CHECK32_32-NEXT:    or 5, 0, 12
; CHECK32_32-NEXT:    or 6, 8, 6
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshl_i128:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    stwu 1, -16(1)
; CHECK32_64-NEXT:    lwz 11, 36(1)
; CHECK32_64-NEXT:    andi. 12, 11, 64
; CHECK32_64-NEXT:    stw 30, 8(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mcrf 1, 0
; CHECK32_64-NEXT:    clrlwi 12, 11, 27
; CHECK32_64-NEXT:    andi. 11, 11, 32
; CHECK32_64-NEXT:    bc 12, 6, .LBB2_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 4, 6, 0
; CHECK32_64-NEXT:    ori 30, 7, 0
; CHECK32_64-NEXT:    ori 3, 5, 0
; CHECK32_64-NEXT:    ori 7, 9, 0
; CHECK32_64-NEXT:    b .LBB2_3
; CHECK32_64-NEXT:  .LBB2_2:
; CHECK32_64-NEXT:    addi 30, 5, 0
; CHECK32_64-NEXT:  .LBB2_3:
; CHECK32_64-NEXT:    bc 12, 2, .LBB2_5
; CHECK32_64-NEXT:  # %bb.4:
; CHECK32_64-NEXT:    ori 5, 30, 0
; CHECK32_64-NEXT:    ori 3, 4, 0
; CHECK32_64-NEXT:    b .LBB2_6
; CHECK32_64-NEXT:  .LBB2_5:
; CHECK32_64-NEXT:    addi 5, 4, 0
; CHECK32_64-NEXT:  .LBB2_6:
; CHECK32_64-NEXT:    bc 12, 6, .LBB2_8
; CHECK32_64-NEXT:  # %bb.7:
; CHECK32_64-NEXT:    ori 4, 8, 0
; CHECK32_64-NEXT:    ori 8, 10, 0
; CHECK32_64-NEXT:    b .LBB2_9
; CHECK32_64-NEXT:  .LBB2_8:
; CHECK32_64-NEXT:    addi 4, 6, 0
; CHECK32_64-NEXT:  .LBB2_9:
; CHECK32_64-NEXT:    subfic 11, 12, 32
; CHECK32_64-NEXT:    bc 12, 2, .LBB2_11
; CHECK32_64-NEXT:  # %bb.10:
; CHECK32_64-NEXT:    ori 0, 4, 0
; CHECK32_64-NEXT:    ori 4, 7, 0
; CHECK32_64-NEXT:    ori 7, 8, 0
; CHECK32_64-NEXT:    b .LBB2_12
; CHECK32_64-NEXT:  .LBB2_11:
; CHECK32_64-NEXT:    addi 0, 30, 0
; CHECK32_64-NEXT:  .LBB2_12:
; CHECK32_64-NEXT:    srw 6, 5, 11
; CHECK32_64-NEXT:    lwz 30, 8(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    slw 3, 3, 12
; CHECK32_64-NEXT:    srw 9, 0, 11
; CHECK32_64-NEXT:    slw 5, 5, 12
; CHECK32_64-NEXT:    srw 10, 4, 11
; CHECK32_64-NEXT:    slw 0, 0, 12
; CHECK32_64-NEXT:    srw 7, 7, 11
; CHECK32_64-NEXT:    slw 8, 4, 12
; CHECK32_64-NEXT:    or 3, 3, 6
; CHECK32_64-NEXT:    or 4, 5, 9
; CHECK32_64-NEXT:    or 5, 0, 10
; CHECK32_64-NEXT:    or 6, 8, 7
; CHECK32_64-NEXT:    addi 1, 1, 16
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshl_i128:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    andi. 8, 7, 64
; CHECK64-NEXT:    clrlwi 7, 7, 26
; CHECK64-NEXT:    iseleq 5, 6, 5
; CHECK64-NEXT:    subfic 8, 7, 64
; CHECK64-NEXT:    iseleq 6, 3, 6
; CHECK64-NEXT:    iseleq 3, 4, 3
; CHECK64-NEXT:    srd 4, 5, 8
; CHECK64-NEXT:    sld 5, 6, 7
; CHECK64-NEXT:    srd 6, 6, 8
; CHECK64-NEXT:    sld 7, 3, 7
; CHECK64-NEXT:    or 3, 5, 4
; CHECK64-NEXT:    or 4, 7, 6
; CHECK64-NEXT:    blr
  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
  ret i128 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-LABEL: fshl_i37:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mflr 0
; CHECK32_32-NEXT:    stw 0, 4(1)
; CHECK32_32-NEXT:    stwu 1, -32(1)
; CHECK32_32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_32-NEXT:    .cfi_offset lr, 4
; CHECK32_32-NEXT:    .cfi_offset r27, -20
; CHECK32_32-NEXT:    .cfi_offset r28, -16
; CHECK32_32-NEXT:    .cfi_offset r29, -12
; CHECK32_32-NEXT:    .cfi_offset r30, -8
; CHECK32_32-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 27, 3
; CHECK32_32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 28, 4
; CHECK32_32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 29, 5
; CHECK32_32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 30, 6
; CHECK32_32-NEXT:    clrlwi 3, 7, 27
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    li 5, 0
; CHECK32_32-NEXT:    li 6, 37
; CHECK32_32-NEXT:    bl __umoddi3
; CHECK32_32-NEXT:    rotlwi 3, 30, 27
; CHECK32_32-NEXT:    slwi 5, 30, 27
; CHECK32_32-NEXT:    andi. 6, 4, 32
; CHECK32_32-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_32-NEXT:    clrlwi 4, 4, 27
; CHECK32_32-NEXT:    subfic 6, 4, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB3_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 7, 3, 0
; CHECK32_32-NEXT:    ori 8, 28, 0
; CHECK32_32-NEXT:    ori 3, 5, 0
; CHECK32_32-NEXT:    b .LBB3_3
; CHECK32_32-NEXT:  .LBB3_2:
; CHECK32_32-NEXT:    addi 7, 28, 0
; CHECK32_32-NEXT:    addi 8, 27, 0
; CHECK32_32-NEXT:  .LBB3_3:
; CHECK32_32-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    srw 5, 7, 6
; CHECK32_32-NEXT:    slw 8, 8, 4
; CHECK32_32-NEXT:    srw 6, 3, 6
; CHECK32_32-NEXT:    slw 4, 7, 4
; CHECK32_32-NEXT:    or 3, 8, 5
; CHECK32_32-NEXT:    or 4, 4, 6
; CHECK32_32-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 0, 36(1)
; CHECK32_32-NEXT:    addi 1, 1, 32
; CHECK32_32-NEXT:    mtlr 0
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshl_i37:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    mflr 0
; CHECK32_64-NEXT:    stw 0, 4(1)
; CHECK32_64-NEXT:    stwu 1, -32(1)
; CHECK32_64-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_64-NEXT:    .cfi_offset lr, 4
; CHECK32_64-NEXT:    .cfi_offset r27, -20
; CHECK32_64-NEXT:    .cfi_offset r28, -16
; CHECK32_64-NEXT:    .cfi_offset r29, -12
; CHECK32_64-NEXT:    .cfi_offset r30, -8
; CHECK32_64-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 27, 3
; CHECK32_64-NEXT:    clrlwi 3, 7, 27
; CHECK32_64-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 28, 4
; CHECK32_64-NEXT:    mr 4, 8
; CHECK32_64-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 29, 5
; CHECK32_64-NEXT:    li 5, 0
; CHECK32_64-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 30, 6
; CHECK32_64-NEXT:    li 6, 37
; CHECK32_64-NEXT:    bl __umoddi3
; CHECK32_64-NEXT:    rotlwi 3, 30, 27
; CHECK32_64-NEXT:    andi. 5, 4, 32
; CHECK32_64-NEXT:    bc 12, 2, .LBB3_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 8, 28, 0
; CHECK32_64-NEXT:    b .LBB3_3
; CHECK32_64-NEXT:  .LBB3_2:
; CHECK32_64-NEXT:    addi 8, 27, 0
; CHECK32_64-NEXT:  .LBB3_3:
; CHECK32_64-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_64-NEXT:    clrlwi 4, 4, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB3_5
; CHECK32_64-NEXT:  # %bb.4:
; CHECK32_64-NEXT:    ori 7, 3, 0
; CHECK32_64-NEXT:    b .LBB3_6
; CHECK32_64-NEXT:  .LBB3_5:
; CHECK32_64-NEXT:    addi 7, 28, 0
; CHECK32_64-NEXT:  .LBB3_6:
; CHECK32_64-NEXT:    slwi 5, 30, 27
; CHECK32_64-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    bc 12, 2, .LBB3_8
; CHECK32_64-NEXT:  # %bb.7:
; CHECK32_64-NEXT:    ori 3, 5, 0
; CHECK32_64-NEXT:    b .LBB3_8
; CHECK32_64-NEXT:  .LBB3_8:
; CHECK32_64-NEXT:    subfic 6, 4, 32
; CHECK32_64-NEXT:    slw 8, 8, 4
; CHECK32_64-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    srw 9, 7, 6
; CHECK32_64-NEXT:    srw 5, 3, 6
; CHECK32_64-NEXT:    slw 4, 7, 4
; CHECK32_64-NEXT:    or 3, 8, 9
; CHECK32_64-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    or 4, 4, 5
; CHECK32_64-NEXT:    lwz 0, 36(1)
; CHECK32_64-NEXT:    addi 1, 1, 32
; CHECK32_64-NEXT:    mtlr 0
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshl_i37:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    lis 6, 28339
; CHECK64-NEXT:    clrldi 7, 5, 27
; CHECK64-NEXT:    ori 6, 6, 58451
; CHECK64-NEXT:    sldi 4, 4, 27
; CHECK64-NEXT:    rldic 6, 6, 33, 0
; CHECK64-NEXT:    oris 6, 6, 3542
; CHECK64-NEXT:    ori 6, 6, 31883
; CHECK64-NEXT:    mulhdu 6, 7, 6
; CHECK64-NEXT:    rldicl 6, 6, 59, 5
; CHECK64-NEXT:    mulli 6, 6, 37
; CHECK64-NEXT:    sub 5, 5, 6
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    sld 3, 3, 5
; CHECK64-NEXT:    srd 4, 4, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 67
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 9
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 9
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK32-LABEL: fshl_i64_const_overshift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 6, 6, 9
; CHECK32-NEXT:    rotlwi 3, 5, 9
; CHECK32-NEXT:    rlwimi 6, 5, 9, 0, 22
; CHECK32-NEXT:    rlwimi 3, 4, 9, 0, 22
; CHECK32-NEXT:    mr 4, 6
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshl_i64_const_overshift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 4, 4, 41
; CHECK64-NEXT:    rldimi 4, 3, 41, 0
; CHECK64-NEXT:    mr 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 128
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK32-LABEL: fshr_i32:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 5, 5, 27
; CHECK32-NEXT:    srw 4, 4, 5
; CHECK32-NEXT:    subfic 5, 5, 32
; CHECK32-NEXT:    slw 3, 3, 5
; CHECK32-NEXT:    or 3, 3, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshr_i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 27
; CHECK64-NEXT:    subfic 6, 5, 32
; CHECK64-NEXT:    srw 4, 4, 5
; CHECK64-NEXT:    slw 3, 3, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}
define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32_32-LABEL: fshr_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    andi. 7, 8, 32
; CHECK32_32-NEXT:    clrlwi 7, 8, 27
; CHECK32_32-NEXT:    subfic 8, 7, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB10_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 9, 4, 0
; CHECK32_32-NEXT:    ori 4, 5, 0
; CHECK32_32-NEXT:    b .LBB10_3
; CHECK32_32-NEXT:  .LBB10_2:
; CHECK32_32-NEXT:    addi 9, 5, 0
; CHECK32_32-NEXT:    addi 3, 4, 0
; CHECK32_32-NEXT:    addi 4, 6, 0
; CHECK32_32-NEXT:  .LBB10_3:
; CHECK32_32-NEXT:    srw 5, 9, 7
; CHECK32_32-NEXT:    slw 3, 3, 8
; CHECK32_32-NEXT:    srw 4, 4, 7
; CHECK32_32-NEXT:    slw 6, 9, 8
; CHECK32_32-NEXT:    or 3, 3, 5
; CHECK32_32-NEXT:    or 4, 6, 4
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    andi. 7, 8, 32
; CHECK32_64-NEXT:    clrlwi 7, 8, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB10_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 9, 4, 0
; CHECK32_64-NEXT:    b .LBB10_3
; CHECK32_64-NEXT:  .LBB10_2:
; CHECK32_64-NEXT:    addi 9, 5, 0
; CHECK32_64-NEXT:    addi 3, 4, 0
; CHECK32_64-NEXT:    addi 5, 6, 0
; CHECK32_64-NEXT:  .LBB10_3:
; CHECK32_64-NEXT:    subfic 8, 7, 32
; CHECK32_64-NEXT:    srw 4, 9, 7
; CHECK32_64-NEXT:    slw 3, 3, 8
; CHECK32_64-NEXT:    srw 5, 5, 7
; CHECK32_64-NEXT:    slw 6, 9, 8
; CHECK32_64-NEXT:    or 3, 3, 4
; CHECK32_64-NEXT:    or 4, 6, 5
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    srd 4, 4, 5
; CHECK64-NEXT:    sld 3, 3, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-LABEL: fshr_i37:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mflr 0
; CHECK32_32-NEXT:    stw 0, 4(1)
; CHECK32_32-NEXT:    stwu 1, -32(1)
; CHECK32_32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_32-NEXT:    .cfi_offset lr, 4
; CHECK32_32-NEXT:    .cfi_offset r27, -20
; CHECK32_32-NEXT:    .cfi_offset r28, -16
; CHECK32_32-NEXT:    .cfi_offset r29, -12
; CHECK32_32-NEXT:    .cfi_offset r30, -8
; CHECK32_32-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 27, 3
; CHECK32_32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 28, 4
; CHECK32_32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 29, 5
; CHECK32_32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 30, 6
; CHECK32_32-NEXT:    clrlwi 3, 7, 27
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    li 5, 0
; CHECK32_32-NEXT:    li 6, 37
; CHECK32_32-NEXT:    bl __umoddi3
; CHECK32_32-NEXT:    rotlwi 3, 30, 27
; CHECK32_32-NEXT:    addi 4, 4, 27
; CHECK32_32-NEXT:    slwi 5, 30, 27
; CHECK32_32-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_32-NEXT:    andi. 6, 4, 32
; CHECK32_32-NEXT:    clrlwi 4, 4, 27
; CHECK32_32-NEXT:    subfic 6, 4, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB11_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 7, 28, 0
; CHECK32_32-NEXT:    ori 8, 27, 0
; CHECK32_32-NEXT:    b .LBB11_3
; CHECK32_32-NEXT:  .LBB11_2:
; CHECK32_32-NEXT:    addi 7, 3, 0
; CHECK32_32-NEXT:    addi 8, 28, 0
; CHECK32_32-NEXT:    addi 3, 5, 0
; CHECK32_32-NEXT:  .LBB11_3:
; CHECK32_32-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    srw 5, 7, 4
; CHECK32_32-NEXT:    slw 8, 8, 6
; CHECK32_32-NEXT:    srw 4, 3, 4
; CHECK32_32-NEXT:    slw 6, 7, 6
; CHECK32_32-NEXT:    or 3, 8, 5
; CHECK32_32-NEXT:    or 4, 6, 4
; CHECK32_32-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 0, 36(1)
; CHECK32_32-NEXT:    addi 1, 1, 32
; CHECK32_32-NEXT:    mtlr 0
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_i37:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    mflr 0
; CHECK32_64-NEXT:    stw 0, 4(1)
; CHECK32_64-NEXT:    stwu 1, -32(1)
; CHECK32_64-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_64-NEXT:    .cfi_offset lr, 4
; CHECK32_64-NEXT:    .cfi_offset r27, -20
; CHECK32_64-NEXT:    .cfi_offset r28, -16
; CHECK32_64-NEXT:    .cfi_offset r29, -12
; CHECK32_64-NEXT:    .cfi_offset r30, -8
; CHECK32_64-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 27, 3
; CHECK32_64-NEXT:    clrlwi 3, 7, 27
; CHECK32_64-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 28, 4
; CHECK32_64-NEXT:    mr 4, 8
; CHECK32_64-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 29, 5
; CHECK32_64-NEXT:    li 5, 0
; CHECK32_64-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 30, 6
; CHECK32_64-NEXT:    li 6, 37
; CHECK32_64-NEXT:    bl __umoddi3
; CHECK32_64-NEXT:    addi 4, 4, 27
; CHECK32_64-NEXT:    rotlwi 3, 30, 27
; CHECK32_64-NEXT:    andi. 5, 4, 32
; CHECK32_64-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_64-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    bc 12, 2, .LBB11_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 7, 28, 0
; CHECK32_64-NEXT:    ori 8, 27, 0
; CHECK32_64-NEXT:    b .LBB11_3
; CHECK32_64-NEXT:  .LBB11_2:
; CHECK32_64-NEXT:    addi 7, 3, 0
; CHECK32_64-NEXT:    addi 8, 28, 0
; CHECK32_64-NEXT:  .LBB11_3:
; CHECK32_64-NEXT:    clrlwi 4, 4, 27
; CHECK32_64-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    slwi 5, 30, 27
; CHECK32_64-NEXT:    subfic 6, 4, 32
; CHECK32_64-NEXT:    bc 12, 2, .LBB11_4
; CHECK32_64-NEXT:    b .LBB11_5
; CHECK32_64-NEXT:  .LBB11_4:
; CHECK32_64-NEXT:    addi 3, 5, 0
; CHECK32_64-NEXT:  .LBB11_5:
; CHECK32_64-NEXT:    srw 9, 7, 4
; CHECK32_64-NEXT:    slw 8, 8, 6
; CHECK32_64-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    srw 4, 3, 4
; CHECK32_64-NEXT:    slw 5, 7, 6
; CHECK32_64-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    or 3, 8, 9
; CHECK32_64-NEXT:    or 4, 5, 4
; CHECK32_64-NEXT:    lwz 0, 36(1)
; CHECK32_64-NEXT:    addi 1, 1, 32
; CHECK32_64-NEXT:    mtlr 0
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_i37:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    lis 6, 28339
; CHECK64-NEXT:    clrldi 7, 5, 27
; CHECK64-NEXT:    ori 6, 6, 58451
; CHECK64-NEXT:    sldi 4, 4, 27
; CHECK64-NEXT:    rldic 6, 6, 33, 0
; CHECK64-NEXT:    oris 6, 6, 3542
; CHECK64-NEXT:    ori 6, 6, 31883
; CHECK64-NEXT:    mulhdu 6, 7, 6
; CHECK64-NEXT:    rldicl 6, 6, 59, 5
; CHECK64-NEXT:    mulli 6, 6, 37
; CHECK64-NEXT:    sub 5, 5, 6
; CHECK64-NEXT:    addi 5, 5, 27
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    srd 4, 4, 5
; CHECK64-NEXT:    sld 3, 3, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 31
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 23
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 23
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK32-LABEL: fshr_i64_const_overshift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 6, 4, 23
; CHECK32-NEXT:    rotlwi 5, 5, 23
; CHECK32-NEXT:    rlwimi 6, 3, 23, 0, 8
; CHECK32-NEXT:    rlwimi 5, 4, 23, 0, 8
; CHECK32-NEXT:    mr 3, 6
; CHECK32-NEXT:    mr 4, 5
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshr_i64_const_overshift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 4, 4, 23
; CHECK64-NEXT:    rldimi 4, 3, 23, 0
; CHECK64-NEXT:    mr 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 254
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}
; Shift-by-bitwidth is a no-op for fshl (returns %x unchanged, already in r3).
define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}
; Shift-by-bitwidth for fshr selects %y (a single register copy).
define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}
define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK32_32-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mr 6, 10
; CHECK32_32-NEXT:    mr 5, 9
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    mr 3, 7
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vmr 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vmr 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

