; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare i128 @llvm.fshl.i128(i128, i128, i128)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
; General case - all operands can be variables.

; Variable-amount fshl of i32: the amount is masked to 5 bits (clrlwi ..., 27),
; then the result is built from an slw/srw pair.
define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK32-LABEL: fshl_i32:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 5, 5, 27
; CHECK32-NEXT:    slw 3, 3, 5
; CHECK32-NEXT:    subfic 5, 5, 32
; CHECK32-NEXT:    srw 4, 4, 5
; CHECK32-NEXT:    or 3, 3, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshl_i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 27
; CHECK64-NEXT:    subfic 6, 5, 32
; CHECK64-NEXT:    slw 3, 3, 5
; CHECK64-NEXT:    srw 4, 4, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Variable-amount fshl of i64: on 32-bit targets bit 5 of the amount (andi. ..., 32)
; selects the register pairing before the 32-bit shift/or sequence.
define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32_32-LABEL: fshl_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    andi. 7, 8, 32
; CHECK32_32-NEXT:    clrlwi 7, 8, 27
; CHECK32_32-NEXT:    subfic 8, 7, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB1_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 9, 5, 0
; CHECK32_32-NEXT:    ori 3, 4, 0
; CHECK32_32-NEXT:    ori 4, 6, 0
; CHECK32_32-NEXT:    b .LBB1_3
; CHECK32_32-NEXT:  .LBB1_2:
; CHECK32_32-NEXT:    addi 9, 4, 0
; CHECK32_32-NEXT:    addi 4, 5, 0
; CHECK32_32-NEXT:  .LBB1_3:
; CHECK32_32-NEXT:    srw 5, 9, 8
; CHECK32_32-NEXT:    slw 3, 3, 7
; CHECK32_32-NEXT:    srw 4, 4, 8
; CHECK32_32-NEXT:    slw 6, 9, 7
; CHECK32_32-NEXT:    or 3, 3, 5
; CHECK32_32-NEXT:    or 4, 6, 4
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshl_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    andi. 7, 8, 32
; CHECK32_64-NEXT:    clrlwi 7, 8, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB1_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 9, 5, 0
; CHECK32_64-NEXT:    ori 3, 4, 0
; CHECK32_64-NEXT:    ori 5, 6, 0
; CHECK32_64-NEXT:    b .LBB1_3
; CHECK32_64-NEXT:  .LBB1_2:
; CHECK32_64-NEXT:    addi 9, 4, 0
; CHECK32_64-NEXT:  .LBB1_3:
; CHECK32_64-NEXT:    subfic 8, 7, 32
; CHECK32_64-NEXT:    srw 4, 9, 8
; CHECK32_64-NEXT:    slw 3, 3, 7
; CHECK32_64-NEXT:    srw 5, 5, 8
; CHECK32_64-NEXT:    slw 6, 9, 7
; CHECK32_64-NEXT:    or 3, 3, 4
; CHECK32_64-NEXT:    or 4, 6, 5
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshl_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    sld 3, 3, 5
; CHECK64-NEXT:    srd 4, 4, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; Variable-amount fshl of i128: expansion selects on bits 6 and 5 of the amount
; (andi. ..., 64 / andi. ..., 32) before the word-sized shift/or sequence.
define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; CHECK32_32-LABEL: fshl_i128:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    lwz 11, 20(1)
; CHECK32_32-NEXT:    andi. 12, 11, 64
; CHECK32_32-NEXT:    mcrf 1, 0
; CHECK32_32-NEXT:    andi. 12, 11, 32
; CHECK32_32-NEXT:    clrlwi 11, 11, 27
; CHECK32_32-NEXT:    bc 12, 6, .LBB2_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 4, 6, 0
; CHECK32_32-NEXT:    ori 12, 7, 0
; CHECK32_32-NEXT:    ori 3, 5, 0
; CHECK32_32-NEXT:    ori 5, 8, 0
; CHECK32_32-NEXT:    ori 6, 9, 0
; CHECK32_32-NEXT:    ori 7, 10, 0
; CHECK32_32-NEXT:    b .LBB2_3
; CHECK32_32-NEXT:  .LBB2_2:
; CHECK32_32-NEXT:    addi 12, 5, 0
; CHECK32_32-NEXT:    addi 5, 6, 0
; CHECK32_32-NEXT:    addi 6, 7, 0
; CHECK32_32-NEXT:    addi 7, 8, 0
; CHECK32_32-NEXT:  .LBB2_3:
; CHECK32_32-NEXT:    subfic 8, 11, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB2_5
; CHECK32_32-NEXT:  # %bb.4:
; CHECK32_32-NEXT:    ori 9, 12, 0
; CHECK32_32-NEXT:    ori 3, 4, 0
; CHECK32_32-NEXT:    ori 4, 5, 0
; CHECK32_32-NEXT:    ori 5, 6, 0
; CHECK32_32-NEXT:    ori 6, 7, 0
; CHECK32_32-NEXT:    b .LBB2_6
; CHECK32_32-NEXT:  .LBB2_5:
; CHECK32_32-NEXT:    addi 9, 4, 0
; CHECK32_32-NEXT:    addi 4, 12, 0
; CHECK32_32-NEXT:  .LBB2_6:
; CHECK32_32-NEXT:    srw 7, 9, 8
; CHECK32_32-NEXT:    slw 3, 3, 11
; CHECK32_32-NEXT:    srw 10, 4, 8
; CHECK32_32-NEXT:    slw 9, 9, 11
; CHECK32_32-NEXT:    srw 12, 5, 8
; CHECK32_32-NEXT:    slw 0, 4, 11
; CHECK32_32-NEXT:    srw 6, 6, 8
; CHECK32_32-NEXT:    slw 8, 5, 11
; CHECK32_32-NEXT:    or 3, 3, 7
; CHECK32_32-NEXT:    or 4, 9, 10
; CHECK32_32-NEXT:    or 5, 0, 12
; CHECK32_32-NEXT:    or 6, 8, 6
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshl_i128:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    stwu 1, -16(1)
; CHECK32_64-NEXT:    lwz 11, 36(1)
; CHECK32_64-NEXT:    andi. 12, 11, 64
; CHECK32_64-NEXT:    stw 30, 8(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mcrf 1, 0
; CHECK32_64-NEXT:    clrlwi 12, 11, 27
; CHECK32_64-NEXT:    andi. 11, 11, 32
; CHECK32_64-NEXT:    bc 12, 6, .LBB2_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 4, 6, 0
; CHECK32_64-NEXT:    ori 30, 7, 0
; CHECK32_64-NEXT:    ori 3, 5, 0
; CHECK32_64-NEXT:    ori 7, 9, 0
; CHECK32_64-NEXT:    b .LBB2_3
; CHECK32_64-NEXT:  .LBB2_2:
; CHECK32_64-NEXT:    addi 30, 5, 0
; CHECK32_64-NEXT:  .LBB2_3:
; CHECK32_64-NEXT:    bc 12, 2, .LBB2_5
; CHECK32_64-NEXT:  # %bb.4:
; CHECK32_64-NEXT:    ori 5, 30, 0
; CHECK32_64-NEXT:    ori 3, 4, 0
; CHECK32_64-NEXT:    b .LBB2_6
; CHECK32_64-NEXT:  .LBB2_5:
; CHECK32_64-NEXT:    addi 5, 4, 0
; CHECK32_64-NEXT:  .LBB2_6:
; CHECK32_64-NEXT:    bc 12, 6, .LBB2_8
; CHECK32_64-NEXT:  # %bb.7:
; CHECK32_64-NEXT:    ori 4, 8, 0
; CHECK32_64-NEXT:    ori 8, 10, 0
; CHECK32_64-NEXT:    b .LBB2_9
; CHECK32_64-NEXT:  .LBB2_8:
; CHECK32_64-NEXT:    addi 4, 6, 0
; CHECK32_64-NEXT:  .LBB2_9:
; CHECK32_64-NEXT:    subfic 11, 12, 32
; CHECK32_64-NEXT:    bc 12, 2, .LBB2_11
; CHECK32_64-NEXT:  # %bb.10:
; CHECK32_64-NEXT:    ori 0, 4, 0
; CHECK32_64-NEXT:    ori 4, 7, 0
; CHECK32_64-NEXT:    ori 7, 8, 0
; CHECK32_64-NEXT:    b .LBB2_12
; CHECK32_64-NEXT:  .LBB2_11:
; CHECK32_64-NEXT:    addi 0, 30, 0
; CHECK32_64-NEXT:  .LBB2_12:
; CHECK32_64-NEXT:    srw 6, 5, 11
; CHECK32_64-NEXT:    lwz 30, 8(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    slw 3, 3, 12
; CHECK32_64-NEXT:    srw 9, 0, 11
; CHECK32_64-NEXT:    slw 5, 5, 12
; CHECK32_64-NEXT:    srw 10, 4, 11
; CHECK32_64-NEXT:    slw 0, 0, 12
; CHECK32_64-NEXT:    srw 7, 7, 11
; CHECK32_64-NEXT:    slw 8, 4, 12
; CHECK32_64-NEXT:    or 3, 3, 6
; CHECK32_64-NEXT:    or 4, 5, 9
; CHECK32_64-NEXT:    or 5, 0, 10
; CHECK32_64-NEXT:    or 6, 8, 7
; CHECK32_64-NEXT:    addi 1, 1, 16
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshl_i128:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    andi. 8, 7, 64
; CHECK64-NEXT:    clrlwi 7, 7, 26
; CHECK64-NEXT:    iseleq 5, 6, 5
; CHECK64-NEXT:    subfic 8, 7, 64
; CHECK64-NEXT:    iseleq 6, 3, 6
; CHECK64-NEXT:    iseleq 3, 4, 3
; CHECK64-NEXT:    srd 4, 5, 8
; CHECK64-NEXT:    sld 5, 6, 7
; CHECK64-NEXT:    srd 6, 6, 8
; CHECK64-NEXT:    sld 7, 3, 7
; CHECK64-NEXT:    or 3, 5, 4
; CHECK64-NEXT:    or 4, 7, 6
; CHECK64-NEXT:    blr
  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
  ret i128 %f
}

; Verify that weird types are minimally supported.
; The shift amount is reduced modulo 37 (via __umoddi3 on 32-bit, or a
; multiply-high reciprocal sequence on 64-bit) before shifting.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-LABEL: fshl_i37:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mflr 0
; CHECK32_32-NEXT:    stw 0, 4(1)
; CHECK32_32-NEXT:    stwu 1, -32(1)
; CHECK32_32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_32-NEXT:    .cfi_offset lr, 4
; CHECK32_32-NEXT:    .cfi_offset r27, -20
; CHECK32_32-NEXT:    .cfi_offset r28, -16
; CHECK32_32-NEXT:    .cfi_offset r29, -12
; CHECK32_32-NEXT:    .cfi_offset r30, -8
; CHECK32_32-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 27, 3
; CHECK32_32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 28, 4
; CHECK32_32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 29, 5
; CHECK32_32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 30, 6
; CHECK32_32-NEXT:    mr 3, 7
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    li 5, 0
; CHECK32_32-NEXT:    li 6, 37
; CHECK32_32-NEXT:    bl __umoddi3
; CHECK32_32-NEXT:    rotlwi 3, 30, 27
; CHECK32_32-NEXT:    slwi 5, 30, 27
; CHECK32_32-NEXT:    andi. 6, 4, 32
; CHECK32_32-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_32-NEXT:    clrlwi 4, 4, 27
; CHECK32_32-NEXT:    subfic 6, 4, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB3_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 7, 3, 0
; CHECK32_32-NEXT:    ori 8, 28, 0
; CHECK32_32-NEXT:    ori 3, 5, 0
; CHECK32_32-NEXT:    b .LBB3_3
; CHECK32_32-NEXT:  .LBB3_2:
; CHECK32_32-NEXT:    addi 7, 28, 0
; CHECK32_32-NEXT:    addi 8, 27, 0
; CHECK32_32-NEXT:  .LBB3_3:
; CHECK32_32-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    srw 5, 7, 6
; CHECK32_32-NEXT:    slw 8, 8, 4
; CHECK32_32-NEXT:    srw 6, 3, 6
; CHECK32_32-NEXT:    slw 4, 7, 4
; CHECK32_32-NEXT:    or 3, 8, 5
; CHECK32_32-NEXT:    or 4, 4, 6
; CHECK32_32-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 0, 36(1)
; CHECK32_32-NEXT:    addi 1, 1, 32
; CHECK32_32-NEXT:    mtlr 0
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshl_i37:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    mflr 0
; CHECK32_64-NEXT:    stw 0, 4(1)
; CHECK32_64-NEXT:    stwu 1, -32(1)
; CHECK32_64-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_64-NEXT:    .cfi_offset lr, 4
; CHECK32_64-NEXT:    .cfi_offset r27, -20
; CHECK32_64-NEXT:    .cfi_offset r28, -16
; CHECK32_64-NEXT:    .cfi_offset r29, -12
; CHECK32_64-NEXT:    .cfi_offset r30, -8
; CHECK32_64-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 27, 3
; CHECK32_64-NEXT:    mr 3, 7
; CHECK32_64-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 28, 4
; CHECK32_64-NEXT:    mr 4, 8
; CHECK32_64-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 29, 5
; CHECK32_64-NEXT:    li 5, 0
; CHECK32_64-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 30, 6
; CHECK32_64-NEXT:    li 6, 37
; CHECK32_64-NEXT:    bl __umoddi3
; CHECK32_64-NEXT:    rotlwi 3, 30, 27
; CHECK32_64-NEXT:    andi. 5, 4, 32
; CHECK32_64-NEXT:    bc 12, 2, .LBB3_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 8, 28, 0
; CHECK32_64-NEXT:    b .LBB3_3
; CHECK32_64-NEXT:  .LBB3_2:
; CHECK32_64-NEXT:    addi 8, 27, 0
; CHECK32_64-NEXT:  .LBB3_3:
; CHECK32_64-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_64-NEXT:    clrlwi 4, 4, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB3_5
; CHECK32_64-NEXT:  # %bb.4:
; CHECK32_64-NEXT:    ori 7, 3, 0
; CHECK32_64-NEXT:    b .LBB3_6
; CHECK32_64-NEXT:  .LBB3_5:
; CHECK32_64-NEXT:    addi 7, 28, 0
; CHECK32_64-NEXT:  .LBB3_6:
; CHECK32_64-NEXT:    slwi 5, 30, 27
; CHECK32_64-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    bc 12, 2, .LBB3_8
; CHECK32_64-NEXT:  # %bb.7:
; CHECK32_64-NEXT:    ori 3, 5, 0
; CHECK32_64-NEXT:    b .LBB3_8
; CHECK32_64-NEXT:  .LBB3_8:
; CHECK32_64-NEXT:    subfic 6, 4, 32
; CHECK32_64-NEXT:    slw 8, 8, 4
; CHECK32_64-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    srw 9, 7, 6
; CHECK32_64-NEXT:    srw 5, 3, 6
; CHECK32_64-NEXT:    slw 4, 7, 4
; CHECK32_64-NEXT:    or 3, 8, 9
; CHECK32_64-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    or 4, 4, 5
; CHECK32_64-NEXT:    lwz 0, 36(1)
; CHECK32_64-NEXT:    addi 1, 1, 32
; CHECK32_64-NEXT:    mtlr 0
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshl_i37:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    lis 6, 28339
; CHECK64-NEXT:    sldi 4, 4, 27
; CHECK64-NEXT:    ori 6, 6, 58451
; CHECK64-NEXT:    rldic 6, 6, 33, 0
; CHECK64-NEXT:    oris 6, 6, 3542
; CHECK64-NEXT:    ori 6, 6, 31883
; CHECK64-NEXT:    mulhdu 6, 5, 6
; CHECK64-NEXT:    rldicl 6, 6, 59, 5
; CHECK64-NEXT:    mulli 6, 6, 37
; CHECK64-NEXT:    sub 5, 5, 6
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    sld 3, 3, 5
; CHECK64-NEXT:    srd 4, 4, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 67
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 9
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41 % 32 == 9, so this matches
; fshl_i32_const_shift exactly.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 9
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105 % 64 == 41.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK32-LABEL: fshl_i64_const_overshift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 6, 6, 9
; CHECK32-NEXT:    rotlwi 3, 5, 9
; CHECK32-NEXT:    rlwimi 6, 5, 9, 0, 22
; CHECK32-NEXT:    rlwimi 3, 4, 9, 0, 22
; CHECK32-NEXT:    mr 4, 6
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshl_i64_const_overshift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 4, 4, 41
; CHECK64-NEXT:    rldimi 4, 3, 41, 0
; CHECK64-NEXT:    mr 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
; fshl(255, 0, 7) == ((255 << 7) | (0 >> 1)) & 255 == 128.

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 128
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.

; Variable-amount fshr of i32: mirror of fshl_i32 with srw/slw roles swapped.
define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK32-LABEL: fshr_i32:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 5, 5, 27
; CHECK32-NEXT:    srw 4, 4, 5
; CHECK32-NEXT:    subfic 5, 5, 32
; CHECK32-NEXT:    slw 3, 3, 5
; CHECK32-NEXT:    or 3, 3, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshr_i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 27
; CHECK64-NEXT:    subfic 6, 5, 32
; CHECK64-NEXT:    srw 4, 4, 5
; CHECK64-NEXT:    slw 3, 3, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Variable-amount fshr of i64: mirror of fshl_i64.
define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32_32-LABEL: fshr_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    andi. 7, 8, 32
; CHECK32_32-NEXT:    clrlwi 7, 8, 27
; CHECK32_32-NEXT:    subfic 8, 7, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB10_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 9, 4, 0
; CHECK32_32-NEXT:    ori 4, 5, 0
; CHECK32_32-NEXT:    b .LBB10_3
; CHECK32_32-NEXT:  .LBB10_2:
; CHECK32_32-NEXT:    addi 9, 5, 0
; CHECK32_32-NEXT:    addi 3, 4, 0
; CHECK32_32-NEXT:    addi 4, 6, 0
; CHECK32_32-NEXT:  .LBB10_3:
; CHECK32_32-NEXT:    srw 5, 9, 7
; CHECK32_32-NEXT:    slw 3, 3, 8
; CHECK32_32-NEXT:    srw 4, 4, 7
; CHECK32_32-NEXT:    slw 6, 9, 8
; CHECK32_32-NEXT:    or 3, 3, 5
; CHECK32_32-NEXT:    or 4, 6, 4
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    andi. 7, 8, 32
; CHECK32_64-NEXT:    clrlwi 7, 8, 27
; CHECK32_64-NEXT:    bc 12, 2, .LBB10_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 9, 4, 0
; CHECK32_64-NEXT:    b .LBB10_3
; CHECK32_64-NEXT:  .LBB10_2:
; CHECK32_64-NEXT:    addi 9, 5, 0
; CHECK32_64-NEXT:    addi 3, 4, 0
; CHECK32_64-NEXT:    addi 5, 6, 0
; CHECK32_64-NEXT:  .LBB10_3:
; CHECK32_64-NEXT:    subfic 8, 7, 32
; CHECK32_64-NEXT:    srw 4, 9, 7
; CHECK32_64-NEXT:    slw 3, 3, 8
; CHECK32_64-NEXT:    srw 5, 5, 7
; CHECK32_64-NEXT:    slw 6, 9, 8
; CHECK32_64-NEXT:    or 3, 3, 4
; CHECK32_64-NEXT:    or 4, 6, 5
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    srd 4, 4, 5
; CHECK64-NEXT:    sld 3, 3, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; Verify that weird types are minimally supported.
; As in fshl_i37, the amount is reduced modulo 37 first; the extra addi ..., 27
; rebases the amount for the right-shift direction.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-LABEL: fshr_i37:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mflr 0
; CHECK32_32-NEXT:    stw 0, 4(1)
; CHECK32_32-NEXT:    stwu 1, -32(1)
; CHECK32_32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_32-NEXT:    .cfi_offset lr, 4
; CHECK32_32-NEXT:    .cfi_offset r27, -20
; CHECK32_32-NEXT:    .cfi_offset r28, -16
; CHECK32_32-NEXT:    .cfi_offset r29, -12
; CHECK32_32-NEXT:    .cfi_offset r30, -8
; CHECK32_32-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 27, 3
; CHECK32_32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 28, 4
; CHECK32_32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 29, 5
; CHECK32_32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 30, 6
; CHECK32_32-NEXT:    mr 3, 7
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    li 5, 0
; CHECK32_32-NEXT:    li 6, 37
; CHECK32_32-NEXT:    bl __umoddi3
; CHECK32_32-NEXT:    rotlwi 3, 30, 27
; CHECK32_32-NEXT:    addi 4, 4, 27
; CHECK32_32-NEXT:    slwi 5, 30, 27
; CHECK32_32-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_32-NEXT:    andi. 6, 4, 32
; CHECK32_32-NEXT:    clrlwi 4, 4, 27
; CHECK32_32-NEXT:    subfic 6, 4, 32
; CHECK32_32-NEXT:    bc 12, 2, .LBB11_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    ori 7, 28, 0
; CHECK32_32-NEXT:    ori 8, 27, 0
; CHECK32_32-NEXT:    b .LBB11_3
; CHECK32_32-NEXT:  .LBB11_2:
; CHECK32_32-NEXT:    addi 7, 3, 0
; CHECK32_32-NEXT:    addi 8, 28, 0
; CHECK32_32-NEXT:    addi 3, 5, 0
; CHECK32_32-NEXT:  .LBB11_3:
; CHECK32_32-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    srw 5, 7, 4
; CHECK32_32-NEXT:    slw 8, 8, 6
; CHECK32_32-NEXT:    srw 4, 3, 4
; CHECK32_32-NEXT:    slw 6, 7, 6
; CHECK32_32-NEXT:    or 3, 8, 5
; CHECK32_32-NEXT:    or 4, 6, 4
; CHECK32_32-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 0, 36(1)
; CHECK32_32-NEXT:    addi 1, 1, 32
; CHECK32_32-NEXT:    mtlr 0
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_i37:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    mflr 0
; CHECK32_64-NEXT:    stw 0, 4(1)
; CHECK32_64-NEXT:    stwu 1, -32(1)
; CHECK32_64-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_64-NEXT:    .cfi_offset lr, 4
; CHECK32_64-NEXT:    .cfi_offset r27, -20
; CHECK32_64-NEXT:    .cfi_offset r28, -16
; CHECK32_64-NEXT:    .cfi_offset r29, -12
; CHECK32_64-NEXT:    .cfi_offset r30, -8
; CHECK32_64-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 27, 3
; CHECK32_64-NEXT:    mr 3, 7
; CHECK32_64-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 28, 4
; CHECK32_64-NEXT:    mr 4, 8
; CHECK32_64-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 29, 5
; CHECK32_64-NEXT:    li 5, 0
; CHECK32_64-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 30, 6
; CHECK32_64-NEXT:    li 6, 37
; CHECK32_64-NEXT:    bl __umoddi3
; CHECK32_64-NEXT:    addi 4, 4, 27
; CHECK32_64-NEXT:    rotlwi 3, 30, 27
; CHECK32_64-NEXT:    andi. 5, 4, 32
; CHECK32_64-NEXT:    rlwimi 3, 29, 27, 0, 4
; CHECK32_64-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    bc 12, 2, .LBB11_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    ori 7, 28, 0
; CHECK32_64-NEXT:    ori 8, 27, 0
; CHECK32_64-NEXT:    b .LBB11_3
; CHECK32_64-NEXT:  .LBB11_2:
; CHECK32_64-NEXT:    addi 7, 3, 0
; CHECK32_64-NEXT:    addi 8, 28, 0
; CHECK32_64-NEXT:  .LBB11_3:
; CHECK32_64-NEXT:    clrlwi 4, 4, 27
; CHECK32_64-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    slwi 5, 30, 27
; CHECK32_64-NEXT:    subfic 6, 4, 32
; CHECK32_64-NEXT:    bc 12, 2, .LBB11_4
; CHECK32_64-NEXT:    b .LBB11_5
; CHECK32_64-NEXT:  .LBB11_4:
; CHECK32_64-NEXT:    addi 3, 5, 0
; CHECK32_64-NEXT:  .LBB11_5:
; CHECK32_64-NEXT:    srw 9, 7, 4
; CHECK32_64-NEXT:    slw 8, 8, 6
; CHECK32_64-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    srw 4, 3, 4
; CHECK32_64-NEXT:    slw 5, 7, 6
; CHECK32_64-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    or 3, 8, 9
; CHECK32_64-NEXT:    or 4, 5, 4
; CHECK32_64-NEXT:    lwz 0, 36(1)
; CHECK32_64-NEXT:    addi 1, 1, 32
; CHECK32_64-NEXT:    mtlr 0
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_i37:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    lis 6, 28339
; CHECK64-NEXT:    sldi 4, 4, 27
; CHECK64-NEXT:    ori 6, 6, 58451
; CHECK64-NEXT:    rldic 6, 6, 33, 0
; CHECK64-NEXT:    oris 6, 6, 3542
; CHECK64-NEXT:    ori 6, 6, 31883
; CHECK64-NEXT:    mulhdu 6, 5, 6
; CHECK64-NEXT:    rldicl 6, 6, 59, 5
; CHECK64-NEXT:    mulli 6, 6, 37
; CHECK64-NEXT:    sub 5, 5, 6
; CHECK64-NEXT:    addi 5, 5, 27
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    subfic 6, 5, 64
; CHECK64-NEXT:    srd 4, 4, 5
; CHECK64-NEXT:    sld 3, 3, 6
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 31
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 23
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 23
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK32-LABEL: fshr_i64_const_overshift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 6, 4, 23
; CHECK32-NEXT:    rotlwi 5, 5, 23
; CHECK32-NEXT:    rlwimi 6, 3, 23, 0, 8
; CHECK32-NEXT:    rlwimi 5, 4, 23, 0, 8
; CHECK32-NEXT:    mr 3, 6
; CHECK32-NEXT:    mr 4, 5
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshr_i64_const_overshift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 4, 4, 23
; CHECK64-NEXT:    rldimi 4, 3, 23, 0
; CHECK64-NEXT:    mr 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
; fshr(255, 0, 7) == (0 >> 7) | ((255 << 1) & 255) == 254.

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 254
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; A shift amount equal to the bitwidth is reduced modulo 32 to 0, so fshl
; returns %x unchanged - no instructions besides the return.
define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

; A shift amount equal to the bitwidth is reduced modulo 32 to 0, so fshr
; returns %y - just a register copy.
define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

; Vector splat of the bitwidth behaves like the scalar case: fshl by 32
; returns %x unchanged.
define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

; Vector splat of the bitwidth behaves like the scalar case: fshr by 32
; returns %y (GPR copies without AltiVec, vmr otherwise).
define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK32_32-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mr 6, 10
; CHECK32_32-NEXT:    mr 5, 9
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    mr 3, 7
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vmr 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vmr 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
