1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-FAST
3; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=X86,X86-SLOW
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-FAST
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=X64,X64-SLOW
6
7declare i8 @llvm.fshr.i8(i8, i8, i8) nounwind readnone
8declare i16 @llvm.fshr.i16(i16, i16, i16) nounwind readnone
9declare i32 @llvm.fshr.i32(i32, i32, i32) nounwind readnone
10declare i64 @llvm.fshr.i64(i64, i64, i64) nounwind readnone
11declare i128 @llvm.fshr.i128(i128, i128, i128) nounwind readnone
12
13;
14; Variable Funnel Shift
15;
16
17define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind {
18; X86-LABEL: var_shift_i8:
19; X86:       # %bb.0:
20; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
21; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
22; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
23; X86-NEXT:    shll $8, %eax
24; X86-NEXT:    orl %edx, %eax
25; X86-NEXT:    andb $7, %cl
26; X86-NEXT:    shrl %cl, %eax
27; X86-NEXT:    # kill: def $al killed $al killed $eax
28; X86-NEXT:    retl
29;
30; X64-LABEL: var_shift_i8:
31; X64:       # %bb.0:
32; X64-NEXT:    movl %edx, %ecx
33; X64-NEXT:    shll $8, %edi
34; X64-NEXT:    movzbl %sil, %eax
35; X64-NEXT:    orl %edi, %eax
36; X64-NEXT:    andb $7, %cl
37; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
38; X64-NEXT:    shrl %cl, %eax
39; X64-NEXT:    # kill: def $al killed $al killed $eax
40; X64-NEXT:    retq
41  %tmp = tail call i8 @llvm.fshr.i8(i8 %x, i8 %y, i8 %z)
42  ret i8 %tmp
43}
44
45define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
46; X86-FAST-LABEL: var_shift_i16:
47; X86-FAST:       # %bb.0:
48; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
49; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
50; X86-FAST-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
51; X86-FAST-NEXT:    andb $15, %cl
52; X86-FAST-NEXT:    shrdw %cl, %dx, %ax
53; X86-FAST-NEXT:    retl
54;
55; X86-SLOW-LABEL: var_shift_i16:
56; X86-SLOW:       # %bb.0:
57; X86-SLOW-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
58; X86-SLOW-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
59; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
60; X86-SLOW-NEXT:    shll $16, %eax
61; X86-SLOW-NEXT:    orl %edx, %eax
62; X86-SLOW-NEXT:    andb $15, %cl
63; X86-SLOW-NEXT:    shrl %cl, %eax
64; X86-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
65; X86-SLOW-NEXT:    retl
66;
67; X64-FAST-LABEL: var_shift_i16:
68; X64-FAST:       # %bb.0:
69; X64-FAST-NEXT:    movl %edx, %ecx
70; X64-FAST-NEXT:    movl %esi, %eax
71; X64-FAST-NEXT:    andb $15, %cl
72; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
73; X64-FAST-NEXT:    shrdw %cl, %di, %ax
74; X64-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
75; X64-FAST-NEXT:    retq
76;
77; X64-SLOW-LABEL: var_shift_i16:
78; X64-SLOW:       # %bb.0:
79; X64-SLOW-NEXT:    movl %edx, %ecx
80; X64-SLOW-NEXT:    shll $16, %edi
81; X64-SLOW-NEXT:    movzwl %si, %eax
82; X64-SLOW-NEXT:    orl %edi, %eax
83; X64-SLOW-NEXT:    andb $15, %cl
84; X64-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
85; X64-SLOW-NEXT:    shrl %cl, %eax
86; X64-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
87; X64-SLOW-NEXT:    retq
88  %tmp = tail call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
89  ret i16 %tmp
90}
91
92define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
93; X86-FAST-LABEL: var_shift_i32:
94; X86-FAST:       # %bb.0:
95; X86-FAST-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
96; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
97; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
98; X86-FAST-NEXT:    shrdl %cl, %edx, %eax
99; X86-FAST-NEXT:    retl
100;
101; X86-SLOW-LABEL: var_shift_i32:
102; X86-SLOW:       # %bb.0:
103; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
104; X86-SLOW-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
105; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
106; X86-SLOW-NEXT:    shrl %cl, %edx
107; X86-SLOW-NEXT:    notb %cl
108; X86-SLOW-NEXT:    addl %eax, %eax
109; X86-SLOW-NEXT:    shll %cl, %eax
110; X86-SLOW-NEXT:    orl %edx, %eax
111; X86-SLOW-NEXT:    retl
112;
113; X64-FAST-LABEL: var_shift_i32:
114; X64-FAST:       # %bb.0:
115; X64-FAST-NEXT:    movl %edx, %ecx
116; X64-FAST-NEXT:    movl %esi, %eax
117; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
118; X64-FAST-NEXT:    shrdl %cl, %edi, %eax
119; X64-FAST-NEXT:    retq
120;
121; X64-SLOW-LABEL: var_shift_i32:
122; X64-SLOW:       # %bb.0:
123; X64-SLOW-NEXT:    movl %edx, %ecx
124; X64-SLOW-NEXT:    # kill: def $edi killed $edi def $rdi
125; X64-SLOW-NEXT:    shrl %cl, %esi
126; X64-SLOW-NEXT:    leal (%rdi,%rdi), %eax
127; X64-SLOW-NEXT:    notb %cl
128; X64-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
129; X64-SLOW-NEXT:    shll %cl, %eax
130; X64-SLOW-NEXT:    orl %esi, %eax
131; X64-SLOW-NEXT:    retq
132  %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
133  ret i32 %tmp
134}
135
136define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
137; X86-LABEL: var_shift_i32_optsize:
138; X86:       # %bb.0:
139; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
140; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
141; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
142; X86-NEXT:    shrdl %cl, %edx, %eax
143; X86-NEXT:    retl
144;
145; X64-LABEL: var_shift_i32_optsize:
146; X64:       # %bb.0:
147; X64-NEXT:    movl %edx, %ecx
148; X64-NEXT:    movl %esi, %eax
149; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
150; X64-NEXT:    shrdl %cl, %edi, %eax
151; X64-NEXT:    retq
152  %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
153  ret i32 %tmp
154}
155
156define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
157; X86-LABEL: var_shift_i32_pgso:
158; X86:       # %bb.0:
159; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
160; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
161; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
162; X86-NEXT:    shrdl %cl, %edx, %eax
163; X86-NEXT:    retl
164;
165; X64-LABEL: var_shift_i32_pgso:
166; X64:       # %bb.0:
167; X64-NEXT:    movl %edx, %ecx
168; X64-NEXT:    movl %esi, %eax
169; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
170; X64-NEXT:    shrdl %cl, %edi, %eax
171; X64-NEXT:    retq
172  %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
173  ret i32 %tmp
174}
175
176define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
177; X86-FAST-LABEL: var_shift_i64:
178; X86-FAST:       # %bb.0:
179; X86-FAST-NEXT:    pushl %esi
180; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
181; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %esi
182; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
183; X86-FAST-NEXT:    testb $32, %cl
184; X86-FAST-NEXT:    je .LBB5_1
185; X86-FAST-NEXT:  # %bb.2:
186; X86-FAST-NEXT:    movl %esi, %edx
187; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %esi
188; X86-FAST-NEXT:    jmp .LBB5_3
189; X86-FAST-NEXT:  .LBB5_1:
190; X86-FAST-NEXT:    movl %eax, %edx
191; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
192; X86-FAST-NEXT:  .LBB5_3:
193; X86-FAST-NEXT:    shrdl %cl, %edx, %eax
194; X86-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
195; X86-FAST-NEXT:    shrdl %cl, %esi, %edx
196; X86-FAST-NEXT:    popl %esi
197; X86-FAST-NEXT:    retl
198;
199; X86-SLOW-LABEL: var_shift_i64:
200; X86-SLOW:       # %bb.0:
201; X86-SLOW-NEXT:    pushl %ebx
202; X86-SLOW-NEXT:    pushl %edi
203; X86-SLOW-NEXT:    pushl %esi
204; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
205; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
206; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebx
207; X86-SLOW-NEXT:    testb $32, %bl
208; X86-SLOW-NEXT:    je .LBB5_1
209; X86-SLOW-NEXT:  # %bb.2:
210; X86-SLOW-NEXT:    movl %edx, %esi
211; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
212; X86-SLOW-NEXT:    jmp .LBB5_3
213; X86-SLOW-NEXT:  .LBB5_1:
214; X86-SLOW-NEXT:    movl %eax, %esi
215; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
216; X86-SLOW-NEXT:  .LBB5_3:
217; X86-SLOW-NEXT:    leal (%esi,%esi), %edi
218; X86-SLOW-NEXT:    movb %bl, %ch
219; X86-SLOW-NEXT:    notb %ch
220; X86-SLOW-NEXT:    movb %ch, %cl
221; X86-SLOW-NEXT:    shll %cl, %edi
222; X86-SLOW-NEXT:    movb %bl, %cl
223; X86-SLOW-NEXT:    shrl %cl, %eax
224; X86-SLOW-NEXT:    orl %edi, %eax
225; X86-SLOW-NEXT:    shrl %cl, %esi
226; X86-SLOW-NEXT:    addl %edx, %edx
227; X86-SLOW-NEXT:    movb %ch, %cl
228; X86-SLOW-NEXT:    shll %cl, %edx
229; X86-SLOW-NEXT:    orl %esi, %edx
230; X86-SLOW-NEXT:    popl %esi
231; X86-SLOW-NEXT:    popl %edi
232; X86-SLOW-NEXT:    popl %ebx
233; X86-SLOW-NEXT:    retl
234;
235; X64-FAST-LABEL: var_shift_i64:
236; X64-FAST:       # %bb.0:
237; X64-FAST-NEXT:    movq %rdx, %rcx
238; X64-FAST-NEXT:    movq %rsi, %rax
239; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $rcx
240; X64-FAST-NEXT:    shrdq %cl, %rdi, %rax
241; X64-FAST-NEXT:    retq
242;
243; X64-SLOW-LABEL: var_shift_i64:
244; X64-SLOW:       # %bb.0:
245; X64-SLOW-NEXT:    movq %rdx, %rcx
246; X64-SLOW-NEXT:    shrq %cl, %rsi
247; X64-SLOW-NEXT:    leaq (%rdi,%rdi), %rax
248; X64-SLOW-NEXT:    notb %cl
249; X64-SLOW-NEXT:    # kill: def $cl killed $cl killed $rcx
250; X64-SLOW-NEXT:    shlq %cl, %rax
251; X64-SLOW-NEXT:    orq %rsi, %rax
252; X64-SLOW-NEXT:    retq
253  %tmp = tail call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
254  ret i64 %tmp
255}
256
257define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
258; X86-FAST-LABEL: var_shift_i128:
259; X86-FAST:       # %bb.0:
260; X86-FAST-NEXT:    pushl %ebp
261; X86-FAST-NEXT:    pushl %ebx
262; X86-FAST-NEXT:    pushl %edi
263; X86-FAST-NEXT:    pushl %esi
264; X86-FAST-NEXT:    pushl %eax
265; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
266; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
267; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ebx
268; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edi
269; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %esi
270; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
271; X86-FAST-NEXT:    testb $64, %cl
272; X86-FAST-NEXT:    je .LBB6_1
273; X86-FAST-NEXT:  # %bb.2:
274; X86-FAST-NEXT:    movl %edi, %ebp
275; X86-FAST-NEXT:    movl %ebx, %edi
276; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ebx
277; X86-FAST-NEXT:    movl %esi, (%esp) # 4-byte Spill
278; X86-FAST-NEXT:    movl %edx, %esi
279; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
280; X86-FAST-NEXT:    testb $32, %cl
281; X86-FAST-NEXT:    je .LBB6_4
282; X86-FAST-NEXT:    jmp .LBB6_5
283; X86-FAST-NEXT:  .LBB6_1:
284; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ebp
285; X86-FAST-NEXT:    movl %ebp, (%esp) # 4-byte Spill
286; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ebp
287; X86-FAST-NEXT:    testb $32, %cl
288; X86-FAST-NEXT:    jne .LBB6_5
289; X86-FAST-NEXT:  .LBB6_4:
290; X86-FAST-NEXT:    movl %edx, %ebx
291; X86-FAST-NEXT:    movl %edi, %edx
292; X86-FAST-NEXT:    movl %esi, %edi
293; X86-FAST-NEXT:    movl %ebp, %esi
294; X86-FAST-NEXT:    movl (%esp), %ebp # 4-byte Reload
295; X86-FAST-NEXT:  .LBB6_5:
296; X86-FAST-NEXT:    shrdl %cl, %esi, %ebp
297; X86-FAST-NEXT:    shrdl %cl, %edi, %esi
298; X86-FAST-NEXT:    shrdl %cl, %edx, %edi
299; X86-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
300; X86-FAST-NEXT:    shrdl %cl, %ebx, %edx
301; X86-FAST-NEXT:    movl %edx, 12(%eax)
302; X86-FAST-NEXT:    movl %edi, 8(%eax)
303; X86-FAST-NEXT:    movl %esi, 4(%eax)
304; X86-FAST-NEXT:    movl %ebp, (%eax)
305; X86-FAST-NEXT:    addl $4, %esp
306; X86-FAST-NEXT:    popl %esi
307; X86-FAST-NEXT:    popl %edi
308; X86-FAST-NEXT:    popl %ebx
309; X86-FAST-NEXT:    popl %ebp
310; X86-FAST-NEXT:    retl $4
311;
312; X86-SLOW-LABEL: var_shift_i128:
313; X86-SLOW:       # %bb.0:
314; X86-SLOW-NEXT:    pushl %ebp
315; X86-SLOW-NEXT:    pushl %ebx
316; X86-SLOW-NEXT:    pushl %edi
317; X86-SLOW-NEXT:    pushl %esi
318; X86-SLOW-NEXT:    subl $8, %esp
319; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
320; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edi
321; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebx
322; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebp
323; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
324; X86-SLOW-NEXT:    testb $64, %cl
325; X86-SLOW-NEXT:    je .LBB6_1
326; X86-SLOW-NEXT:  # %bb.2:
327; X86-SLOW-NEXT:    movl %ebx, %edx
328; X86-SLOW-NEXT:    movl %edi, %ebx
329; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edi
330; X86-SLOW-NEXT:    movl %ebp, %eax
331; X86-SLOW-NEXT:    movl %esi, %ebp
332; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
333; X86-SLOW-NEXT:    testb $32, %cl
334; X86-SLOW-NEXT:    jne .LBB6_5
335; X86-SLOW-NEXT:  .LBB6_4:
336; X86-SLOW-NEXT:    movl %esi, %edi
337; X86-SLOW-NEXT:    movl %ebx, (%esp) # 4-byte Spill
338; X86-SLOW-NEXT:    movl %ebp, %esi
339; X86-SLOW-NEXT:    movl %edx, %ebp
340; X86-SLOW-NEXT:    movl %eax, %edx
341; X86-SLOW-NEXT:    jmp .LBB6_6
342; X86-SLOW-NEXT:  .LBB6_1:
343; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
344; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
345; X86-SLOW-NEXT:    testb $32, %cl
346; X86-SLOW-NEXT:    je .LBB6_4
347; X86-SLOW-NEXT:  .LBB6_5:
348; X86-SLOW-NEXT:    movl %esi, (%esp) # 4-byte Spill
349; X86-SLOW-NEXT:    movl %ebx, %esi
350; X86-SLOW-NEXT:  .LBB6_6:
351; X86-SLOW-NEXT:    shrl %cl, %edx
352; X86-SLOW-NEXT:    movl %ecx, %ebx
353; X86-SLOW-NEXT:    notb %bl
354; X86-SLOW-NEXT:    leal (%ebp,%ebp), %eax
355; X86-SLOW-NEXT:    movl %ebx, %ecx
356; X86-SLOW-NEXT:    shll %cl, %eax
357; X86-SLOW-NEXT:    orl %edx, %eax
358; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
359; X86-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
360; X86-SLOW-NEXT:    shrl %cl, %ebp
361; X86-SLOW-NEXT:    leal (%esi,%esi), %edx
362; X86-SLOW-NEXT:    movl %ebx, %ecx
363; X86-SLOW-NEXT:    shll %cl, %edx
364; X86-SLOW-NEXT:    orl %ebp, %edx
365; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
366; X86-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
367; X86-SLOW-NEXT:    shrl %cl, %esi
368; X86-SLOW-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
369; X86-SLOW-NEXT:    movl (%esp), %esi # 4-byte Reload
370; X86-SLOW-NEXT:    leal (%esi,%esi), %ebp
371; X86-SLOW-NEXT:    movl %ebx, %ecx
372; X86-SLOW-NEXT:    shll %cl, %ebp
373; X86-SLOW-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
374; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
375; X86-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
376; X86-SLOW-NEXT:    shrl %cl, %esi
377; X86-SLOW-NEXT:    addl %edi, %edi
378; X86-SLOW-NEXT:    movl %ebx, %ecx
379; X86-SLOW-NEXT:    shll %cl, %edi
380; X86-SLOW-NEXT:    orl %esi, %edi
381; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
382; X86-SLOW-NEXT:    movl %edi, 12(%ecx)
383; X86-SLOW-NEXT:    movl %ebp, 8(%ecx)
384; X86-SLOW-NEXT:    movl %edx, 4(%ecx)
385; X86-SLOW-NEXT:    movl %eax, (%ecx)
386; X86-SLOW-NEXT:    movl %ecx, %eax
387; X86-SLOW-NEXT:    addl $8, %esp
388; X86-SLOW-NEXT:    popl %esi
389; X86-SLOW-NEXT:    popl %edi
390; X86-SLOW-NEXT:    popl %ebx
391; X86-SLOW-NEXT:    popl %ebp
392; X86-SLOW-NEXT:    retl $4
393;
394; X64-FAST-LABEL: var_shift_i128:
395; X64-FAST:       # %bb.0:
396; X64-FAST-NEXT:    movq %rdx, %rax
397; X64-FAST-NEXT:    testb $64, %r8b
398; X64-FAST-NEXT:    cmoveq %rdi, %rsi
399; X64-FAST-NEXT:    cmoveq %rcx, %rdi
400; X64-FAST-NEXT:    cmovneq %rcx, %rax
401; X64-FAST-NEXT:    movl %r8d, %ecx
402; X64-FAST-NEXT:    shrdq %cl, %rdi, %rax
403; X64-FAST-NEXT:    shrdq %cl, %rsi, %rdi
404; X64-FAST-NEXT:    movq %rdi, %rdx
405; X64-FAST-NEXT:    retq
406;
407; X64-SLOW-LABEL: var_shift_i128:
408; X64-SLOW:       # %bb.0:
409; X64-SLOW-NEXT:    testb $64, %r8b
410; X64-SLOW-NEXT:    cmoveq %rdi, %rsi
411; X64-SLOW-NEXT:    cmoveq %rcx, %rdi
412; X64-SLOW-NEXT:    cmovneq %rcx, %rdx
413; X64-SLOW-NEXT:    movl %r8d, %ecx
414; X64-SLOW-NEXT:    shrq %cl, %rdx
415; X64-SLOW-NEXT:    leaq (%rdi,%rdi), %rax
416; X64-SLOW-NEXT:    movl %r8d, %r9d
417; X64-SLOW-NEXT:    notb %r9b
418; X64-SLOW-NEXT:    movl %r9d, %ecx
419; X64-SLOW-NEXT:    shlq %cl, %rax
420; X64-SLOW-NEXT:    orq %rdx, %rax
421; X64-SLOW-NEXT:    movl %r8d, %ecx
422; X64-SLOW-NEXT:    shrq %cl, %rdi
423; X64-SLOW-NEXT:    leaq (%rsi,%rsi), %rdx
424; X64-SLOW-NEXT:    movl %r9d, %ecx
425; X64-SLOW-NEXT:    shlq %cl, %rdx
426; X64-SLOW-NEXT:    orq %rdi, %rdx
427; X64-SLOW-NEXT:    retq
428  %tmp = tail call i128 @llvm.fshr.i128(i128 %x, i128 %y, i128 %z)
429  ret i128 %tmp
430}
431
432;
433; Const Funnel Shift
434;
435
436define i8 @const_shift_i8(i8 %x, i8 %y) nounwind {
437; X86-LABEL: const_shift_i8:
438; X86:       # %bb.0:
439; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
440; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
441; X86-NEXT:    shrb $7, %cl
442; X86-NEXT:    addb %al, %al
443; X86-NEXT:    orb %cl, %al
444; X86-NEXT:    retl
445;
446; X64-LABEL: const_shift_i8:
447; X64:       # %bb.0:
448; X64-NEXT:    # kill: def $edi killed $edi def $rdi
449; X64-NEXT:    shrb $7, %sil
450; X64-NEXT:    leal (%rdi,%rdi), %eax
451; X64-NEXT:    orb %sil, %al
452; X64-NEXT:    # kill: def $al killed $al killed $eax
453; X64-NEXT:    retq
454  %tmp = tail call i8 @llvm.fshr.i8(i8 %x, i8 %y, i8 7)
455  ret i8 %tmp
456}
457
458define i16 @const_shift_i16(i16 %x, i16 %y) nounwind {
459; X86-FAST-LABEL: const_shift_i16:
460; X86-FAST:       # %bb.0:
461; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
462; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
463; X86-FAST-NEXT:    shrdw $7, %cx, %ax
464; X86-FAST-NEXT:    retl
465;
466; X86-SLOW-LABEL: const_shift_i16:
467; X86-SLOW:       # %bb.0:
468; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
469; X86-SLOW-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
470; X86-SLOW-NEXT:    shrl $7, %ecx
471; X86-SLOW-NEXT:    shll $9, %eax
472; X86-SLOW-NEXT:    orl %ecx, %eax
473; X86-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
474; X86-SLOW-NEXT:    retl
475;
476; X64-FAST-LABEL: const_shift_i16:
477; X64-FAST:       # %bb.0:
478; X64-FAST-NEXT:    movl %esi, %eax
479; X64-FAST-NEXT:    shrdw $7, %di, %ax
480; X64-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
481; X64-FAST-NEXT:    retq
482;
483; X64-SLOW-LABEL: const_shift_i16:
484; X64-SLOW:       # %bb.0:
485; X64-SLOW-NEXT:    movzwl %si, %eax
486; X64-SLOW-NEXT:    shll $9, %edi
487; X64-SLOW-NEXT:    shrl $7, %eax
488; X64-SLOW-NEXT:    orl %edi, %eax
489; X64-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
490; X64-SLOW-NEXT:    retq
491  %tmp = tail call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 7)
492  ret i16 %tmp
493}
494
495define i32 @const_shift_i32(i32 %x, i32 %y) nounwind {
496; X86-FAST-LABEL: const_shift_i32:
497; X86-FAST:       # %bb.0:
498; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
499; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
500; X86-FAST-NEXT:    shrdl $7, %ecx, %eax
501; X86-FAST-NEXT:    retl
502;
503; X86-SLOW-LABEL: const_shift_i32:
504; X86-SLOW:       # %bb.0:
505; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
506; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
507; X86-SLOW-NEXT:    shrl $7, %ecx
508; X86-SLOW-NEXT:    shll $25, %eax
509; X86-SLOW-NEXT:    orl %ecx, %eax
510; X86-SLOW-NEXT:    retl
511;
512; X64-FAST-LABEL: const_shift_i32:
513; X64-FAST:       # %bb.0:
514; X64-FAST-NEXT:    movl %edi, %eax
515; X64-FAST-NEXT:    shldl $25, %esi, %eax
516; X64-FAST-NEXT:    retq
517;
518; X64-SLOW-LABEL: const_shift_i32:
519; X64-SLOW:       # %bb.0:
520; X64-SLOW-NEXT:    # kill: def $esi killed $esi def $rsi
521; X64-SLOW-NEXT:    # kill: def $edi killed $edi def $rdi
522; X64-SLOW-NEXT:    shrl $7, %esi
523; X64-SLOW-NEXT:    shll $25, %edi
524; X64-SLOW-NEXT:    leal (%rdi,%rsi), %eax
525; X64-SLOW-NEXT:    retq
526  %tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 7)
527  ret i32 %tmp
528}
529
530define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
531; X86-FAST-LABEL: const_shift_i64:
532; X86-FAST:       # %bb.0:
533; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
534; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
535; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
536; X86-FAST-NEXT:    shldl $25, %ecx, %edx
537; X86-FAST-NEXT:    shrdl $7, %ecx, %eax
538; X86-FAST-NEXT:    retl
539;
540; X86-SLOW-LABEL: const_shift_i64:
541; X86-SLOW:       # %bb.0:
542; X86-SLOW-NEXT:    pushl %esi
543; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
544; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
545; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
546; X86-SLOW-NEXT:    shrl $7, %ecx
547; X86-SLOW-NEXT:    movl %esi, %eax
548; X86-SLOW-NEXT:    shll $25, %eax
549; X86-SLOW-NEXT:    orl %ecx, %eax
550; X86-SLOW-NEXT:    shrl $7, %esi
551; X86-SLOW-NEXT:    shll $25, %edx
552; X86-SLOW-NEXT:    orl %esi, %edx
553; X86-SLOW-NEXT:    popl %esi
554; X86-SLOW-NEXT:    retl
555;
556; X64-FAST-LABEL: const_shift_i64:
557; X64-FAST:       # %bb.0:
558; X64-FAST-NEXT:    movq %rdi, %rax
559; X64-FAST-NEXT:    shldq $57, %rsi, %rax
560; X64-FAST-NEXT:    retq
561;
562; X64-SLOW-LABEL: const_shift_i64:
563; X64-SLOW:       # %bb.0:
564; X64-SLOW-NEXT:    shrq $7, %rsi
565; X64-SLOW-NEXT:    shlq $57, %rdi
566; X64-SLOW-NEXT:    leaq (%rdi,%rsi), %rax
567; X64-SLOW-NEXT:    retq
568  %tmp = tail call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 7)
569  ret i64 %tmp
570}
571
572;
573; Combine Consecutive Loads
574;
575
576define i8 @combine_fshr_load_i8(ptr %p) nounwind {
577; X86-LABEL: combine_fshr_load_i8:
578; X86:       # %bb.0:
579; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
580; X86-NEXT:    movzbl (%eax), %eax
581; X86-NEXT:    retl
582;
583; X64-LABEL: combine_fshr_load_i8:
584; X64:       # %bb.0:
585; X64-NEXT:    movzbl (%rdi), %eax
586; X64-NEXT:    retq
587  %p1 = getelementptr i8, ptr %p, i32 1
588  %ld0 = load i8, ptr%p
589  %ld1 = load i8, ptr%p1
590  %res = call i8 @llvm.fshr.i8(i8 %ld1, i8 %ld0, i8 8)
591  ret i8 %res
592}
593
594define i16 @combine_fshr_load_i16(ptr %p) nounwind {
595; X86-LABEL: combine_fshr_load_i16:
596; X86:       # %bb.0:
597; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
598; X86-NEXT:    movzwl 1(%eax), %eax
599; X86-NEXT:    retl
600;
601; X64-LABEL: combine_fshr_load_i16:
602; X64:       # %bb.0:
603; X64-NEXT:    movzwl 1(%rdi), %eax
604; X64-NEXT:    retq
605  %p1 = getelementptr i16, ptr %p, i32 1
606  %ld0 = load i16, ptr%p
607  %ld1 = load i16, ptr%p1
608  %res = call i16 @llvm.fshr.i16(i16 %ld1, i16 %ld0, i16 8)
609  ret i16 %res
610}
611
612define i32 @combine_fshr_load_i32(ptr %p) nounwind {
613; X86-LABEL: combine_fshr_load_i32:
614; X86:       # %bb.0:
615; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
616; X86-NEXT:    movl 9(%eax), %eax
617; X86-NEXT:    retl
618;
619; X64-LABEL: combine_fshr_load_i32:
620; X64:       # %bb.0:
621; X64-NEXT:    movl 9(%rdi), %eax
622; X64-NEXT:    retq
623  %p0 = getelementptr i32, ptr %p, i32 2
624  %p1 = getelementptr i32, ptr %p, i32 3
625  %ld0 = load i32, ptr%p0
626  %ld1 = load i32, ptr%p1
627  %res = call i32 @llvm.fshr.i32(i32 %ld1, i32 %ld0, i32 8)
628  ret i32 %res
629}
630
631define i64 @combine_fshr_load_i64(ptr %p) nounwind {
632; X86-LABEL: combine_fshr_load_i64:
633; X86:       # %bb.0:
634; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
635; X86-NEXT:    movl 11(%ecx), %eax
636; X86-NEXT:    movl 15(%ecx), %edx
637; X86-NEXT:    retl
638;
639; X64-LABEL: combine_fshr_load_i64:
640; X64:       # %bb.0:
641; X64-NEXT:    movq 11(%rdi), %rax
642; X64-NEXT:    retq
643  %p0 = getelementptr i64, ptr %p, i64 1
644  %p1 = getelementptr i64, ptr %p, i64 2
645  %ld0 = load i64, ptr%p0
646  %ld1 = load i64, ptr%p1
647  %res = call i64 @llvm.fshr.i64(i64 %ld1, i64 %ld0, i64 24)
648  ret i64 %res
649}
650
651!llvm.module.flags = !{!0}
652!0 = !{i32 1, !"ProfileSummary", !1}
653!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
654!2 = !{!"ProfileFormat", !"InstrProf"}
655!3 = !{!"TotalCount", i64 10000}
656!4 = !{!"MaxCount", i64 10}
657!5 = !{!"MaxInternalCount", i64 1}
658!6 = !{!"MaxFunctionCount", i64 1000}
659!7 = !{!"NumCounts", i64 3}
660!8 = !{!"NumFunctions", i64 3}
661!9 = !{!"DetailedSummary", !10}
662!10 = !{!11, !12, !13}
663!11 = !{i32 10000, i64 100, i32 1}
664!12 = !{i32 999000, i64 100, i32 1}
665!13 = !{i32 999999, i64 1, i32 2}
666!14 = !{!"function_entry_count", i64 0}
667