1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686--   -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,X86-SSE2
3; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,X64-AVX2
4
5declare i8 @llvm.fshl.i8(i8, i8, i8)
6declare i16 @llvm.fshl.i16(i16, i16, i16)
7declare i32 @llvm.fshl.i32(i32, i32, i32)
8declare i64 @llvm.fshl.i64(i64, i64, i64)
9declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
10declare i128 @llvm.fshl.i128(i128, i128, i128)
11
12declare i8 @llvm.fshr.i8(i8, i8, i8)
13declare i16 @llvm.fshr.i16(i16, i16, i16)
14declare i32 @llvm.fshr.i32(i32, i32, i32)
15declare i64 @llvm.fshr.i64(i64, i64, i64)
16declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
17
18; General case - all operands can be variables
19
; Variable count: both targets lower to a single shld with the count in %cl.
define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-SSE2-LABEL: fshl_i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %esi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; i686 splits the i64 funnel shift into two 32-bit shldl halves, selecting
; inputs with cmovs keyed off bit 5 of the count (testb $32); x86-64 uses a
; single shldq.
define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-SSE2-LABEL: fshl_i64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    movl %edx, %edi
; X86-SSE2-NEXT:    cmovnel %esi, %edi
; X86-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    cmovnel {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl %edi, %eax
; X86-SSE2-NEXT:    shldl %cl, %esi, %eax
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %edi, %edx
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movq %rdx, %rcx
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-AVX2-NEXT:    shldq %cl, %rsi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}
71
; i686 legalizes the i128 funnel shift to four 32-bit shldl stages after
; cmov-based operand rotation on bits 6 ($64) and 5 ($32) of the count, and
; stores the result through the sret pointer (retl $4); x86-64 needs only
; two shldq and one level of cmovs on bit 6 of the count.
define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SSE2-LABEL: fshl_i128:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    testb $64, %cl
; X86-SSE2-NEXT:    movl %esi, %eax
; X86-SSE2-NEXT:    cmovnel %ebx, %eax
; X86-SSE2-NEXT:    movl %edx, %ebp
; X86-SSE2-NEXT:    cmovnel %edi, %ebp
; X86-SSE2-NEXT:    cmovnel {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    cmovnel {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    cmovnel %esi, %edx
; X86-SSE2-NEXT:    cmovnel %ebp, %esi
; X86-SSE2-NEXT:    cmovnel %eax, %ebp
; X86-SSE2-NEXT:    cmovel %edi, %ebx
; X86-SSE2-NEXT:    cmovel %eax, %edi
; X86-SSE2-NEXT:    movl %edi, %eax
; X86-SSE2-NEXT:    shldl %cl, %ebx, %eax
; X86-SSE2-NEXT:    movl %ebp, %ebx
; X86-SSE2-NEXT:    shldl %cl, %edi, %ebx
; X86-SSE2-NEXT:    movl %esi, %edi
; X86-SSE2-NEXT:    shldl %cl, %ebp, %edi
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %esi, %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl %edx, 12(%ecx)
; X86-SSE2-NEXT:    movl %edi, 8(%ecx)
; X86-SSE2-NEXT:    movl %ebx, 4(%ecx)
; X86-SSE2-NEXT:    movl %eax, (%ecx)
; X86-SSE2-NEXT:    movl %ecx, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl $4
;
; X64-AVX2-LABEL: fshl_i128:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    testb $64, %r8b
; X64-AVX2-NEXT:    cmovneq %rdi, %rsi
; X64-AVX2-NEXT:    cmoveq %rcx, %rdx
; X64-AVX2-NEXT:    cmovneq %rcx, %rdi
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    movl %r8d, %ecx
; X64-AVX2-NEXT:    shldq %cl, %rdx, %rax
; X64-AVX2-NEXT:    shldq %cl, %rdi, %rsi
; X64-AVX2-NEXT:    movq %rsi, %rdx
; X64-AVX2-NEXT:    retq
  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
  ret i128 %f
}
134
135; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
; Non-power-of-2 width: the count is reduced modulo 37 (via the __umoddi3
; libcall on i686, via a multiply-by-reciprocal sequence on x86-64) and %y is
; pre-shifted left by 27 (= 64-37) so a 64-bit shld produces the result.
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X86-SSE2-LABEL: fshl_i37:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    andl $31, %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    shldl $27, %ebx, %edi
; X86-SSE2-NEXT:    pushl $0
; X86-SSE2-NEXT:    pushl $37
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    calll __umoddi3
; X86-SSE2-NEXT:    addl $16, %esp
; X86-SSE2-NEXT:    movl %eax, %ecx
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    jne .LBB3_1
; X86-SSE2-NEXT:  # %bb.2:
; X86-SSE2-NEXT:    movl %edi, %ebx
; X86-SSE2-NEXT:    movl %esi, %edi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    jmp .LBB3_3
; X86-SSE2-NEXT:  .LBB3_1:
; X86-SSE2-NEXT:    shll $27, %ebx
; X86-SSE2-NEXT:  .LBB3_3:
; X86-SSE2-NEXT:    movl %edi, %eax
; X86-SSE2-NEXT:    shldl %cl, %ebx, %eax
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %edi, %esi
; X86-SSE2-NEXT:    movl %esi, %edx
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i37:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movq %rdx, %rcx
; X64-AVX2-NEXT:    movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
; X64-AVX2-NEXT:    andq %rdx, %rax
; X64-AVX2-NEXT:    movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B
; X64-AVX2-NEXT:    mulq %rdx
; X64-AVX2-NEXT:    shrq $5, %rdx
; X64-AVX2-NEXT:    leal (%rdx,%rdx,8), %eax
; X64-AVX2-NEXT:    leal (%rdx,%rax,4), %eax
; X64-AVX2-NEXT:    subl %eax, %ecx
; X64-AVX2-NEXT:    shlq $27, %rsi
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-AVX2-NEXT:    shldq %cl, %rsi, %rdi
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}
195
196; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011
197
declare i7 @llvm.fshl.i7(i7, i7, i7)
; All operands constant: folded to an immediate move (0b1000011 = 67).
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $67, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}
207
208; With constant shift amount, this is 'shld' with constant operand.
209
; Constant count 9 becomes shld with an immediate.
define i32 @fshl_i32_const_shift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshl_i32_const_shift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl $9, %esi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount.

; Count 41 is reduced mod 32 to 9 — identical codegen to the in-range case.
define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshl_i32_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_const_overshift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl $9, %esi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.

; Count 105 is reduced mod 64 to 41; i686 splits into shldl $9 / shrdl $23
; on the two halves, x86-64 uses one shldq $41.
define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X86-SSE2-LABEL: fshl_i64_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shldl $9, %ecx, %edx
; X86-SSE2-NEXT:    shrdl $23, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i64_const_overshift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    shldq $41, %rsi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

; Constant-folded: fshl(0xFF, 0, 7) = 0x80 (movb $-128).
define i8 @fshl_i8_const_fold() nounwind {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $-128, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}
277
278; Repeat everything for funnel shift right.
279
280; General case - all operands can be variables
281
; Variable count: the fshr counterpart lowers to a single shrd with the
; count in %cl on both targets.
define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-SSE2-LABEL: fshr_i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}
301
302; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
; Non-power-of-2 width, right variant: the effective count is
; (z mod 37) + 27 so the value pre-shifted left by 27 (= 64-37) can be
; funnel-shifted right with shrd.
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X86-SSE2-LABEL: fshr_i37:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    andl $31, %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    shldl $27, %ebx, %esi
; X86-SSE2-NEXT:    pushl $0
; X86-SSE2-NEXT:    pushl $37
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    calll __umoddi3
; X86-SSE2-NEXT:    addl $16, %esp
; X86-SSE2-NEXT:    movl %eax, %ecx
; X86-SSE2-NEXT:    addl $27, %ecx
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    je .LBB10_1
; X86-SSE2-NEXT:  # %bb.2:
; X86-SSE2-NEXT:    movl %edi, %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    jmp .LBB10_3
; X86-SSE2-NEXT:  .LBB10_1:
; X86-SSE2-NEXT:    shll $27, %ebx
; X86-SSE2-NEXT:    movl %esi, %edx
; X86-SSE2-NEXT:    movl %ebx, %esi
; X86-SSE2-NEXT:  .LBB10_3:
; X86-SSE2-NEXT:    shrdl %cl, %edx, %esi
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shrdl %cl, %edi, %edx
; X86-SSE2-NEXT:    movl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i37:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movq %rdx, %rcx
; X64-AVX2-NEXT:    movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
; X64-AVX2-NEXT:    andq %rdx, %rax
; X64-AVX2-NEXT:    movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B
; X64-AVX2-NEXT:    mulq %rdx
; X64-AVX2-NEXT:    shrq $5, %rdx
; X64-AVX2-NEXT:    leal (%rdx,%rdx,8), %eax
; X64-AVX2-NEXT:    leal (%rdx,%rax,4), %eax
; X64-AVX2-NEXT:    subl %eax, %ecx
; X64-AVX2-NEXT:    addl $27, %ecx
; X64-AVX2-NEXT:    shlq $27, %rsi
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-AVX2-NEXT:    shrdq %cl, %rdi, %rsi
; X64-AVX2-NEXT:    movq %rsi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
; All operands constant: folded to an immediate move (0b0011111 = 31).
define i7 @fshr_i7_const_fold() nounwind {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $31, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}
376
377; demanded bits tests
378
; The or-ed-in constant bits are absorbed by demanded-bits analysis: no orl
; instructions are emitted, only the shld.
define i32 @fshl_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_demandedbits:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_demandedbits:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl $9, %esi, %eax
; X64-AVX2-NEXT:    retq
  %x = or i32 %a0, 2147483648
  %y = or i32 %a1, 1
  %res = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %res
}

; Right-shift counterpart: no orl emitted; x86-64 canonicalizes to
; shld $23 (= 32-9).
define i32 @fshr_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_demandedbits:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_demandedbits:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl $23, %esi, %eax
; X64-AVX2-NEXT:    retq
  %x = or i32 %a0, 2147483648
  %y = or i32 %a1, 1
  %res = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %res
}
416
417; undef handling
418
; Undef first operand, variable count: shld is still emitted; its "high"
; source register is unconstrained.
define i32 @fshl_i32_undef0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef0:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %a1)
  ret i32 %res
}

; Undef first operand with a masked count: still shld.
define i32 @fshl_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef0_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andl $7, %ecx
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef0_msk:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    andl $7, %ecx
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %m)
  ret i32 %res
}

; Undef first operand with a constant count: folds to a plain shrl $23.
define i32 @fshl_i32_undef0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef0_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shrl $23, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9)
  ret i32 %res
}

; Undef second operand, variable count.
define i32 @fshl_i32_undef1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %eax, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %a1)
  ret i32 %res
}

; Undef second operand with a masked count: simplifies to a plain
; variable shll.
define i32 @fshl_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef1_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andb $7, %cl
; X86-SSE2-NEXT:    shll %cl, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef1_msk:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    andb $7, %cl
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shll %cl, %eax
; X64-AVX2-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %m)
  ret i32 %res
}

; Undef second operand with a constant count: folds to shll $9.
define i32 @fshl_i32_undef1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef1_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shll $9, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9)
  ret i32 %res
}

; Undef count: shld is emitted with an arbitrary value in %cl.
define i32 @fshl_i32_undef2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_undef2:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl %cl, %esi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 undef)
  ret i32 %res
}

; fshr with undef first operand, variable count.
define i32 @fshr_i32_undef0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef0:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %eax, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %a1)
  ret i32 %res
}

; fshr with undef first operand and masked count: simplifies to a plain
; variable shrl.
define i32 @fshr_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef0_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andb $7, %cl
; X86-SSE2-NEXT:    shrl %cl, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef0_msk:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    andb $7, %cl
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrl %cl, %eax
; X64-AVX2-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %m)
  ret i32 %res
}

; fshr with undef first operand and constant count: folds to shrl $9.
define i32 @fshr_i32_undef0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef0_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shrl $9, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9)
  ret i32 %res
}

; fshr with undef second operand, variable count.
define i32 @fshr_i32_undef1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %a1)
  ret i32 %res
}

; fshr with undef second operand and masked count: still shrd.
define i32 @fshr_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef1_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andl $7, %ecx
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef1_msk:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    andl $7, %ecx
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %m)
  ret i32 %res
}

; fshr with undef second operand and constant count: folds to shll $23.
define i32 @fshr_i32_undef1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef1_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shll $23, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9)
  ret i32 %res
}

; Undef count: shrd is emitted with an arbitrary value in %cl.
define i32 @fshr_i32_undef2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_undef2:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 undef)
  ret i32 %res
}
678
679; shift zero args
680
; Zero first operand, variable count: shld with a zeroed destination input.
define i32 @fshl_i32_zero0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    xorl %eax, %eax
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_zero0:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    xorl %eax, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 %a1)
  ret i32 %res
}

; Zero first operand, constant count: folds to shrl $23 (= 32-9).
define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_zero0_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shrl $23, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 9)
  ret i32 %res
}

; Zero second operand, variable count: shld pulling in zeros.
define i32 @fshl_i32_zero1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    xorl %edx, %edx
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_zero1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    xorl %edx, %edx
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %edx, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 %a1)
  ret i32 %res
}

; Zero second operand, constant count: folds to shll $9.
define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_zero1_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shll $9, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 9)
  ret i32 %res
}

; fshr with zero first operand, variable count: shrd with a zeroed
; high source.
define i32 @fshr_i32_zero0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    xorl %edx, %edx
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_zero0:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    xorl %edx, %edx
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %edx, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 %a1)
  ret i32 %res
}

; fshr with zero first operand, constant count: folds to shrl $9.
define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_zero0_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shrl $9, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 9)
  ret i32 %res
}

; fshr with zero second operand, variable count: shrd into a zeroed
; destination.
define i32 @fshr_i32_zero1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    xorl %eax, %eax
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_zero1:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %ecx
; X64-AVX2-NEXT:    xorl %eax, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 %a1)
  ret i32 %res
}

; fshr with zero second operand, constant count: folds to shll $23.
define i32 @fshr_i32_zero1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_zero1_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shll $23, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 9)
  ret i32 %res
}
826
827; shift by zero
828
; Count 0 is the identity: fshl returns the first operand unchanged.
define i32 @fshl_i32_zero2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_zero2:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 0)
  ret i32 %res
}

; Count 0 is the identity: fshr returns the second operand unchanged.
define i32 @fshr_i32_zero2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_zero2:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 0)
  ret i32 %res
}
856
857; With constant shift amount, this is 'shrd' or 'shld'.
858
; Constant count 9: i686 emits shrd $9; x86-64 prefers the equivalent
; shld $23 (= 32-9).
define i32 @fshr_i32_const_shift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshr_i32_const_shift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_const_shift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl $23, %esi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}
875
; Check modulo math on shift amount. 41-32=9, but the right-shift may become a left-shift, so 32-9=23.
877
; Count 41 reduces mod 32 to 9 — identical codegen to the in-range case.
define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshr_i32_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_const_overshift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    shldl $23, %esi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}
894
; 64-bit should also work. 105-64 = 41, but the right-shift becomes a left-shift, so 64-41=23.
896
; Count 105 reduces mod 64 to 41; x86-64 uses the left-form shldq $23
; (= 64-41), i686 splits into shrdl $9 / shldl $23 halves.
define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X86-SSE2-LABEL: fshr_i64_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X86-SSE2-NEXT:    shldl $23, %ecx, %edx
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i64_const_overshift:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movq %rdi, %rax
; X64-AVX2-NEXT:    shldq $23, %rsi, %rax
; X64-AVX2-NEXT:    retq
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}
915
916; This should work without any node-specific logic.
917
; Fully-constant operands: fshr(255, 0, 7) constant-folds to 0xFE (-2) with
; no shift instructions at all, on both targets.
define i8 @fshr_i8_const_fold() nounwind {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $-2, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}
926
; A shift amount equal to the bitwidth is reduced modulo 32 to 0, so the
; fshl simply returns %x - no shift instructions are emitted.
define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshl_i32_shift_by_bitwidth:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_i32_shift_by_bitwidth:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}
940
; Same as above for fshr: amount 32 reduces to 0, so the result is just the
; second operand %y (%esi on 64-bit).
define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshr_i32_shift_by_bitwidth:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_i32_shift_by_bitwidth:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}
954
; Vector splat of the bitwidth also folds away: fshl returns %x, which is
; already in xmm0, so both targets emit only a return.
define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
962
; Vector fshr by the bitwidth folds to the second operand: just a register
; move of %y (xmm1) into the return register (xmm0).
define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-SSE2-LABEL: fshr_v4i32_shift_by_bitwidth:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movaps %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_v4i32_shift_by_bitwidth:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovaps %xmm1, %xmm0
; X64-AVX2-NEXT:    retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
976
; Regression test for PR45265: a 12-byte struct is loaded as i88 (unaligned),
; arithmetically shifted right by 40, truncated to i64, and compared with the
; sign-extended index; @_Z3foov is tail-called when they are equal. The wide
; shift is lowered using double-shift (shrd/shld) instructions.
%struct.S = type { [11 x i8], i8 }
define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
; X86-SSE2-LABEL: PR45265:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    leal (%eax,%eax,2), %edx
; X86-SSE2-NEXT:    movzwl 8(%ecx,%edx,4), %esi
; X86-SSE2-NEXT:    movl 4(%ecx,%edx,4), %edi
; X86-SSE2-NEXT:    shrdl $8, %esi, %edi
; X86-SSE2-NEXT:    xorl %eax, %edi
; X86-SSE2-NEXT:    sarl $31, %eax
; X86-SSE2-NEXT:    movzbl 10(%ecx,%edx,4), %ecx
; X86-SSE2-NEXT:    shll $16, %ecx
; X86-SSE2-NEXT:    orl %esi, %ecx
; X86-SSE2-NEXT:    shll $8, %ecx
; X86-SSE2-NEXT:    movl %ecx, %edx
; X86-SSE2-NEXT:    sarl $8, %edx
; X86-SSE2-NEXT:    sarl $31, %ecx
; X86-SSE2-NEXT:    shldl $24, %edx, %ecx
; X86-SSE2-NEXT:    xorl %eax, %ecx
; X86-SSE2-NEXT:    orl %ecx, %edi
; X86-SSE2-NEXT:    jne .LBB46_1
; X86-SSE2-NEXT:  # %bb.2:
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    jmp _Z3foov # TAILCALL
; X86-SSE2-NEXT:  .LBB46_1:
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: PR45265:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movslq %edi, %rax
; X64-AVX2-NEXT:    leaq (%rax,%rax,2), %rcx
; X64-AVX2-NEXT:    movsbq 10(%rsi,%rcx,4), %rdx
; X64-AVX2-NEXT:    shlq $16, %rdx
; X64-AVX2-NEXT:    movzwl 8(%rsi,%rcx,4), %edi
; X64-AVX2-NEXT:    orq %rdx, %rdi
; X64-AVX2-NEXT:    movq (%rsi,%rcx,4), %rcx
; X64-AVX2-NEXT:    shrdq $40, %rdi, %rcx
; X64-AVX2-NEXT:    cmpq %rax, %rcx
; X64-AVX2-NEXT:    jne .LBB46_1
; X64-AVX2-NEXT:  # %bb.2:
; X64-AVX2-NEXT:    jmp _Z3foov # TAILCALL
; X64-AVX2-NEXT:  .LBB46_1:
; X64-AVX2-NEXT:    retq
  %3 = sext i32 %0 to i64
  %4 = getelementptr inbounds %struct.S, %struct.S* %1, i64 %3
  %5 = bitcast %struct.S* %4 to i88*
  %6 = load i88, i88* %5, align 1
  %7 = ashr i88 %6, 40
  %8 = trunc i88 %7 to i64
  %9 = icmp eq i64 %8, %3
  br i1 %9, label %10, label %11

10:
  tail call void @_Z3foov()
  br label %11

11:
  ret void
}
1043declare dso_local void @_Z3foov()
1044
; shl of %y or'd with fshl(%x, %y, %s): the shifted value is not already part
; of the fshl result, so no fold - a separate shift, a double-shift, and an or.
define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_fshl:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl %edx, %esi
; X86-SSE2-NEXT:    shll %cl, %esi
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    orl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_shl_fshl:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shll %cl, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %esi, %edi
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shy = shl i32 %y, %s
  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
  %or = or i32 %fun, %shy
  ret i32 %or
}
1073
; fshl with both data operands equal is a rotate, so this is
; (x << s) | rotl(y, s): shll + roll + orl, with no common fold.
define i32 @or_shl_rotl(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_rotl:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shll %cl, %edx
; X86-SSE2-NEXT:    roll %cl, %eax
; X86-SSE2-NEXT:    orl %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_shl_rotl:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shll %cl, %edi
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    roll %cl, %eax
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shx = shl i32 %x, %s
  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
  %or = or i32 %rot, %shx
  ret i32 %or
}
1099
; Same as or_shl_fshl but with the or operands commuted; codegen should be
; identical (or is commutative).
define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_fshl_commute:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl %edx, %esi
; X86-SSE2-NEXT:    shll %cl, %esi
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    orl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_shl_fshl_commute:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shll %cl, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %esi, %edi
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shy = shl i32 %y, %s
  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
  %or = or i32 %shy, %fun
  ret i32 %or
}
1128
; Same as or_shl_rotl but with the or operands commuted; codegen should be
; identical (or is commutative).
define i32 @or_shl_rotl_commute(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_rotl_commute:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shll %cl, %edx
; X86-SSE2-NEXT:    roll %cl, %eax
; X86-SSE2-NEXT:    orl %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_shl_rotl_commute:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shll %cl, %edi
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    roll %cl, %eax
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shx = shl i32 %x, %s
  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
  %or = or i32 %shx, %rot
  ret i32 %or
}
1154
; lshr of %y or'd with fshr(%y, %x, %s): the shifted value is not already part
; of the fshr result, so no fold - a separate shift, a double-shift, and an or.
define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_fshr:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl %edx, %esi
; X86-SSE2-NEXT:    shrl %cl, %esi
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    orl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_lshr_fshr:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shrl %cl, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %esi, %edi
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shy = lshr i32 %y, %s
  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
  %or = or i32 %fun, %shy
  ret i32 %or
}
1183
; fshr with both data operands equal is a rotate, so this is
; (x >> s) | rotr(y, s): shrl + rorl + orl, with no common fold.
define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_rotr:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shrl %cl, %edx
; X86-SSE2-NEXT:    rorl %cl, %eax
; X86-SSE2-NEXT:    orl %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_lshr_rotr:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shrl %cl, %edi
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    rorl %cl, %eax
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shx = lshr i32 %x, %s
  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
  %or = or i32 %rot, %shx
  ret i32 %or
}
1209
; Same as or_lshr_fshr but with the or operands commuted; codegen should be
; identical (or is commutative).
define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_fshr_commute:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl %edx, %esi
; X86-SSE2-NEXT:    shrl %cl, %esi
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    orl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_lshr_fshr_commute:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shrl %cl, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %esi, %edi
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shy = lshr i32 %y, %s
  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
  %or = or i32 %shy, %fun
  ret i32 %or
}
1238
; Same as or_lshr_rotr but with the or operands commuted; codegen should be
; identical (or is commutative).
define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_rotr_commute:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shrl %cl, %edx
; X86-SSE2-NEXT:    rorl %cl, %eax
; X86-SSE2-NEXT:    orl %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_lshr_rotr_commute:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    shrl %cl, %edi
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    rorl %cl, %eax
; X64-AVX2-NEXT:    orl %edi, %eax
; X64-AVX2-NEXT:    retq
  %shx = lshr i32 %x, %s
  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
  %or = or i32 %shx, %rot
  ret i32 %or
}
1264
; fshl(%y, %x, %s) already contains (%y << %s), so or'ing with shl %y, %s is
; redundant and folds away - the checks show a single shldl and no orl.
define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_fshl_simplify:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_shl_fshl_simplify:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shldl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %shy = shl i32 %y, %s
  %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
  %or = or i32 %fun, %shy
  ret i32 %or
}
1286
; fshr(%x, %y, %s) already contains (%y >> %s), so or'ing with lshr %y, %s is
; redundant and folds away - the checks show a single shrdl and no orl.
define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_fshr_simplify:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: or_lshr_fshr_simplify:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    movl %edx, %ecx
; X64-AVX2-NEXT:    movl %esi, %eax
; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX2-NEXT:    retq
  %shy = lshr i32 %y, %s
  %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)
  %or = or i32 %shy, %fun
  ret i32 %or
}
1308