1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
3; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
4
5; These tests check folds of @llvm.bitreverse patterns. Where a fold is still a
6; TODO, the expected output is massive as llvm.bitreverse is not yet legal.
7
8declare i32 @llvm.bitreverse.i32(i32) readnone
9declare i64 @llvm.bitreverse.i64(i64) readnone
10declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) readnone
11
12; fold (bitreverse undef) -> undef
; Calls llvm.bitreverse.i32 on an undef operand and returns the result. The
; expected output for both the i686 and x86_64 runs is a bare return, i.e. no
; bit-reversal code is left in the function body.
13define i32 @test_undef() nounwind {
14; X86-LABEL: test_undef:
15; X86:       # %bb.0:
16; X86-NEXT:    retl
17;
18; X64-LABEL: test_undef:
19; X64:       # %bb.0:
20; X64-NEXT:    retq
21  %b = call i32 @llvm.bitreverse.i32(i32 undef)
22  ret i32 %b
23}
24
25; fold (bitreverse (bitreverse x)) -> x
; Applies llvm.bitreverse.i32 twice to %a0. The expected output for both runs
; only moves the incoming argument into %eax and returns, so the two reversals
; cancel out.
26define i32 @test_bitreverse_bitreverse(i32 %a0) nounwind {
27; X86-LABEL: test_bitreverse_bitreverse:
28; X86:       # %bb.0:
29; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
30; X86-NEXT:    retl
31;
32; X64-LABEL: test_bitreverse_bitreverse:
33; X64:       # %bb.0:
34; X64-NEXT:    movl %edi, %eax
35; X64-NEXT:    retq
36  %b = call i32 @llvm.bitreverse.i32(i32 %a0)
37  %c = call i32 @llvm.bitreverse.i32(i32 %b)
38  ret i32 %c
39}
40
41; TODO: fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
; i32 case: bit-reverse %a0, logical-shift-right by 7, then bit-reverse again.
; The expected output currently still contains two expanded reversal sequences
; (bswap followed by mask/shift steps) on either side of the `shrl $7`; the
; masks around the shift are already narrowed (e.g. 0x55555540 / 0x55555500).
42define i32 @test_bitreverse_srli_bitreverse(i32 %a0) nounwind {
43; X86-LABEL: test_bitreverse_srli_bitreverse:
44; X86:       # %bb.0:
45; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
46; X86-NEXT:    bswapl %eax
47; X86-NEXT:    movl %eax, %ecx
48; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
49; X86-NEXT:    shll $4, %ecx
50; X86-NEXT:    shrl $4, %eax
51; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
52; X86-NEXT:    orl %ecx, %eax
53; X86-NEXT:    movl %eax, %ecx
54; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
55; X86-NEXT:    shrl $2, %eax
56; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
57; X86-NEXT:    leal (%eax,%ecx,4), %eax
58; X86-NEXT:    movl %eax, %ecx
59; X86-NEXT:    andl $1431655744, %ecx # imm = 0x55555540
60; X86-NEXT:    shrl %eax
61; X86-NEXT:    andl $1431655680, %eax # imm = 0x55555500
62; X86-NEXT:    leal (%eax,%ecx,2), %eax
63; X86-NEXT:    shrl $7, %eax
64; X86-NEXT:    bswapl %eax
65; X86-NEXT:    movl %eax, %ecx
66; X86-NEXT:    andl $252645121, %ecx # imm = 0xF0F0F01
67; X86-NEXT:    shll $4, %ecx
68; X86-NEXT:    shrl $4, %eax
69; X86-NEXT:    andl $252645120, %eax # imm = 0xF0F0F00
70; X86-NEXT:    orl %ecx, %eax
71; X86-NEXT:    movl %eax, %ecx
72; X86-NEXT:    andl $858993424, %ecx # imm = 0x33333310
73; X86-NEXT:    shrl $2, %eax
74; X86-NEXT:    andl $858993408, %eax # imm = 0x33333300
75; X86-NEXT:    leal (%eax,%ecx,4), %eax
76; X86-NEXT:    movl %eax, %ecx
77; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
78; X86-NEXT:    shrl %eax
79; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
80; X86-NEXT:    leal (%eax,%ecx,2), %eax
81; X86-NEXT:    retl
82;
83; X64-LABEL: test_bitreverse_srli_bitreverse:
84; X64:       # %bb.0:
85; X64-NEXT:    # kill: def $edi killed $edi def $rdi
86; X64-NEXT:    bswapl %edi
87; X64-NEXT:    movl %edi, %eax
88; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
89; X64-NEXT:    shll $4, %eax
90; X64-NEXT:    shrl $4, %edi
91; X64-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
92; X64-NEXT:    orl %eax, %edi
93; X64-NEXT:    movl %edi, %eax
94; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
95; X64-NEXT:    shrl $2, %edi
96; X64-NEXT:    andl $858993459, %edi # imm = 0x33333333
97; X64-NEXT:    leal (%rdi,%rax,4), %eax
98; X64-NEXT:    movl %eax, %ecx
99; X64-NEXT:    andl $1431655744, %ecx # imm = 0x55555540
100; X64-NEXT:    shrl %eax
101; X64-NEXT:    andl $1431655680, %eax # imm = 0x55555500
102; X64-NEXT:    leal (%rax,%rcx,2), %eax
103; X64-NEXT:    shrl $7, %eax
104; X64-NEXT:    bswapl %eax
105; X64-NEXT:    movl %eax, %ecx
106; X64-NEXT:    andl $252645121, %ecx # imm = 0xF0F0F01
107; X64-NEXT:    shll $4, %ecx
108; X64-NEXT:    shrl $4, %eax
109; X64-NEXT:    andl $252645120, %eax # imm = 0xF0F0F00
110; X64-NEXT:    orl %ecx, %eax
111; X64-NEXT:    movl %eax, %ecx
112; X64-NEXT:    andl $858993424, %ecx # imm = 0x33333310
113; X64-NEXT:    shrl $2, %eax
114; X64-NEXT:    andl $858993408, %eax # imm = 0x33333300
115; X64-NEXT:    leal (%rax,%rcx,4), %eax
116; X64-NEXT:    movl %eax, %ecx
117; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
118; X64-NEXT:    shrl %eax
119; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
120; X64-NEXT:    leal (%rax,%rcx,2), %eax
121; X64-NEXT:    retq
122  %b = call i32 @llvm.bitreverse.i32(i32 %a0)
123  %c = lshr i32 %b, 7
124  %d = call i32 @llvm.bitreverse.i32(i32 %c)
125  ret i32 %d
126}
127
; i64 variant of the bitreverse / lshr / bitreverse pattern, with a shift
; amount of 33 (more than half the bit width).
; In the i686 expectations all work is done with 32-bit instructions: the
; computed value ends up in %edx and %eax is zeroed with `xorl`.
; In the x86_64 expectations both expanded reversal sequences are still
; present around the `shrq $33`.
128define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
129; X86-LABEL: test_bitreverse_srli_bitreverse_i64:
130; X86:       # %bb.0:
131; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
132; X86-NEXT:    bswapl %eax
133; X86-NEXT:    movl %eax, %ecx
134; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
135; X86-NEXT:    shll $4, %ecx
136; X86-NEXT:    shrl $4, %eax
137; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
138; X86-NEXT:    orl %ecx, %eax
139; X86-NEXT:    movl %eax, %ecx
140; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
141; X86-NEXT:    shrl $2, %eax
142; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
143; X86-NEXT:    leal (%eax,%ecx,4), %eax
144; X86-NEXT:    movl %eax, %ecx
145; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
146; X86-NEXT:    shrl %eax
147; X86-NEXT:    andl $1431655764, %eax # imm = 0x55555554
148; X86-NEXT:    leal (%eax,%ecx,2), %eax
149; X86-NEXT:    shrl %eax
150; X86-NEXT:    bswapl %eax
151; X86-NEXT:    movl %eax, %ecx
152; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
153; X86-NEXT:    shll $4, %ecx
154; X86-NEXT:    shrl $4, %eax
155; X86-NEXT:    andl $252645127, %eax # imm = 0xF0F0F07
156; X86-NEXT:    orl %ecx, %eax
157; X86-NEXT:    movl %eax, %ecx
158; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
159; X86-NEXT:    shrl $2, %eax
160; X86-NEXT:    andl $858993457, %eax # imm = 0x33333331
161; X86-NEXT:    leal (%eax,%ecx,4), %eax
162; X86-NEXT:    movl %eax, %ecx
163; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
164; X86-NEXT:    shrl %eax
165; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
166; X86-NEXT:    leal (%eax,%ecx,2), %edx
167; X86-NEXT:    xorl %eax, %eax
168; X86-NEXT:    retl
169;
170; X64-LABEL: test_bitreverse_srli_bitreverse_i64:
171; X64:       # %bb.0:
172; X64-NEXT:    bswapq %rdi
173; X64-NEXT:    movq %rdi, %rax
174; X64-NEXT:    shrq $4, %rax
175; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
176; X64-NEXT:    andq %rcx, %rax
177; X64-NEXT:    andq %rcx, %rdi
178; X64-NEXT:    shlq $4, %rdi
179; X64-NEXT:    orq %rax, %rdi
180; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
181; X64-NEXT:    movq %rdi, %rcx
182; X64-NEXT:    andq %rax, %rcx
183; X64-NEXT:    shrq $2, %rdi
184; X64-NEXT:    andq %rax, %rdi
185; X64-NEXT:    leaq (%rdi,%rcx,4), %rax
186; X64-NEXT:    movabsq $6148914689804861440, %rcx # imm = 0x5555555500000000
187; X64-NEXT:    andq %rax, %rcx
188; X64-NEXT:    shrq %rax
189; X64-NEXT:    movabsq $6148914685509894144, %rdx # imm = 0x5555555400000000
190; X64-NEXT:    andq %rax, %rdx
191; X64-NEXT:    leaq (%rdx,%rcx,2), %rax
192; X64-NEXT:    shrq $33, %rax
193; X64-NEXT:    bswapq %rax
194; X64-NEXT:    movabsq $1085102592318504960, %rcx # imm = 0xF0F0F0F00000000
195; X64-NEXT:    andq %rax, %rcx
196; X64-NEXT:    shrq $4, %rax
197; X64-NEXT:    movabsq $1085102557958766592, %rdx # imm = 0xF0F0F0700000000
198; X64-NEXT:    andq %rax, %rdx
199; X64-NEXT:    shlq $4, %rcx
200; X64-NEXT:    orq %rdx, %rcx
201; X64-NEXT:    movabsq $3689348813882916864, %rax # imm = 0x3333333300000000
202; X64-NEXT:    andq %rcx, %rax
203; X64-NEXT:    shrq $2, %rcx
204; X64-NEXT:    movabsq $3689348805292982272, %rdx # imm = 0x3333333100000000
205; X64-NEXT:    andq %rcx, %rdx
206; X64-NEXT:    leaq (%rdx,%rax,4), %rax
207; X64-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
208; X64-NEXT:    movq %rax, %rdx
209; X64-NEXT:    andq %rcx, %rdx
210; X64-NEXT:    shrq %rax
211; X64-NEXT:    andq %rcx, %rax
212; X64-NEXT:    leaq (%rax,%rdx,2), %rax
213; X64-NEXT:    retq
214    %1 = call i64 @llvm.bitreverse.i64(i64 %a)
215    %2 = lshr i64 %1, 33
216    %3 = call i64 @llvm.bitreverse.i64(i64 %2)
217    ret i64 %3
218}
219
220; TODO: fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
; i32 case: bit-reverse %a0, shift left by 7, then bit-reverse again.
; The expected output currently still contains two expanded reversal sequences
; (bswap followed by mask/shift steps) on either side of the `shll $7`; the
; masks around the shift are already narrowed (e.g. 0x555555 / 0x1555555).
221define i32 @test_bitreverse_shli_bitreverse(i32 %a0) nounwind {
222; X86-LABEL: test_bitreverse_shli_bitreverse:
223; X86:       # %bb.0:
224; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
225; X86-NEXT:    bswapl %eax
226; X86-NEXT:    movl %eax, %ecx
227; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
228; X86-NEXT:    shll $4, %ecx
229; X86-NEXT:    shrl $4, %eax
230; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
231; X86-NEXT:    orl %ecx, %eax
232; X86-NEXT:    movl %eax, %ecx
233; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
234; X86-NEXT:    shrl $2, %eax
235; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
236; X86-NEXT:    leal (%eax,%ecx,4), %eax
237; X86-NEXT:    movl %eax, %ecx
238; X86-NEXT:    andl $5592405, %ecx # imm = 0x555555
239; X86-NEXT:    shrl %eax
240; X86-NEXT:    andl $22369621, %eax # imm = 0x1555555
241; X86-NEXT:    leal (%eax,%ecx,2), %eax
242; X86-NEXT:    shll $7, %eax
243; X86-NEXT:    bswapl %eax
244; X86-NEXT:    movl %eax, %ecx
245; X86-NEXT:    andl $986895, %ecx # imm = 0xF0F0F
246; X86-NEXT:    shll $4, %ecx
247; X86-NEXT:    shrl $4, %eax
248; X86-NEXT:    andl $135204623, %eax # imm = 0x80F0F0F
249; X86-NEXT:    orl %ecx, %eax
250; X86-NEXT:    movl %eax, %ecx
251; X86-NEXT:    andl $3355443, %ecx # imm = 0x333333
252; X86-NEXT:    shrl $2, %eax
253; X86-NEXT:    andl $36909875, %eax # imm = 0x2333333
254; X86-NEXT:    leal (%eax,%ecx,4), %eax
255; X86-NEXT:    movl %eax, %ecx
256; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
257; X86-NEXT:    shrl %eax
258; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
259; X86-NEXT:    leal (%eax,%ecx,2), %eax
260; X86-NEXT:    retl
261;
262; X64-LABEL: test_bitreverse_shli_bitreverse:
263; X64:       # %bb.0:
264; X64-NEXT:    # kill: def $edi killed $edi def $rdi
265; X64-NEXT:    bswapl %edi
266; X64-NEXT:    movl %edi, %eax
267; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
268; X64-NEXT:    shll $4, %eax
269; X64-NEXT:    shrl $4, %edi
270; X64-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
271; X64-NEXT:    orl %eax, %edi
272; X64-NEXT:    movl %edi, %eax
273; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
274; X64-NEXT:    shrl $2, %edi
275; X64-NEXT:    andl $858993459, %edi # imm = 0x33333333
276; X64-NEXT:    leal (%rdi,%rax,4), %eax
277; X64-NEXT:    movl %eax, %ecx
278; X64-NEXT:    andl $5592405, %ecx # imm = 0x555555
279; X64-NEXT:    shrl %eax
280; X64-NEXT:    andl $22369621, %eax # imm = 0x1555555
281; X64-NEXT:    leal (%rax,%rcx,2), %eax
282; X64-NEXT:    shll $7, %eax
283; X64-NEXT:    bswapl %eax
284; X64-NEXT:    movl %eax, %ecx
285; X64-NEXT:    andl $986895, %ecx # imm = 0xF0F0F
286; X64-NEXT:    shll $4, %ecx
287; X64-NEXT:    shrl $4, %eax
288; X64-NEXT:    andl $135204623, %eax # imm = 0x80F0F0F
289; X64-NEXT:    orl %ecx, %eax
290; X64-NEXT:    movl %eax, %ecx
291; X64-NEXT:    andl $3355443, %ecx # imm = 0x333333
292; X64-NEXT:    shrl $2, %eax
293; X64-NEXT:    andl $36909875, %eax # imm = 0x2333333
294; X64-NEXT:    leal (%rax,%rcx,4), %eax
295; X64-NEXT:    movl %eax, %ecx
296; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
297; X64-NEXT:    shrl %eax
298; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
299; X64-NEXT:    leal (%rax,%rcx,2), %eax
300; X64-NEXT:    retq
301  %b = call i32 @llvm.bitreverse.i32(i32 %a0)
302  %c = shl i32 %b, 7
303  %d = call i32 @llvm.bitreverse.i32(i32 %c)
304  ret i32 %d
305}
306
; i64 variant of the bitreverse / shl / bitreverse pattern, with a shift
; amount of 33 (more than half the bit width).
; In the i686 expectations all work is done with 32-bit instructions: the
; computed value ends up in %eax and %edx is zeroed with `xorl`.
; In the x86_64 expectations both expanded reversal sequences are still
; present around the `shlq $33`, with several steps narrowed to 32-bit ops.
307define i64 @test_bitreverse_shli_bitreverse_i64(i64 %a) nounwind {
308; X86-LABEL: test_bitreverse_shli_bitreverse_i64:
309; X86:       # %bb.0:
310; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
311; X86-NEXT:    bswapl %eax
312; X86-NEXT:    movl %eax, %ecx
313; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
314; X86-NEXT:    shll $4, %ecx
315; X86-NEXT:    shrl $4, %eax
316; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
317; X86-NEXT:    orl %ecx, %eax
318; X86-NEXT:    movl %eax, %ecx
319; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
320; X86-NEXT:    shrl $2, %eax
321; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
322; X86-NEXT:    leal (%eax,%ecx,4), %eax
323; X86-NEXT:    movl %eax, %ecx
324; X86-NEXT:    andl $357913941, %ecx # imm = 0x15555555
325; X86-NEXT:    shrl %eax
326; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
327; X86-NEXT:    leal (%eax,%ecx,2), %eax
328; X86-NEXT:    addl %eax, %eax
329; X86-NEXT:    bswapl %eax
330; X86-NEXT:    movl %eax, %ecx
331; X86-NEXT:    andl $235867919, %ecx # imm = 0xE0F0F0F
332; X86-NEXT:    shll $4, %ecx
333; X86-NEXT:    shrl $4, %eax
334; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
335; X86-NEXT:    orl %ecx, %eax
336; X86-NEXT:    movl %eax, %ecx
337; X86-NEXT:    andl $590558003, %ecx # imm = 0x23333333
338; X86-NEXT:    shrl $2, %eax
339; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
340; X86-NEXT:    leal (%eax,%ecx,4), %eax
341; X86-NEXT:    movl %eax, %ecx
342; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
343; X86-NEXT:    shrl %eax
344; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
345; X86-NEXT:    leal (%eax,%ecx,2), %eax
346; X86-NEXT:    xorl %edx, %edx
347; X86-NEXT:    retl
348;
349; X64-LABEL: test_bitreverse_shli_bitreverse_i64:
350; X64:       # %bb.0:
351; X64-NEXT:    bswapq %rdi
352; X64-NEXT:    movq %rdi, %rax
353; X64-NEXT:    shrq $4, %rax
354; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
355; X64-NEXT:    andq %rcx, %rax
356; X64-NEXT:    andq %rcx, %rdi
357; X64-NEXT:    shlq $4, %rdi
358; X64-NEXT:    orq %rax, %rdi
359; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
360; X64-NEXT:    movq %rdi, %rcx
361; X64-NEXT:    andq %rax, %rcx
362; X64-NEXT:    shrq $2, %rdi
363; X64-NEXT:    andq %rax, %rdi
364; X64-NEXT:    leaq (%rdi,%rcx,4), %rax
365; X64-NEXT:    movl %eax, %ecx
366; X64-NEXT:    andl $357913941, %ecx # imm = 0x15555555
367; X64-NEXT:    shrl %eax
368; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
369; X64-NEXT:    leal (%rax,%rcx,2), %eax
370; X64-NEXT:    shlq $33, %rax
371; X64-NEXT:    bswapq %rax
372; X64-NEXT:    movl %eax, %ecx
373; X64-NEXT:    andl $235867919, %ecx # imm = 0xE0F0F0F
374; X64-NEXT:    shlq $4, %rcx
375; X64-NEXT:    shrq $4, %rax
376; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
377; X64-NEXT:    orq %rcx, %rax
378; X64-NEXT:    movl %eax, %ecx
379; X64-NEXT:    andl $590558003, %ecx # imm = 0x23333333
380; X64-NEXT:    shrq $2, %rax
381; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
382; X64-NEXT:    leaq (%rax,%rcx,4), %rax
383; X64-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
384; X64-NEXT:    movq %rax, %rdx
385; X64-NEXT:    andq %rcx, %rdx
386; X64-NEXT:    shrq %rax
387; X64-NEXT:    andq %rcx, %rax
388; X64-NEXT:    leaq (%rax,%rdx,2), %rax
389; X64-NEXT:    retq
390    %1 = call i64 @llvm.bitreverse.i64(i64 %a)
391    %2 = shl i64 %1, 33
392    %3 = call i64 @llvm.bitreverse.i64(i64 %2)
393    ret i64 %3
394}
395
; Demanded-bits case on <4 x i32>: the input is OR'ed with 2147483648
; (bit 31 set in every lane), bit-reversed, and then AND'ed with -2 (bit 0
; cleared in every lane). Bit reversal moves bit 31 to bit 0, which the final
; mask discards, so the OR cannot affect the result. Accordingly, neither
; expected sequence applies that constant to the input before the reversal.
; NOTE(review): the trailing pand/vpand against a constant-pool operand is
; presumably the final `and ... -2` mask - confirm against the emitted constant.
396define <4 x i32> @test_demandedbits_bitreverse(<4 x i32> %a0) nounwind {
397; X86-LABEL: test_demandedbits_bitreverse:
398; X86:       # %bb.0:
399; X86-NEXT:    pxor %xmm1, %xmm1
400; X86-NEXT:    movdqa %xmm0, %xmm2
401; X86-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
402; X86-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
403; X86-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
404; X86-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
405; X86-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
406; X86-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
407; X86-NEXT:    packuswb %xmm2, %xmm0
408; X86-NEXT:    movdqa %xmm0, %xmm1
409; X86-NEXT:    psrlw $4, %xmm1
410; X86-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
411; X86-NEXT:    pand %xmm2, %xmm1
412; X86-NEXT:    pand %xmm2, %xmm0
413; X86-NEXT:    psllw $4, %xmm0
414; X86-NEXT:    por %xmm1, %xmm0
415; X86-NEXT:    movdqa %xmm0, %xmm1
416; X86-NEXT:    psrlw $2, %xmm1
417; X86-NEXT:    movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
418; X86-NEXT:    pand %xmm2, %xmm1
419; X86-NEXT:    pand %xmm2, %xmm0
420; X86-NEXT:    psllw $2, %xmm0
421; X86-NEXT:    por %xmm1, %xmm0
422; X86-NEXT:    movdqa %xmm0, %xmm1
423; X86-NEXT:    psrlw $1, %xmm1
424; X86-NEXT:    movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
425; X86-NEXT:    pand %xmm2, %xmm1
426; X86-NEXT:    pand %xmm2, %xmm0
427; X86-NEXT:    paddb %xmm0, %xmm0
428; X86-NEXT:    por %xmm1, %xmm0
429; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
430; X86-NEXT:    retl
431;
432; X64-LABEL: test_demandedbits_bitreverse:
433; X64:       # %bb.0:
434; X64-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
435; X64-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
436; X64-NEXT:    vpand %xmm1, %xmm0, %xmm2
437; X64-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
438; X64-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
439; X64-NEXT:    vpsrlw $4, %xmm0, %xmm0
440; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
441; X64-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
442; X64-NEXT:    vpshufb %xmm0, %xmm1, %xmm0
443; X64-NEXT:    vpor %xmm0, %xmm2, %xmm0
444; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
445; X64-NEXT:    retq
446  %b = or <4 x i32> %a0, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
447  %c = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %b)
448  %d = and <4 x i32> %c, <i32 -2, i32 -2, i32 -2, i32 -2>
449  ret <4 x i32> %d
450}
451