; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse2,-sse4.2 | FileCheck %s --check-prefixes=GPR,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse4.2,-avx  | FileCheck %s --check-prefixes=GPR,SSE4
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx,-avx512f | FileCheck %s --check-prefixes=GPR,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f      | FileCheck %s --check-prefixes=GPR,AVX512

declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.inline.p0.i64(ptr nocapture, i8, i64, i1) nounwind
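; llvm.memset.inline.* matches llvm.memset.* except that its lowering is
; guaranteed to be expanded inline; it is never turned into a memset libcall.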

; /////////////////////////////////////////////////////////////////////////////
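; Inline memset of a runtime byte value at power-of-two sizes from 1 to 64
; bytes, with no alignment information on the destination pointer.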

define void @memset_1(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_1:
; GPR:       # %bb.0:
; GPR-NEXT:    movb %sil, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 1, i1 0)
  ret void
}

define void @memset_2(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_2:
; GPR:       # %bb.0:
; GPR-NEXT:    movzbl %sil, %eax
; GPR-NEXT:    shll $8, %esi
; GPR-NEXT:    orl %esi, %eax
; GPR-NEXT:    movw %ax, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 2, i1 0)
  ret void
}

define void @memset_4(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_4:
; GPR:       # %bb.0:
; GPR-NEXT:    movzbl %sil, %eax
; GPR-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; GPR-NEXT:    movl %eax, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 4, i1 0)
  ret void
}

define void @memset_8(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_8:
; GPR:       # %bb.0:
; GPR-NEXT:    # kill: def $esi killed $esi def $rsi
; GPR-NEXT:    movzbl %sil, %eax
; GPR-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; GPR-NEXT:    imulq %rax, %rcx
; GPR-NEXT:    movq %rcx, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 8, i1 0)
  ret void
}

define void @memset_16(ptr %a, i8 %value) nounwind {
; SSE2-LABEL: memset_16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE2-NEXT:    movzbl %sil, %eax
; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT:    imulq %rax, %rcx
; SSE2-NEXT:    movq %rcx, 8(%rdi)
; SSE2-NEXT:    movq %rcx, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: memset_16:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqu %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: memset_16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: memset_16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %esi, %xmm0
; AVX512-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX512-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 16, i1 0)
  ret void
}

define void @memset_32(ptr %a, i8 %value) nounwind {
; SSE2-LABEL: memset_32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE2-NEXT:    movzbl %sil, %eax
; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT:    imulq %rax, %rcx
; SSE2-NEXT:    movq %rcx, 24(%rdi)
; SSE2-NEXT:    movq %rcx, 16(%rdi)
; SSE2-NEXT:    movq %rcx, 8(%rdi)
; SSE2-NEXT:    movq %rcx, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: memset_32:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE4-NEXT:    movdqu %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: memset_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, 16(%rdi)
; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: memset_32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %esi, %xmm0
; AVX512-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512-NEXT:    vmovdqu %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 32, i1 0)
  ret void
}

define void @memset_64(ptr %a, i8 %value) nounwind {
; SSE2-LABEL: memset_64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE2-NEXT:    movzbl %sil, %eax
; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT:    imulq %rax, %rcx
; SSE2-NEXT:    movq %rcx, 56(%rdi)
; SSE2-NEXT:    movq %rcx, 48(%rdi)
; SSE2-NEXT:    movq %rcx, 40(%rdi)
; SSE2-NEXT:    movq %rcx, 32(%rdi)
; SSE2-NEXT:    movq %rcx, 24(%rdi)
; SSE2-NEXT:    movq %rcx, 16(%rdi)
; SSE2-NEXT:    movq %rcx, 8(%rdi)
; SSE2-NEXT:    movq %rcx, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: memset_64:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqu %xmm0, 48(%rdi)
; SSE4-NEXT:    movdqu %xmm0, 32(%rdi)
; SSE4-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE4-NEXT:    movdqu %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: memset_64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: memset_64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movzbl %sil, %eax
; AVX512-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512-NEXT:    vpbroadcastd %eax, %zmm0
; AVX512-NEXT:    vmovdqu64 %zmm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 64, i1 0)
  ret void
}

; /////////////////////////////////////////////////////////////////////////////
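; Same stores, but the destination pointer argument carries an align attribute
; matching the store size, so aligned vector stores can be selected.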

define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
; SSE2-LABEL: aligned_memset_16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %esi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    movdqa %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_memset_16:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqa %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_memset_16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_memset_16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %esi, %xmm0
; AVX512-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa %xmm0, (%rdi)
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 %value, i64 16, i1 0)
  ret void
}

define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
; SSE2-LABEL: aligned_memset_32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %esi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE2-NEXT:    movdqa %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_memset_32:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE4-NEXT:    movdqa %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_memset_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa %xmm0, 16(%rdi)
; AVX-NEXT:    vmovdqa %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_memset_32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %esi, %xmm0
; AVX512-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512-NEXT:    vmovdqa %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 %value, i64 32, i1 0)
  ret void
}

define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind {
; SSE2-LABEL: aligned_memset_64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %esi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    movdqa %xmm0, 48(%rdi)
; SSE2-NEXT:    movdqa %xmm0, 32(%rdi)
; SSE2-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE2-NEXT:    movdqa %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_memset_64:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqa %xmm0, 48(%rdi)
; SSE4-NEXT:    movdqa %xmm0, 32(%rdi)
; SSE4-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE4-NEXT:    movdqa %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_memset_64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    vmovaps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_memset_64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movzbl %sil, %eax
; AVX512-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512-NEXT:    vpbroadcastd %eax, %zmm0
; AVX512-NEXT:    vmovdqa64 %zmm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 %value, i64 64, i1 0)
  ret void
}

; /////////////////////////////////////////////////////////////////////////////
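; Inline memset with a constant zero value (bzero-style), with no alignment
; information on the destination pointer.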

define void @bzero_1(ptr %a) nounwind {
; GPR-LABEL: bzero_1:
; GPR:       # %bb.0:
; GPR-NEXT:    movb $0, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 1, i1 0)
  ret void
}

define void @bzero_2(ptr %a) nounwind {
; GPR-LABEL: bzero_2:
; GPR:       # %bb.0:
; GPR-NEXT:    movw $0, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 2, i1 0)
  ret void
}

define void @bzero_4(ptr %a) nounwind {
; GPR-LABEL: bzero_4:
; GPR:       # %bb.0:
; GPR-NEXT:    movl $0, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 4, i1 0)
  ret void
}

define void @bzero_8(ptr %a) nounwind {
; GPR-LABEL: bzero_8:
; GPR:       # %bb.0:
; GPR-NEXT:    movq $0, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 8, i1 0)
  ret void
}

define void @bzero_16(ptr %a) nounwind {
; SSE2-LABEL: bzero_16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq $0, 8(%rdi)
; SSE2-NEXT:    movq $0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: bzero_16:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movups %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: bzero_16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovups %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: bzero_16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovups %xmm0, (%rdi)
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 16, i1 0)
  ret void
}

define void @bzero_32(ptr %a) nounwind {
; SSE2-LABEL: bzero_32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq $0, 24(%rdi)
; SSE2-NEXT:    movq $0, 16(%rdi)
; SSE2-NEXT:    movq $0, 8(%rdi)
; SSE2-NEXT:    movq $0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: bzero_32:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movups %xmm0, 16(%rdi)
; SSE4-NEXT:    movups %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: bzero_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: bzero_32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovups %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 32, i1 0)
  ret void
}

define void @bzero_64(ptr %a) nounwind {
; SSE2-LABEL: bzero_64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq $0, 56(%rdi)
; SSE2-NEXT:    movq $0, 48(%rdi)
; SSE2-NEXT:    movq $0, 40(%rdi)
; SSE2-NEXT:    movq $0, 32(%rdi)
; SSE2-NEXT:    movq $0, 24(%rdi)
; SSE2-NEXT:    movq $0, 16(%rdi)
; SSE2-NEXT:    movq $0, 8(%rdi)
; SSE2-NEXT:    movq $0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: bzero_64:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movups %xmm0, 48(%rdi)
; SSE4-NEXT:    movups %xmm0, 32(%rdi)
; SSE4-NEXT:    movups %xmm0, 16(%rdi)
; SSE4-NEXT:    movups %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: bzero_64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: bzero_64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovups %zmm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 64, i1 0)
  ret void
}

; /////////////////////////////////////////////////////////////////////////////
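; Zero memsets again, but with the destination alignment known at the call
; site, so aligned stores (movaps/vmovaps) can be used.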

define void @aligned_bzero_16(ptr %a) nounwind {
; SSE2-LABEL: aligned_bzero_16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    movaps %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_bzero_16:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movaps %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_bzero_16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_bzero_16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovaps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 0, i64 16, i1 0)
  ret void
}

define void @aligned_bzero_32(ptr %a) nounwind {
; SSE2-LABEL: aligned_bzero_32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    movaps %xmm0, 16(%rdi)
; SSE2-NEXT:    movaps %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_bzero_32:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movaps %xmm0, 16(%rdi)
; SSE4-NEXT:    movaps %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_bzero_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_bzero_32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovaps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 0, i64 32, i1 0)
  ret void
}

define void @aligned_bzero_64(ptr %a) nounwind {
; SSE2-LABEL: aligned_bzero_64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    movaps %xmm0, 48(%rdi)
; SSE2-NEXT:    movaps %xmm0, 32(%rdi)
; SSE2-NEXT:    movaps %xmm0, 16(%rdi)
; SSE2-NEXT:    movaps %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_bzero_64:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movaps %xmm0, 48(%rdi)
; SSE4-NEXT:    movaps %xmm0, 32(%rdi)
; SSE4-NEXT:    movaps %xmm0, 16(%rdi)
; SSE4-NEXT:    movaps %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_bzero_64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_bzero_64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovaps %zmm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 64, i1 0)
  ret void
}