; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse2,-sse4.2 | FileCheck %s --check-prefixes=GPR,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse4.2,-avx | FileCheck %s --check-prefixes=GPR,SSE4
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx,-avx512f | FileCheck %s --check-prefixes=GPR,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f | FileCheck %s --check-prefixes=GPR,AVX512

declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.inline.p0.i64(ptr nocapture, i8, i64, i1) nounwind

; /////////////////////////////////////////////////////////////////////////////
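; Expansion of @llvm.memset.inline with a non-constant byte value. Sizes up to
; 8 bytes splat the byte through a GPR (shift/or, or a multiply by
; 0x0101...01); from 16 bytes up, SSE4.2/AVX broadcast the byte with
; pshufb/vpshufb and AVX-512 with vpbroadcastb, while plain SSE2 falls back to
; 8-byte GPR stores.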

define void @memset_1(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_1:
; GPR:       # %bb.0:
; GPR-NEXT:    movb %sil, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 1, i1 0)
  ret void
}

define void @memset_2(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_2:
; GPR:       # %bb.0:
; GPR-NEXT:    movzbl %sil, %eax
; GPR-NEXT:    shll $8, %esi
; GPR-NEXT:    orl %esi, %eax
; GPR-NEXT:    movw %ax, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 2, i1 0)
  ret void
}

define void @memset_4(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_4:
; GPR:       # %bb.0:
; GPR-NEXT:    movzbl %sil, %eax
; GPR-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; GPR-NEXT:    movl %eax, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 4, i1 0)
  ret void
}

define void @memset_8(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_8:
; GPR:       # %bb.0:
; GPR-NEXT:    # kill: def $esi killed $esi def $rsi
; GPR-NEXT:    movzbl %sil, %eax
; GPR-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; GPR-NEXT:    imulq %rax, %rcx
; GPR-NEXT:    movq %rcx, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 8, i1 0)
  ret void
}

define void @memset_16(ptr %a, i8 %value) nounwind {
; SSE2-LABEL: memset_16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE2-NEXT:    movzbl %sil, %eax
; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT:    imulq %rax, %rcx
; SSE2-NEXT:    movq %rcx, 8(%rdi)
; SSE2-NEXT:    movq %rcx, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: memset_16:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqu %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: memset_16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: memset_16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %esi, %xmm0
; AVX512-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX512-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 16, i1 0)
  ret void
}

define void @memset_32(ptr %a, i8 %value) nounwind {
; SSE2-LABEL: memset_32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE2-NEXT:    movzbl %sil, %eax
; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT:    imulq %rax, %rcx
; SSE2-NEXT:    movq %rcx, 24(%rdi)
; SSE2-NEXT:    movq %rcx, 16(%rdi)
; SSE2-NEXT:    movq %rcx, 8(%rdi)
; SSE2-NEXT:    movq %rcx, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: memset_32:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE4-NEXT:    movdqu %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: memset_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, 16(%rdi)
; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: memset_32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %esi, %xmm0
; AVX512-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512-NEXT:    vmovdqu %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 32, i1 0)
  ret void
}

define void @memset_64(ptr %a, i8 %value) nounwind {
; SSE2-LABEL: memset_64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE2-NEXT:    movzbl %sil, %eax
; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT:    imulq %rax, %rcx
; SSE2-NEXT:    movq %rcx, 56(%rdi)
; SSE2-NEXT:    movq %rcx, 48(%rdi)
; SSE2-NEXT:    movq %rcx, 40(%rdi)
; SSE2-NEXT:    movq %rcx, 32(%rdi)
; SSE2-NEXT:    movq %rcx, 24(%rdi)
; SSE2-NEXT:    movq %rcx, 16(%rdi)
; SSE2-NEXT:    movq %rcx, 8(%rdi)
; SSE2-NEXT:    movq %rcx, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: memset_64:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqu %xmm0, 48(%rdi)
; SSE4-NEXT:    movdqu %xmm0, 32(%rdi)
; SSE4-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE4-NEXT:    movdqu %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: memset_64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: memset_64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movzbl %sil, %eax
; AVX512-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512-NEXT:    vpbroadcastd %eax, %zmm0
; AVX512-NEXT:    vmovdqu64 %zmm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 64, i1 0)
  ret void
}

; /////////////////////////////////////////////////////////////////////////////
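; The same variable-value expansions, but the destination pointer carries a
; known alignment, so the vector paths can use aligned stores
; (movdqa/vmovdqa/vmovaps) instead of the unaligned forms. Plain SSE2 can now
; use 16-byte stores too, splatting the byte via punpcklbw/pshuflw/pshufd.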

define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
; SSE2-LABEL: aligned_memset_16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %esi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    movdqa %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_memset_16:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqa %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_memset_16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_memset_16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %esi, %xmm0
; AVX512-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa %xmm0, (%rdi)
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 %value, i64 16, i1 0)
  ret void
}

define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
; SSE2-LABEL: aligned_memset_32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %esi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE2-NEXT:    movdqa %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_memset_32:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE4-NEXT:    movdqa %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_memset_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa %xmm0, 16(%rdi)
; AVX-NEXT:    vmovdqa %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_memset_32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %esi, %xmm0
; AVX512-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512-NEXT:    vmovdqa %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 %value, i64 32, i1 0)
  ret void
}

define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind {
; SSE2-LABEL: aligned_memset_64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %esi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    movdqa %xmm0, 48(%rdi)
; SSE2-NEXT:    movdqa %xmm0, 32(%rdi)
; SSE2-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE2-NEXT:    movdqa %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_memset_64:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqa %xmm0, 48(%rdi)
; SSE4-NEXT:    movdqa %xmm0, 32(%rdi)
; SSE4-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE4-NEXT:    movdqa %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_memset_64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    vmovaps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_memset_64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movzbl %sil, %eax
; AVX512-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512-NEXT:    vpbroadcastd %eax, %zmm0
; AVX512-NEXT:    vmovdqa64 %zmm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 %value, i64 64, i1 0)
  ret void
}

; /////////////////////////////////////////////////////////////////////////////
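; Zero-value @llvm.memset.inline (a bzero pattern). No byte splat is needed:
; GPR expansions store immediate zeros directly, while the vector paths zero a
; register with xorps/vxorps and emit unaligned stores.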

define void @bzero_1(ptr %a) nounwind {
; GPR-LABEL: bzero_1:
; GPR:       # %bb.0:
; GPR-NEXT:    movb $0, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 1, i1 0)
  ret void
}

define void @bzero_2(ptr %a) nounwind {
; GPR-LABEL: bzero_2:
; GPR:       # %bb.0:
; GPR-NEXT:    movw $0, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 2, i1 0)
  ret void
}

define void @bzero_4(ptr %a) nounwind {
; GPR-LABEL: bzero_4:
; GPR:       # %bb.0:
; GPR-NEXT:    movl $0, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 4, i1 0)
  ret void
}

define void @bzero_8(ptr %a) nounwind {
; GPR-LABEL: bzero_8:
; GPR:       # %bb.0:
; GPR-NEXT:    movq $0, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 8, i1 0)
  ret void
}

define void @bzero_16(ptr %a) nounwind {
; SSE2-LABEL: bzero_16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq $0, 8(%rdi)
; SSE2-NEXT:    movq $0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: bzero_16:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movups %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: bzero_16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovups %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: bzero_16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovups %xmm0, (%rdi)
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 16, i1 0)
  ret void
}

define void @bzero_32(ptr %a) nounwind {
; SSE2-LABEL: bzero_32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq $0, 24(%rdi)
; SSE2-NEXT:    movq $0, 16(%rdi)
; SSE2-NEXT:    movq $0, 8(%rdi)
; SSE2-NEXT:    movq $0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: bzero_32:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movups %xmm0, 16(%rdi)
; SSE4-NEXT:    movups %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: bzero_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: bzero_32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovups %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 32, i1 0)
  ret void
}

define void @bzero_64(ptr %a) nounwind {
; SSE2-LABEL: bzero_64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq $0, 56(%rdi)
; SSE2-NEXT:    movq $0, 48(%rdi)
; SSE2-NEXT:    movq $0, 40(%rdi)
; SSE2-NEXT:    movq $0, 32(%rdi)
; SSE2-NEXT:    movq $0, 24(%rdi)
; SSE2-NEXT:    movq $0, 16(%rdi)
; SSE2-NEXT:    movq $0, 8(%rdi)
; SSE2-NEXT:    movq $0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: bzero_64:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movups %xmm0, 48(%rdi)
; SSE4-NEXT:    movups %xmm0, 32(%rdi)
; SSE4-NEXT:    movups %xmm0, 16(%rdi)
; SSE4-NEXT:    movups %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: bzero_64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: bzero_64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovups %zmm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 64, i1 0)
  ret void
}

; /////////////////////////////////////////////////////////////////////////////
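; Zero-value expansion with a known destination alignment: every
; configuration, including plain SSE2, zeroes an XMM register and uses aligned
; vector stores (movaps/vmovaps).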

define void @aligned_bzero_16(ptr %a) nounwind {
; SSE2-LABEL: aligned_bzero_16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    movaps %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_bzero_16:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movaps %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_bzero_16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_bzero_16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovaps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 0, i64 16, i1 0)
  ret void
}

define void @aligned_bzero_32(ptr %a) nounwind {
; SSE2-LABEL: aligned_bzero_32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    movaps %xmm0, 16(%rdi)
; SSE2-NEXT:    movaps %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_bzero_32:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movaps %xmm0, 16(%rdi)
; SSE4-NEXT:    movaps %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_bzero_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_bzero_32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovaps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 0, i64 32, i1 0)
  ret void
}

define void @aligned_bzero_64(ptr %a) nounwind {
; SSE2-LABEL: aligned_bzero_64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    movaps %xmm0, 48(%rdi)
; SSE2-NEXT:    movaps %xmm0, 32(%rdi)
; SSE2-NEXT:    movaps %xmm0, 16(%rdi)
; SSE2-NEXT:    movaps %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_bzero_64:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movaps %xmm0, 48(%rdi)
; SSE4-NEXT:    movaps %xmm0, 32(%rdi)
; SSE4-NEXT:    movaps %xmm0, 16(%rdi)
; SSE4-NEXT:    movaps %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_bzero_64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_bzero_64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovaps %zmm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 64, i1 0)
  ret void
}