; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2,-slow-unaligned-mem-16 | FileCheck %s --check-prefix=SSE2FAST
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512f -mattr=+prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512bw -mattr=+prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512dq -mattr=+prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512f -mattr=-prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512bw -mattr=-prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW

; https://llvm.org/bugs/show_bug.cgi?id=27100

define void @memset_16_nonzero_bytes(ptr %x) {
; SSE-LABEL: memset_16_nonzero_bytes:
; SSE: # %bb.0:
; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_16_nonzero_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_16_nonzero_bytes:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %xmm0, (%rdi)
; AVX-NEXT: retq
  %call = tail call ptr @__memset_chk(ptr %x, i32 42, i64 16, i64 -1)
  ret void
}

define void @memset_32_nonzero_bytes(ptr %x) {
; SSE-LABEL: memset_32_nonzero_bytes:
; SSE: # %bb.0:
; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_32_nonzero_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_32_nonzero_bytes:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %call = tail call ptr @__memset_chk(ptr %x, i32 42, i64 32, i64 -1)
  ret void
}

define void @memset_64_nonzero_bytes(ptr %x) {
; SSE-LABEL: memset_64_nonzero_bytes:
; SSE: # %bb.0:
; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 56(%rdi)
; SSE-NEXT: movq %rax, 48(%rdi)
; SSE-NEXT: movq %rax, 40(%rdi)
; SSE-NEXT: movq %rax, 32(%rdi)
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_64_nonzero_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_64_nonzero_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_64_nonzero_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
; AVX2-NEXT: vmovups %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: memset_64_nonzero_bytes:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vbroadcastss {{.*#+}} zmm0 = [707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378]
; AVX512F-NEXT: vmovups %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: memset_64_nonzero_bytes:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovaps {{.*#+}} zmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX512BW-NEXT: vmovups %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %call = tail call ptr @__memset_chk(ptr %x, i32 42, i64 64, i64 -1)
  ret void
}

define void @memset_128_nonzero_bytes(ptr %x) {
; SSE-LABEL: memset_128_nonzero_bytes:
; SSE: # %bb.0:
; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 120(%rdi)
; SSE-NEXT: movq %rax, 112(%rdi)
; SSE-NEXT: movq %rax, 104(%rdi)
; SSE-NEXT: movq %rax, 96(%rdi)
; SSE-NEXT: movq %rax, 88(%rdi)
; SSE-NEXT: movq %rax, 80(%rdi)
; SSE-NEXT: movq %rax, 72(%rdi)
; SSE-NEXT: movq %rax, 64(%rdi)
; SSE-NEXT: movq %rax, 56(%rdi)
; SSE-NEXT: movq %rax, 48(%rdi)
; SSE-NEXT: movq %rax, 40(%rdi)
; SSE-NEXT: movq %rax, 32(%rdi)
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_128_nonzero_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_128_nonzero_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_128_nonzero_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
; AVX2-NEXT: vmovups %ymm0, 64(%rdi)
; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
; AVX2-NEXT: vmovups %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: memset_128_nonzero_bytes:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vbroadcastss {{.*#+}} zmm0 = [707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378]
; AVX512F-NEXT: vmovups %zmm0, 64(%rdi)
; AVX512F-NEXT: vmovups %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: memset_128_nonzero_bytes:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovaps {{.*#+}} zmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX512BW-NEXT: vmovups %zmm0, 64(%rdi)
; AVX512BW-NEXT: vmovups %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %call = tail call ptr @__memset_chk(ptr %x, i32 42, i64 128, i64 -1)
  ret void
}

define void @memset_256_nonzero_bytes(ptr %x) {
; SSE-LABEL: memset_256_nonzero_bytes:
; SSE: # %bb.0:
; SSE-NEXT: movl $256, %edx # imm = 0x100
; SSE-NEXT: movl $42, %esi
; SSE-NEXT: jmp memset@PLT # TAILCALL
;
; SSE2FAST-LABEL: memset_256_nonzero_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 240(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 224(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 208(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 192(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 176(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 160(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 144(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 128(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_256_nonzero_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX1-NEXT: vmovups %ymm0, 224(%rdi)
; AVX1-NEXT: vmovups %ymm0, 192(%rdi)
; AVX1-NEXT: vmovups %ymm0, 160(%rdi)
; AVX1-NEXT: vmovups %ymm0, 128(%rdi)
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_256_nonzero_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX2-NEXT: vmovups %ymm0, 224(%rdi)
; AVX2-NEXT: vmovups %ymm0, 192(%rdi)
; AVX2-NEXT: vmovups %ymm0, 160(%rdi)
; AVX2-NEXT: vmovups %ymm0, 128(%rdi)
; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
; AVX2-NEXT: vmovups %ymm0, 64(%rdi)
; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
; AVX2-NEXT: vmovups %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: memset_256_nonzero_bytes:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vbroadcastss {{.*#+}} zmm0 = [707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378]
; AVX512F-NEXT: vmovups %zmm0, 192(%rdi)
; AVX512F-NEXT: vmovups %zmm0, 128(%rdi)
; AVX512F-NEXT: vmovups %zmm0, 64(%rdi)
; AVX512F-NEXT: vmovups %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: memset_256_nonzero_bytes:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovaps {{.*#+}} zmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX512BW-NEXT: vmovups %zmm0, 192(%rdi)
; AVX512BW-NEXT: vmovups %zmm0, 128(%rdi)
; AVX512BW-NEXT: vmovups %zmm0, 64(%rdi)
; AVX512BW-NEXT: vmovups %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %call = tail call ptr @__memset_chk(ptr %x, i32 42, i64 256, i64 -1)
  ret void
}

declare ptr @__memset_chk(ptr, i32, i64, i64)

; Repeat with a non-constant value for the stores.

define void @memset_16_nonconst_bytes(ptr %x, i8 %c) {
; SSE-LABEL: memset_16_nonconst_bytes:
; SSE: # %bb.0:
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_16_nonconst_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_16_nonconst_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_16_nonconst_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
; AVX2-NEXT: retq
;
; AVX512-LABEL: memset_16_nonconst_bytes:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd %esi, %xmm0
; AVX512-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX512-NEXT: vmovdqu %xmm0, (%rdi)
; AVX512-NEXT: retq
  tail call void @llvm.memset.p0.i64(ptr %x, i8 %c, i64 16, i1 false)
  ret void
}

define void @memset_32_nonconst_bytes(ptr %x, i8 %c) {
; SSE-LABEL: memset_32_nonconst_bytes:
; SSE: # %bb.0:
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_32_nonconst_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_32_nonconst_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, 16(%rdi)
; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_32_nonconst_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: memset_32_nonconst_bytes:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd %esi, %xmm0
; AVX512-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512-NEXT: vmovdqu %ymm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  tail call void @llvm.memset.p0.i64(ptr %x, i8 %c, i64 32, i1 false)
  ret void
}

define void @memset_64_nonconst_bytes(ptr %x, i8 %c) {
; SSE-LABEL: memset_64_nonconst_bytes:
; SSE: # %bb.0:
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 56(%rdi)
; SSE-NEXT: movq %rcx, 48(%rdi)
; SSE-NEXT: movq %rcx, 40(%rdi)
; SSE-NEXT: movq %rcx, 32(%rdi)
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_64_nonconst_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_64_nonconst_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_64_nonconst_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: memset_64_nonconst_bytes:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movzbl %sil, %eax
; AVX512F-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0
; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: memset_64_nonconst_bytes:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastb %esi, %zmm0
; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  tail call void @llvm.memset.p0.i64(ptr %x, i8 %c, i64 64, i1 false)
  ret void
}

define void @memset_128_nonconst_bytes(ptr %x, i8 %c) {
; SSE-LABEL: memset_128_nonconst_bytes:
; SSE: # %bb.0:
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 120(%rdi)
; SSE-NEXT: movq %rcx, 112(%rdi)
; SSE-NEXT: movq %rcx, 104(%rdi)
; SSE-NEXT: movq %rcx, 96(%rdi)
; SSE-NEXT: movq %rcx, 88(%rdi)
; SSE-NEXT: movq %rcx, 80(%rdi)
; SSE-NEXT: movq %rcx, 72(%rdi)
; SSE-NEXT: movq %rcx, 64(%rdi)
; SSE-NEXT: movq %rcx, 56(%rdi)
; SSE-NEXT: movq %rcx, 48(%rdi)
; SSE-NEXT: movq %rcx, 40(%rdi)
; SSE-NEXT: movq %rcx, 32(%rdi)
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_128_nonconst_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT: movdqu %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_128_nonconst_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_128_nonconst_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: memset_128_nonconst_bytes:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movzbl %sil, %eax
; AVX512F-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0
; AVX512F-NEXT: vmovdqu64 %zmm0, 64(%rdi)
; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: memset_128_nonconst_bytes:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastb %esi, %zmm0
; AVX512BW-NEXT: vmovdqu64 %zmm0, 64(%rdi)
; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  tail call void @llvm.memset.p0.i64(ptr %x, i8 %c, i64 128, i1 false)
  ret void
}

define void @memset_256_nonconst_bytes(ptr %x, i8 %c) {
; SSE-LABEL: memset_256_nonconst_bytes:
; SSE: # %bb.0:
; SSE-NEXT: movl $256, %edx # imm = 0x100
; SSE-NEXT: jmp memset@PLT # TAILCALL
;
; SSE2FAST-LABEL: memset_256_nonconst_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT: movdqu %xmm0, 240(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 224(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 208(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 192(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 176(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 160(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 144(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 128(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_256_nonconst_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 224(%rdi)
; AVX1-NEXT: vmovups %ymm0, 192(%rdi)
; AVX1-NEXT: vmovups %ymm0, 160(%rdi)
; AVX1-NEXT: vmovups %ymm0, 128(%rdi)
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_256_nonconst_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 224(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 192(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 160(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 128(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: memset_256_nonconst_bytes:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movzbl %sil, %eax
; AVX512F-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0
; AVX512F-NEXT: vmovdqu64 %zmm0, 192(%rdi)
; AVX512F-NEXT: vmovdqu64 %zmm0, 128(%rdi)
; AVX512F-NEXT: vmovdqu64 %zmm0, 64(%rdi)
; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: memset_256_nonconst_bytes:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastb %esi, %zmm0
; AVX512BW-NEXT: vmovdqu64 %zmm0, 192(%rdi)
; AVX512BW-NEXT: vmovdqu64 %zmm0, 128(%rdi)
; AVX512BW-NEXT: vmovdqu64 %zmm0, 64(%rdi)
; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  tail call void @llvm.memset.p0.i64(ptr %x, i8 %c, i64 256, i1 false)
  ret void
}

declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) #1
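
; For orientation, a rough C equivalent of the IR exercised above (an
; illustrative sketch only; the function names below are hypothetical and not
; part of this test). The constant-value cases lower fortified memset calls
; (__memset_chk with an unknown object size of -1, so the check folds away),
; and the nonconst cases lower plain llvm.memset with a runtime byte value:
;
;   #include <string.h>
;
;   void set_const(char *x)            { memset(x, 42, 16); } /* sizes 16..256 above */
;   void set_nonconst(char *x, char c) { memset(x, c, 16);  } /* sizes 16..256 above */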