; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE1
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE2
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOSSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefixes=X64,X64-SSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX

; Note: This test is testing that the lowering for atomics matches what we
; currently emit for non-atomics + the atomic restriction. The presence of
; a particular lowering detail in these tests should not be read as requiring
; that detail for correctness unless it's related to the atomicity itself.
; (Specifically, there were reviewer questions about the lowering for halves
; and their calling convention which remain unresolved.)

define void @store_half(half* %fptr, half %v) {
; X86-LABEL: store_half:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movw %ax, (%ecx)
; X86-NEXT:    retl
;
; X64-LABEL: store_half:
; X64:       # %bb.0:
; X64-NEXT:    movw %si, (%rdi)
; X64-NEXT:    retq
  store atomic half %v, half* %fptr unordered, align 2
  ret void
}

define void @store_float(float* %fptr, float %v) {
; X86-LABEL: store_float:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    retl
;
; X64-SSE-LABEL: store_float:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_float:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic float %v, float* %fptr unordered, align 4
  ret void
}

define void @store_double(double* %fptr, double %v) {
; X86-SSE1-LABEL: store_double:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_double:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_double:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_double:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_double:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_double:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic double %v, double* %fptr unordered, align 8
  ret void
}

define void @store_fp128(fp128* %fptr, fp128 %v) {
; X86-SSE-LABEL: store_fp128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    subl $36, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 36
; X86-SSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl %eax
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    calll __sync_lock_test_and_set_16
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-SSE-NEXT:    addl $56, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -56
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: store_fp128:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $44, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 48
; X86-AVX-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    vmovups %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl %eax, (%esp)
; X86-AVX-NEXT:    calll __sync_lock_test_and_set_16
; X86-AVX-NEXT:    addl $40, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_fp128:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $36, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 36
; X86-NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    calll __sync_lock_test_and_set_16
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NOSSE-NEXT:    addl $56, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -56
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    subq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 32
; X64-SSE-NEXT:    movaps %xmm0, (%rsp)
; X64-SSE-NEXT:    movq (%rsp), %rsi
; X64-SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-SSE-NEXT:    callq __sync_lock_test_and_set_16@PLT
; X64-SSE-NEXT:    addq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    subq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 32
; X64-AVX-NEXT:    vmovaps %xmm0, (%rsp)
; X64-AVX-NEXT:    movq (%rsp), %rsi
; X64-AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-AVX-NEXT:    callq __sync_lock_test_and_set_16@PLT
; X64-AVX-NEXT:    addq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
; X64-AVX-NEXT:    retq
  store atomic fp128 %v, fp128* %fptr unordered, align 16
  ret void
}

define half @load_half(half* %fptr) {
; X86-LABEL: load_half:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: load_half:
; X64:       # %bb.0:
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    retq
  %v = load atomic half, half* %fptr unordered, align 2
  ret half %v
}

define float @load_float(float* %fptr) {
; X86-SSE1-LABEL: load_float:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    flds (%esp)
; X86-SSE1-NEXT:    popl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_float:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    flds (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_float:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    flds (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_float:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    popl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_float:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_float:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    retq
  %v = load atomic float, float* %fptr unordered, align 4
  ret float %v
}

define double @load_double(double* %fptr) {
; X86-SSE1-LABEL: load_double:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movss %xmm0, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_double:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    subl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%esp)
; X86-SSE2-NEXT:    fldl (%esp)
; X86-SSE2-NEXT:    addl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_double:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 16
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%esp)
; X86-AVX-NEXT:    fldl (%esp)
; X86-AVX-NEXT:    addl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_double:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fldl (%esp)
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_double:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_double:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    retq
  %v = load atomic double, double* %fptr unordered, align 8
  ret double %v
}

define fp128 @load_fp128(fp128* %fptr) {
; X86-SSE-LABEL: load_fp128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %edi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE-NEXT:    pushl %esi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE-NEXT:    subl $20, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 32
; X86-SSE-NEXT:    .cfi_offset %esi, -12
; X86-SSE-NEXT:    .cfi_offset %edi, -8
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT:    subl $8, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 8
; X86-SSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl %eax
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    calll __sync_val_compare_and_swap_16
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-SSE-NEXT:    addl $44, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -44
; X86-SSE-NEXT:    movl (%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT:    movl %edi, 8(%esi)
; X86-SSE-NEXT:    movl %edx, 12(%esi)
; X86-SSE-NEXT:    movl %eax, (%esi)
; X86-SSE-NEXT:    movl %ecx, 4(%esi)
; X86-SSE-NEXT:    movl %esi, %eax
; X86-SSE-NEXT:    addl $20, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE-NEXT:    popl %esi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE-NEXT:    popl %edi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl $4
;
; X86-AVX-LABEL: load_fp128:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %esi
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    subl $56, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 64
; X86-AVX-NEXT:    .cfi_offset %esi, -8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovups %ymm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl %eax, (%esp)
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    calll __sync_val_compare_and_swap_16
; X86-AVX-NEXT:    subl $4, %esp
; X86-AVX-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT:    vmovaps %xmm0, (%esi)
; X86-AVX-NEXT:    movl %esi, %eax
; X86-AVX-NEXT:    addl $56, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    popl %esi
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl $4
;
; X86-NOSSE-LABEL: load_fp128:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %edi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 32
; X86-NOSSE-NEXT:    .cfi_offset %esi, -12
; X86-NOSSE-NEXT:    .cfi_offset %edi, -8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 8
; X86-NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    calll __sync_val_compare_and_swap_16
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NOSSE-NEXT:    addl $44, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -44
; X86-NOSSE-NEXT:    movl (%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NOSSE-NEXT:    movl %edi, 8(%esi)
; X86-NOSSE-NEXT:    movl %edx, 12(%esi)
; X86-NOSSE-NEXT:    movl %eax, (%esi)
; X86-NOSSE-NEXT:    movl %ecx, 4(%esi)
; X86-NOSSE-NEXT:    movl %esi, %eax
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    popl %edi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl $4
;
; X64-SSE-LABEL: load_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    subq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 32
; X64-SSE-NEXT:    xorl %esi, %esi
; X64-SSE-NEXT:    xorl %edx, %edx
; X64-SSE-NEXT:    xorl %ecx, %ecx
; X64-SSE-NEXT:    xorl %r8d, %r8d
; X64-SSE-NEXT:    callq __sync_val_compare_and_swap_16@PLT
; X64-SSE-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq %rax, (%rsp)
; X64-SSE-NEXT:    movaps (%rsp), %xmm0
; X64-SSE-NEXT:    addq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    subq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 32
; X64-AVX-NEXT:    xorl %esi, %esi
; X64-AVX-NEXT:    xorl %edx, %edx
; X64-AVX-NEXT:    xorl %ecx, %ecx
; X64-AVX-NEXT:    xorl %r8d, %r8d
; X64-AVX-NEXT:    callq __sync_val_compare_and_swap_16@PLT
; X64-AVX-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    movq %rax, (%rsp)
; X64-AVX-NEXT:    vmovaps (%rsp), %xmm0
; X64-AVX-NEXT:    addq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
; X64-AVX-NEXT:    retq
  %v = load atomic fp128, fp128* %fptr unordered, align 16
  ret fp128 %v
}


; Check the seq_cst lowering since that's the
; interesting one from an ordering perspective on x86.

define void @store_float_seq_cst(float* %fptr, float %v) {
; X86-LABEL: store_float_seq_cst:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xchgl %ecx, (%eax)
; X86-NEXT:    retl
;
; X64-SSE-LABEL: store_float_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movd %xmm0, %eax
; X64-SSE-NEXT:    xchgl %eax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_float_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xchgl %eax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic float %v, float* %fptr seq_cst, align 4
  ret void
}

define void @store_double_seq_cst(double* %fptr, double %v) {
; X86-SSE1-LABEL: store_double_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    lock orl $0, (%esp)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_double_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    lock orl $0, (%esp)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_double_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    lock orl $0, (%esp)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_double_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    lock orl $0, (%esp)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_double_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movq %xmm0, %rax
; X64-SSE-NEXT:    xchgq %rax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_double_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovq %xmm0, %rax
; X64-AVX-NEXT:    xchgq %rax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic double %v, double* %fptr seq_cst, align 8
  ret void
}

define float @load_float_seq_cst(float* %fptr) {
; X86-SSE1-LABEL: load_float_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    flds (%esp)
; X86-SSE1-NEXT:    popl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_float_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    flds (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_float_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    flds (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_float_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    popl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_float_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_float_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    retq
  %v = load atomic float, float* %fptr seq_cst, align 4
  ret float %v
}

define double @load_double_seq_cst(double* %fptr) {
; X86-SSE1-LABEL: load_double_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movss %xmm0, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_double_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    subl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%esp)
; X86-SSE2-NEXT:    fldl (%esp)
; X86-SSE2-NEXT:    addl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_double_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 16
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%esp)
; X86-AVX-NEXT:    fldl (%esp)
; X86-AVX-NEXT:    addl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_double_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fldl (%esp)
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_double_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_double_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    retq
  %v = load atomic double, double* %fptr seq_cst, align 8
  ret double %v
}