; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE1
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE2
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOSSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefixes=X64-SSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefixes=X64-AVX
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX

; Note: This test verifies that the lowering for atomics matches what we
; currently emit for non-atomics plus the atomicity restriction. The presence
; of a particular lowering detail in these tests should not be read as
; requiring that detail for correctness unless it is related to the atomicity
; itself. (Specifically, there were reviewer questions about the lowering for
; halves and their calling convention which remain unresolved.)

define void @store_half(ptr %fptr, half %v) {
; X86-SSE1-LABEL: store_half:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movw %ax, (%ecx)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_half:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movw %cx, (%eax)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_half:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    movw %cx, (%eax)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_half:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movw %ax, (%ecx)
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_half:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pextrw $0, %xmm0, %eax
; X64-SSE-NEXT:    movw %ax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_half:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpextrw $0, %xmm0, %eax
; X64-AVX-NEXT:    movw %ax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic half %v, ptr %fptr unordered, align 2
  ret void
}

define void @store_float(ptr %fptr, float %v) {
; X86-LABEL: store_float:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    retl
;
; X64-SSE-LABEL: store_float:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_float:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic float %v, ptr %fptr unordered, align 4
  ret void
}

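; An unordered 8-byte store on 32-bit x86 must still be performed as a
; single 64-bit access: with SSE the value is bounced through an XMM
; register, and without SSE it goes through the x87 unit via
; fildll/fistpll, rather than being split into two 32-bit moves.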
define void @store_double(ptr %fptr, double %v) {
; X86-SSE1-LABEL: store_double:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_double:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_double:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_double:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_double:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_double:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic double %v, ptr %fptr unordered, align 8
  ret void
}

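; No 16-byte atomic store instruction is used on these configurations;
; the fp128 store is lowered to a __sync_lock_test_and_set_16 libcall,
; i.e. an atomic exchange whose returned old value is simply ignored.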
define void @store_fp128(ptr %fptr, fp128 %v) {
; X86-SSE-LABEL: store_fp128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    subl $36, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 36
; X86-SSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl %eax
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    calll __sync_lock_test_and_set_16
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-SSE-NEXT:    addl $56, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -56
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: store_fp128:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $44, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 48
; X86-AVX-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    vmovups %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl %eax, (%esp)
; X86-AVX-NEXT:    calll __sync_lock_test_and_set_16
; X86-AVX-NEXT:    addl $40, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_fp128:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $36, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 36
; X86-NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    calll __sync_lock_test_and_set_16
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NOSSE-NEXT:    addl $56, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -56
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    subq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 32
; X64-SSE-NEXT:    movaps %xmm0, (%rsp)
; X64-SSE-NEXT:    movq (%rsp), %rsi
; X64-SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-SSE-NEXT:    callq __sync_lock_test_and_set_16@PLT
; X64-SSE-NEXT:    addq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    subq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 32
; X64-AVX-NEXT:    vmovaps %xmm0, (%rsp)
; X64-AVX-NEXT:    movq (%rsp), %rsi
; X64-AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-AVX-NEXT:    callq __sync_lock_test_and_set_16@PLT
; X64-AVX-NEXT:    addq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
; X64-AVX-NEXT:    retq
  store atomic fp128 %v, ptr %fptr unordered, align 16
  ret void
}

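; half values are accessed in memory as i16: with SSE2 or AVX the loaded
; integer is inserted into %xmm0 with pinsrw, while the SSE1 and no-SSE
; configurations return it in %eax (see the calling-convention note at the
; top of the file).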
define half @load_half(ptr %fptr) {
; X86-SSE1-LABEL: load_half:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movzwl (%eax), %eax
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_half:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzwl (%eax), %eax
; X86-SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_half:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movzwl (%eax), %eax
; X86-AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_half:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movzwl (%eax), %eax
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_half:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movzwl (%rdi), %eax
; X64-SSE-NEXT:    pinsrw $0, %eax, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_half:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movzwl (%rdi), %eax
; X64-AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %v = load atomic half, ptr %fptr unordered, align 2
  ret half %v
}

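; An aligned 4-byte access is already atomic on x86, so an unordered float
; load is just a plain load; on 32-bit targets the value is then spilled
; and reloaded with flds to satisfy the x87 return convention.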
define float @load_float(ptr %fptr) {
; X86-SSE1-LABEL: load_float:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    flds (%esp)
; X86-SSE1-NEXT:    popl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_float:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    flds (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_float:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    flds (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_float:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    popl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_float:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_float:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    retq
  %v = load atomic float, ptr %fptr unordered, align 4
  ret float %v
}

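; As with store_double, the 8-byte load must be one 64-bit access: SSE
; targets use movlps/movsd, while the no-SSE lowering round-trips the bits
; through the x87 unit with fildll/fistpll.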
define double @load_double(ptr %fptr) {
; X86-SSE1-LABEL: load_double:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movss %xmm0, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_double:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    subl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%esp)
; X86-SSE2-NEXT:    fldl (%esp)
; X86-SSE2-NEXT:    addl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_double:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 16
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%esp)
; X86-AVX-NEXT:    fldl (%esp)
; X86-AVX-NEXT:    addl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_double:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fldl (%esp)
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_double:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_double:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    retq
  %v = load atomic double, ptr %fptr unordered, align 8
  ret double %v
}

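; There is no 16-byte atomic load instruction used here either; the load
; is emulated with __sync_val_compare_and_swap_16, comparing against zero
; and conditionally storing zero back, which returns the current memory
; contents without observably modifying them.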
define fp128 @load_fp128(ptr %fptr) {
; X86-SSE-LABEL: load_fp128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %edi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE-NEXT:    pushl %esi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE-NEXT:    subl $20, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 32
; X86-SSE-NEXT:    .cfi_offset %esi, -12
; X86-SSE-NEXT:    .cfi_offset %edi, -8
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT:    subl $8, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 8
; X86-SSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl %eax
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    calll __sync_val_compare_and_swap_16
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-SSE-NEXT:    addl $44, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -44
; X86-SSE-NEXT:    movl (%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT:    movl %edi, 8(%esi)
; X86-SSE-NEXT:    movl %edx, 12(%esi)
; X86-SSE-NEXT:    movl %eax, (%esi)
; X86-SSE-NEXT:    movl %ecx, 4(%esi)
; X86-SSE-NEXT:    movl %esi, %eax
; X86-SSE-NEXT:    addl $20, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE-NEXT:    popl %esi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE-NEXT:    popl %edi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl $4
;
; X86-AVX-LABEL: load_fp128:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %esi
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    subl $56, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 64
; X86-AVX-NEXT:    .cfi_offset %esi, -8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovups %ymm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl %eax, (%esp)
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    calll __sync_val_compare_and_swap_16
; X86-AVX-NEXT:    subl $4, %esp
; X86-AVX-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT:    vmovaps %xmm0, (%esi)
; X86-AVX-NEXT:    movl %esi, %eax
; X86-AVX-NEXT:    addl $56, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    popl %esi
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl $4
;
; X86-NOSSE-LABEL: load_fp128:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %edi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 32
; X86-NOSSE-NEXT:    .cfi_offset %esi, -12
; X86-NOSSE-NEXT:    .cfi_offset %edi, -8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 8
; X86-NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    calll __sync_val_compare_and_swap_16
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NOSSE-NEXT:    addl $44, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -44
; X86-NOSSE-NEXT:    movl (%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NOSSE-NEXT:    movl %edi, 8(%esi)
; X86-NOSSE-NEXT:    movl %edx, 12(%esi)
; X86-NOSSE-NEXT:    movl %eax, (%esi)
; X86-NOSSE-NEXT:    movl %ecx, 4(%esi)
; X86-NOSSE-NEXT:    movl %esi, %eax
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    popl %edi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl $4
;
; X64-SSE-LABEL: load_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    subq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 32
; X64-SSE-NEXT:    xorl %esi, %esi
; X64-SSE-NEXT:    xorl %edx, %edx
; X64-SSE-NEXT:    xorl %ecx, %ecx
; X64-SSE-NEXT:    xorl %r8d, %r8d
; X64-SSE-NEXT:    callq __sync_val_compare_and_swap_16@PLT
; X64-SSE-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq %rax, (%rsp)
; X64-SSE-NEXT:    movaps (%rsp), %xmm0
; X64-SSE-NEXT:    addq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    subq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 32
; X64-AVX-NEXT:    xorl %esi, %esi
; X64-AVX-NEXT:    xorl %edx, %edx
; X64-AVX-NEXT:    xorl %ecx, %ecx
; X64-AVX-NEXT:    xorl %r8d, %r8d
; X64-AVX-NEXT:    callq __sync_val_compare_and_swap_16@PLT
; X64-AVX-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    movq %rax, (%rsp)
; X64-AVX-NEXT:    vmovaps (%rsp), %xmm0
; X64-AVX-NEXT:    addq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
; X64-AVX-NEXT:    retq
  %v = load atomic fp128, ptr %fptr unordered, align 16
  ret fp128 %v
}


; Check the seq_cst lowering since that's the interesting one from an
; ordering perspective on x86.
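; Under x86-TSO only a store followed by a load may be reordered, so a
; seq_cst load needs no extra fencing, while a seq_cst store needs a full
; barrier: an implicitly locked xchg on 64-bit, or a plain store followed
; by a locked no-op on 32-bit.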

define void @store_float_seq_cst(ptr %fptr, float %v) {
; X86-LABEL: store_float_seq_cst:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xchgl %ecx, (%eax)
; X86-NEXT:    retl
;
; X64-SSE-LABEL: store_float_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movd %xmm0, %eax
; X64-SSE-NEXT:    xchgl %eax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_float_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xchgl %eax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic float %v, ptr %fptr seq_cst, align 4
  ret void
}

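; A 64-bit xchg is not available on 32-bit targets, so the 8-byte seq_cst
; store keeps the single 64-bit store and appends `lock orl $0, (%esp)`,
; a locked no-op that acts as a full memory barrier.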
define void @store_double_seq_cst(ptr %fptr, double %v) {
; X86-SSE1-LABEL: store_double_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    lock orl $0, (%esp)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_double_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    lock orl $0, (%esp)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_double_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    lock orl $0, (%esp)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_double_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    lock orl $0, (%esp)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_double_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movq %xmm0, %rax
; X64-SSE-NEXT:    xchgq %rax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_double_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovq %xmm0, %rax
; X64-AVX-NEXT:    xchgq %rax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic double %v, ptr %fptr seq_cst, align 8
  ret void
}

define float @load_float_seq_cst(ptr %fptr) {
; X86-SSE1-LABEL: load_float_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    flds (%esp)
; X86-SSE1-NEXT:    popl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_float_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    flds (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_float_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    flds (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_float_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    popl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_float_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_float_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    retq
  %v = load atomic float, ptr %fptr seq_cst, align 4
  ret float %v
}

define double @load_double_seq_cst(ptr %fptr) {
; X86-SSE1-LABEL: load_double_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movss %xmm0, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_double_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    subl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%esp)
; X86-SSE2-NEXT:    fldl (%esp)
; X86-SSE2-NEXT:    addl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_double_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 16
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%esp)
; X86-AVX-NEXT:    fldl (%esp)
; X86-AVX-NEXT:    addl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_double_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fldl (%esp)
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_double_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_double_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    retq
  %v = load atomic double, ptr %fptr seq_cst, align 8
  ret double %v
}