; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X86-NOSSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse -verify-machineinstrs | FileCheck %s --check-prefix=X86-SSE1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse2 -verify-machineinstrs | FileCheck %s --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix=X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix=X64-AVX

; ----- FADD -----

define dso_local void @fadd_32r(ptr %loc, float %val) nounwind {
; X86-NOSSE-LABEL: fadd_32r:
; X86-NOSSE:         # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %ecx
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    fadds {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, (%eax)
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32r:
; X86-SSE1:         # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %ecx
; X86-SSE1-NEXT:    movl %ecx, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movl %ecx, (%eax)
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32r:
; X86-SSE2:         # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    addss (%eax), %xmm0
; X86-SSE2-NEXT:    movss %xmm0, (%eax)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32r:
; X86-AVX:         # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vaddss (%eax), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32r:
; X64-SSE:         # %bb.0:
; X64-SSE-NEXT:    addss (%rdi), %xmm0
; X64-SSE-NEXT:    movss %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32r:
; X64-AVX:         # %bb.0:
; X64-AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %1 = load atomic i32, ptr %loc seq_cst, align 4
  %2 = bitcast i32 %1 to float
  %add = fadd float %2, %val
  %3 = bitcast float %add to i32
  store atomic i32 %3, ptr %loc release, align 4
  ret void
}

define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
; X86-NOSSE-LABEL: fadd_64r:
; X86-NOSSE:         # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    movl 8(%ebp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    faddl 12(%ebp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64r:
; X86-SSE1:         # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    movl 8(%ebp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    faddl 12(%ebp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64r:
; X86-SSE2:         # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movl 8(%ebp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd 12(%ebp), %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64r:
; X86-AVX:         # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    movl 8(%ebp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64r:
; X64-SSE:         # %bb.0:
; X64-SSE-NEXT:    addsd (%rdi), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64r:
; X64-AVX:         # %bb.0:
; X64-AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %1 = load atomic i64, ptr %loc seq_cst, align 8
  %2 = bitcast i64 %1 to double
  %add = fadd double %2, %val
  %3 = bitcast double %add to i64
  store atomic i64 %3, ptr %loc release, align 8
  ret void
}

@glob32 = dso_local global float 0.000000e+00, align 4
@glob64 = dso_local global double 0.000000e+00, align 8

; Floating-point add to a global using an immediate.
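; On x86, aligned monotonic atomic loads and stores lower to ordinary moves,
; so no fence or lock prefix appears in the checks below.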
define dso_local void @fadd_32g() nounwind {
; X86-NOSSE-LABEL: fadd_32g:
; X86-NOSSE:         # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl glob32, %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, glob32
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32g:
; X86-SSE1:         # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl glob32, %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, glob32
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32g:
; X86-SSE2:         # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    addss glob32, %xmm0
; X86-SSE2-NEXT:    movss %xmm0, glob32
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32g:
; X86-AVX:         # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vaddss glob32, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, glob32
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32g:
; X64-SSE:         # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    addss glob32(%rip), %xmm0
; X64-SSE-NEXT:    movss %xmm0, glob32(%rip)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32g:
; X64-AVX:         # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    vaddss glob32(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, glob32(%rip)
; X64-AVX-NEXT:    retq
  %i = load atomic i32, ptr @glob32 monotonic, align 4
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, ptr @glob32 monotonic, align 4
  ret void
}

define dso_local void @fadd_64g() nounwind {
; X86-NOSSE-LABEL: fadd_64g:
; X86-NOSSE:         # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    fildll glob64
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll glob64
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64g:
; X86-SSE1:         # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, glob64
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64g:
; X86-SSE2:         # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, glob64
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64g:
; X86-AVX:         # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, glob64
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64g:
; X64-SSE:         # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    addsd glob64(%rip), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, glob64(%rip)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64g:
; X64-AVX:         # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    vaddsd glob64(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, glob64(%rip)
; X64-AVX-NEXT:    retq
  %i = load atomic i64, ptr @glob64 monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, ptr @glob64 monotonic, align 8
  ret void
}

; Floating-point add to a hard-coded immediate location using an immediate.
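; The address 0xDEADBEEF does not fit a sign-extended 32-bit displacement, so
; the 64-bit targets materialize it in %rax first, while the 32-bit targets
; fold the absolute address straight into the memory operand.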
define dso_local void @fadd_32imm() nounwind {
; X86-NOSSE-LABEL: fadd_32imm:
; X86-NOSSE:         # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl -559038737, %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, -559038737
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32imm:
; X86-SSE1:         # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl -559038737, %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, -559038737
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32imm:
; X86-SSE2:         # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    addss -559038737, %xmm0
; X86-SSE2-NEXT:    movss %xmm0, -559038737
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32imm:
; X86-AVX:         # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vaddss -559038737, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, -559038737
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32imm:
; X64-SSE:         # %bb.0:
; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    addss (%rax), %xmm0
; X64-SSE-NEXT:    movss %xmm0, (%rax)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32imm:
; X64-AVX:         # %bb.0:
; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    vaddss (%rax), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, (%rax)
; X64-AVX-NEXT:    retq
  %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
  ret void
}

define dso_local void @fadd_64imm() nounwind {
; X86-NOSSE-LABEL: fadd_64imm:
; X86-NOSSE:         # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    fildll -559038737
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll -559038737
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64imm:
; X86-SSE1:         # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, -559038737
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64imm:
; X86-SSE2:         # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, -559038737
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64imm:
; X86-AVX:         # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, -559038737
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64imm:
; X64-SSE:         # %bb.0:
; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    addsd (%rax), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, (%rax)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64imm:
; X64-AVX:         # %bb.0:
; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    vaddsd (%rax), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rax)
; X64-AVX-NEXT:    retq
  %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
  ret void
}

; Floating-point add to a stack location.
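; The acquire load and release store below likewise lower to plain moves;
; x86's strong memory ordering needs no extra barrier for these orderings.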
define dso_local void @fadd_32stack() nounwind {
; X86-NOSSE-LABEL: fadd_32stack:
; X86-NOSSE:         # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32stack:
; X86-SSE1:         # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32stack:
; X86-SSE2:         # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    addss (%esp), %xmm0
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32stack:
; X86-AVX:         # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vaddss (%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32stack:
; X64-SSE:         # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    addss -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32stack:
; X64-AVX:         # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    vaddss -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    retq
  %ptr = alloca i32, align 4
  %load = load atomic i32, ptr %ptr acquire, align 4
  %bc0 = bitcast i32 %load to float
  %fadd = fadd float 1.000000e+00, %bc0
  %bc1 = bitcast float %fadd to i32
  store atomic i32 %bc1, ptr %ptr release, align 4
  ret void
}

define dso_local void @fadd_64stack() nounwind {
; X86-NOSSE-LABEL: fadd_64stack:
; X86-NOSSE:         # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $40, %esp
; X86-NOSSE-NEXT:    fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64stack:
; X86-SSE1:         # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $24, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64stack:
; X86-SSE2:         # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64stack:
; X86-AVX:         # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $16, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64stack:
; X64-SSE:         # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    addsd -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64stack:
; X64-AVX:         # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    vaddsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    retq
  %ptr = alloca i64, align 8
  %load = load atomic i64, ptr %ptr acquire, align 8
  %bc0 = bitcast i64 %load to double
  %fadd = fadd double 1.000000e+00, %bc0
  %bc1 = bitcast double %fadd to i64
  store atomic i64 %bc1, ptr %ptr release, align 8
  ret void
}

define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-LABEL: fadd_array:
; X86-NOSSE:         # %bb.0: # %bb
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $40, %esp
; X86-NOSSE-NEXT:    movl 20(%ebp), %eax
; X86-NOSSE-NEXT:    movl 8(%ebp), %ecx
; X86-NOSSE-NEXT:    fildll (%ecx,%eax,8)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    faddl 12(%ebp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    movl %edx, (%esp)
; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%ecx,%eax,8)
; X86-NOSSE-NEXT:    leal -4(%ebp), %esp
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_array:
; X86-SSE1:         # %bb.0: # %bb
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    movl 20(%ebp), %eax
; X86-SSE1-NEXT:    movl 8(%ebp), %ecx
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    faddl 12(%ebp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%ecx,%eax,8)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_array:
; X86-SSE2:         # %bb.0: # %bb
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movl 20(%ebp), %eax
; X86-SSE2-NEXT:    movl 8(%ebp), %ecx
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd 12(%ebp), %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%ecx,%eax,8)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_array:
; X86-AVX:         # %bb.0: # %bb
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    movl 20(%ebp), %eax
; X86-AVX-NEXT:    movl 8(%ebp), %ecx
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%ecx,%eax,8)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_array:
; X64-SSE:         # %bb.0: # %bb
; X64-SSE-NEXT:    addsd (%rdi,%rsi,8), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, (%rdi,%rsi,8)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_array:
; X64-AVX:         # %bb.0: # %bb
; X64-AVX-NEXT:    vaddsd (%rdi,%rsi,8), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi,%rsi,8)
; X64-AVX-NEXT:    retq
bb:
  %tmp4 = getelementptr inbounds i64, ptr %arg, i64 %arg2
  %tmp6 = load atomic i64, ptr %tmp4 monotonic, align 8
  %tmp7 = bitcast i64 %tmp6 to double
  %tmp8 = fadd double %tmp7, %arg1
  %tmp9 = bitcast double %tmp8 to i64
  store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8
  ret void
}