; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s

; Basic test coverage for FREM
;
; Each function computes `frem %a0, %a1` for one scalar or vector
; floating-point type and stores the result through %p3. The expected
; assembly below shows every case being lowered to library calls (fmodf, fmod,
; fmodl), with vectors handled one element at a time. The assertion lines are
; tool-generated: regenerate them with the script named above rather than
; editing them by hand.

; half: the expected code widens each operand with __extendhfsf2, calls fmodf,
; narrows the result with __truncsfhf2 and stores the low 16 bits via rbx.
define void @frem_f16(half %a0, half %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_f16:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $16, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2@PLT
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: callq __truncsfhf2@PLT
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, (%rbx)
; CHECK-NEXT: addq $16, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem half %a0, %a1
  store half %frem, ptr%p3
  ret void
}

; float: expected to become a single fmodf call; the destination pointer is
; kept in rbx across the call.
define void @frem_f32(float %a0, float %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movss %xmm0, (%rbx)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem float %a0, %a1
  store float %frem, ptr%p3
  ret void
}

; double: expected to become a single fmod call.
define void @frem_f64(double %a0, double %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movsd %xmm0, (%rbx)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem double %a0, %a1
  store double %frem, ptr%p3
  ret void
}

; x86_fp80: both operands are loaded from the incoming stack area with fldt and
; written back out as stack arguments with fstpt before the fmodl call.
define void @frem_f80(x86_fp80 %a0, x86_fp80 %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_f80:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $32, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp)
; CHECK-NEXT: fstpt (%rsp)
; CHECK-NEXT: callq fmodl@PLT
; CHECK-NEXT: fstpt (%rbx)
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem x86_fp80 %a0, %a1
  store x86_fp80 %frem, ptr%p3
  ret void
}

; fp128: expected to call fmodl and store the 16-byte result with movaps.
; NOTE(review): this is the same symbol that is expected for x86_fp80 above;
; confirm it is the intended libcall name for fp128 on this target.
define void @frem_f128(fp128 %a0, fp128 %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_f128:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: callq fmodl@PLT
; CHECK-NEXT: movaps %xmm0, (%rbx)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem fp128 %a0, %a1
  store fp128 %frem, ptr%p3
  ret void
}

; <16 x float>: expected to be split into sixteen fmodf calls. All eight input
; registers are spilled first; results are repacked with unpcklps/unpcklpd and
; stored as four 16-byte chunks at offsets 0, 16, 32 and 48 from rbx.
define void @frem_v16f32(<16 x float> %a0, <16 x float> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $160, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm5, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: movaps %xmm4, %xmm1
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm4[3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, 48(%rbx)
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps %xmm0, 32(%rbx)
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps %xmm0, 16(%rbx)
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps %xmm0, (%rbx)
; CHECK-NEXT: addq $160, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem <16 x float> %a0, %a1
  store <16 x float> %frem, ptr%p3
  ret void
}

; <8 x float>: expected to be split into eight fmodf calls, repacked into two
; 16-byte chunks stored at offsets 0 and 16 from rbx.
define void @frem_v8f32(<8 x float> %a0, <8 x float> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $96, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm2, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: movaps %xmm2, %xmm1
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm2[3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, 16(%rbx)
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps %xmm0, (%rbx)
; CHECK-NEXT: addq $96, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem <8 x float> %a0, %a1
  store <8 x float> %frem, ptr%p3
  ret void
}

; <4 x float>: expected to be split into four fmodf calls, repacked into one
; 16-byte store.
define void @frem_v4f32(<4 x float> %a0, <4 x float> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; CHECK-NEXT: callq fmodf@PLT
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, (%rbx)
; CHECK-NEXT: addq $64, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem <4 x float> %a0, %a1
  store <4 x float> %frem, ptr%p3
  ret void
}

; <8 x double>: expected to be split into eight fmod calls; each pair of
; results is merged with movlhps and stored as four 16-byte chunks.
define void @frem_v8f64(<8 x double> %a0, <8 x double> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $144, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm5, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm4, %xmm1
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movaps %xmm1, 48(%rbx)
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps %xmm0, 32(%rbx)
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps %xmm0, 16(%rbx)
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps %xmm0, (%rbx)
; CHECK-NEXT: addq $144, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem <8 x double> %a0, %a1
  store <8 x double> %frem, ptr%p3
  ret void
}

; <4 x double>: expected to be split into four fmod calls, merged pairwise with
; movlhps and stored as two 16-byte chunks.
define void @frem_v4f64(<4 x double> %a0, <4 x double> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $80, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm2, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm2, %xmm1
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movaps %xmm1, 16(%rbx)
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps %xmm0, (%rbx)
; CHECK-NEXT: addq $80, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem <4 x double> %a0, %a1
  store <4 x double> %frem, ptr%p3
  ret void
}

; <2 x double>: expected to be split into two fmod calls whose results are
; merged with movlhps into one 16-byte store.
define void @frem_v2f64(<2 x double> %a0, <2 x double> %a1, ptr%p3) nounwind {
; CHECK-LABEL: frem_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $48, %rsp
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmod@PLT
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movaps %xmm1, (%rbx)
; CHECK-NEXT: addq $48, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %frem = frem <2 x double> %a0, %a1
  store <2 x double> %frem, ptr%p3
  ret void
}

define void @frem_v32f16(<32 x half> %a0, <32 x half> %a1, ptr%p3)
nounwind { 496; CHECK-LABEL: frem_v32f16: 497; CHECK: # %bb.0: 498; CHECK-NEXT: pushq %rbx 499; CHECK-NEXT: subq $176, %rsp 500; CHECK-NEXT: movq %rdi, %rbx 501; CHECK-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 502; CHECK-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 503; CHECK-NEXT: movaps %xmm5, (%rsp) # 16-byte Spill 504; CHECK-NEXT: movdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 505; CHECK-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 506; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 507; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 508; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 509; CHECK-NEXT: movdqa %xmm4, %xmm0 510; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 511; CHECK-NEXT: callq __extendhfsf2@PLT 512; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 513; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 514; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 515; CHECK-NEXT: callq __extendhfsf2@PLT 516; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 517; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 518; CHECK-NEXT: callq fmodf@PLT 519; CHECK-NEXT: callq __truncsfhf2@PLT 520; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 521; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 522; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 523; CHECK-NEXT: callq __extendhfsf2@PLT 524; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 525; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 526; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 527; CHECK-NEXT: callq __extendhfsf2@PLT 528; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 529; CHECK-NEXT: # xmm1 = 
mem[0],zero,zero,zero 530; CHECK-NEXT: callq fmodf@PLT 531; CHECK-NEXT: callq __truncsfhf2@PLT 532; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 533; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 534; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 535; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 536; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 537; CHECK-NEXT: callq __extendhfsf2@PLT 538; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 539; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 540; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 541; CHECK-NEXT: callq __extendhfsf2@PLT 542; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 543; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 544; CHECK-NEXT: callq fmodf@PLT 545; CHECK-NEXT: callq __truncsfhf2@PLT 546; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 547; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 548; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 549; CHECK-NEXT: callq __extendhfsf2@PLT 550; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 551; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 552; CHECK-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] 553; CHECK-NEXT: callq __extendhfsf2@PLT 554; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 555; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 556; CHECK-NEXT: callq fmodf@PLT 557; CHECK-NEXT: callq __truncsfhf2@PLT 558; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 559; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 560; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 561; CHECK-NEXT: # 
xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 562; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 563; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 564; CHECK-NEXT: psrlq $48, %xmm0 565; CHECK-NEXT: callq __extendhfsf2@PLT 566; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 567; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 568; CHECK-NEXT: psrlq $48, %xmm0 569; CHECK-NEXT: callq __extendhfsf2@PLT 570; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 571; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 572; CHECK-NEXT: callq fmodf@PLT 573; CHECK-NEXT: callq __truncsfhf2@PLT 574; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 575; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 576; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 577; CHECK-NEXT: callq __extendhfsf2@PLT 578; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 579; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 580; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 581; CHECK-NEXT: callq __extendhfsf2@PLT 582; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 583; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 584; CHECK-NEXT: callq fmodf@PLT 585; CHECK-NEXT: callq __truncsfhf2@PLT 586; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 587; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 588; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 589; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 590; CHECK-NEXT: callq __extendhfsf2@PLT 591; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 592; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 593; CHECK-NEXT: callq __extendhfsf2@PLT 594; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 595; CHECK-NEXT: # xmm1 = 
mem[0],zero,zero,zero 596; CHECK-NEXT: callq fmodf@PLT 597; CHECK-NEXT: callq __truncsfhf2@PLT 598; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 599; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 600; CHECK-NEXT: psrld $16, %xmm0 601; CHECK-NEXT: callq __extendhfsf2@PLT 602; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 603; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 604; CHECK-NEXT: psrld $16, %xmm0 605; CHECK-NEXT: callq __extendhfsf2@PLT 606; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 607; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 608; CHECK-NEXT: callq fmodf@PLT 609; CHECK-NEXT: callq __truncsfhf2@PLT 610; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 611; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 612; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 613; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] 614; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 615; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 616; CHECK-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 617; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 618; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 619; CHECK-NEXT: callq __extendhfsf2@PLT 620; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 621; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 622; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 623; CHECK-NEXT: callq __extendhfsf2@PLT 624; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 625; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 626; CHECK-NEXT: callq fmodf@PLT 627; CHECK-NEXT: callq __truncsfhf2@PLT 628; CHECK-NEXT: movdqa %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 629; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 630; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 631; CHECK-NEXT: callq __extendhfsf2@PLT 632; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 633; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 634; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 635; CHECK-NEXT: callq __extendhfsf2@PLT 636; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 637; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 638; CHECK-NEXT: callq fmodf@PLT 639; CHECK-NEXT: callq __truncsfhf2@PLT 640; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 641; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 642; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 643; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 644; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 645; CHECK-NEXT: callq __extendhfsf2@PLT 646; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 647; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 648; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 649; CHECK-NEXT: callq __extendhfsf2@PLT 650; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 651; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 652; CHECK-NEXT: callq fmodf@PLT 653; CHECK-NEXT: callq __truncsfhf2@PLT 654; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 655; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 656; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 657; CHECK-NEXT: callq __extendhfsf2@PLT 658; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 659; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 660; CHECK-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] 661; 
CHECK-NEXT: callq __extendhfsf2@PLT 662; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 663; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 664; CHECK-NEXT: callq fmodf@PLT 665; CHECK-NEXT: callq __truncsfhf2@PLT 666; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 667; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 668; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 669; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 670; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 671; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 672; CHECK-NEXT: psrlq $48, %xmm0 673; CHECK-NEXT: callq __extendhfsf2@PLT 674; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 675; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 676; CHECK-NEXT: psrlq $48, %xmm0 677; CHECK-NEXT: callq __extendhfsf2@PLT 678; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 679; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 680; CHECK-NEXT: callq fmodf@PLT 681; CHECK-NEXT: callq __truncsfhf2@PLT 682; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 683; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 684; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 685; CHECK-NEXT: callq __extendhfsf2@PLT 686; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 687; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 688; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 689; CHECK-NEXT: callq __extendhfsf2@PLT 690; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 691; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 692; CHECK-NEXT: callq fmodf@PLT 693; CHECK-NEXT: callq __truncsfhf2@PLT 694; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 695; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 696; CHECK-NEXT: movdqa 
%xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 697; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 698; CHECK-NEXT: callq __extendhfsf2@PLT 699; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 700; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 701; CHECK-NEXT: callq __extendhfsf2@PLT 702; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 703; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 704; CHECK-NEXT: callq fmodf@PLT 705; CHECK-NEXT: callq __truncsfhf2@PLT 706; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 707; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 708; CHECK-NEXT: psrld $16, %xmm0 709; CHECK-NEXT: callq __extendhfsf2@PLT 710; CHECK-NEXT: movd %xmm0, (%rsp) # 4-byte Folded Spill 711; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 712; CHECK-NEXT: psrld $16, %xmm0 713; CHECK-NEXT: callq __extendhfsf2@PLT 714; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 715; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 716; CHECK-NEXT: callq fmodf@PLT 717; CHECK-NEXT: callq __truncsfhf2@PLT 718; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 719; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 720; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 721; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] 722; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 723; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 724; CHECK-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 725; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 726; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 727; CHECK-NEXT: callq __extendhfsf2@PLT 728; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 729; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte 
Reload 730; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 731; CHECK-NEXT: callq __extendhfsf2@PLT 732; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 733; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 734; CHECK-NEXT: callq fmodf@PLT 735; CHECK-NEXT: callq __truncsfhf2@PLT 736; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 737; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 738; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 739; CHECK-NEXT: callq __extendhfsf2@PLT 740; CHECK-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 741; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 742; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 743; CHECK-NEXT: callq __extendhfsf2@PLT 744; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 745; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 746; CHECK-NEXT: callq fmodf@PLT 747; CHECK-NEXT: callq __truncsfhf2@PLT 748; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 749; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 750; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 751; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 752; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 753; CHECK-NEXT: callq __extendhfsf2@PLT 754; CHECK-NEXT: movd %xmm0, (%rsp) # 4-byte Folded Spill 755; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 756; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 757; CHECK-NEXT: callq __extendhfsf2@PLT 758; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 759; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 760; CHECK-NEXT: callq fmodf@PLT 761; CHECK-NEXT: callq __truncsfhf2@PLT 762; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill 763; CHECK-NEXT: 
movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 764; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 765; CHECK-NEXT: callq __extendhfsf2@PLT 766; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 767; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 768; CHECK-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] 769; CHECK-NEXT: callq __extendhfsf2@PLT 770; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 771; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 772; CHECK-NEXT: callq fmodf@PLT 773; CHECK-NEXT: callq __truncsfhf2@PLT 774; CHECK-NEXT: punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload 775; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 776; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 777; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 778; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill 779; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 780; CHECK-NEXT: psrlq $48, %xmm0 781; CHECK-NEXT: callq __extendhfsf2@PLT 782; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 783; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 784; CHECK-NEXT: psrlq $48, %xmm0 785; CHECK-NEXT: callq __extendhfsf2@PLT 786; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 787; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 788; CHECK-NEXT: callq fmodf@PLT 789; CHECK-NEXT: callq __truncsfhf2@PLT 790; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 791; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 792; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 793; CHECK-NEXT: callq __extendhfsf2@PLT 794; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 795; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 796; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 797; CHECK-NEXT: callq __extendhfsf2@PLT 798; CHECK-NEXT: movss 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 799; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 800; CHECK-NEXT: callq fmodf@PLT 801; CHECK-NEXT: callq __truncsfhf2@PLT 802; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 803; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 804; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 805; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 806; CHECK-NEXT: callq __extendhfsf2@PLT 807; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 808; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 809; CHECK-NEXT: callq __extendhfsf2@PLT 810; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 811; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 812; CHECK-NEXT: callq fmodf@PLT 813; CHECK-NEXT: callq __truncsfhf2@PLT 814; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 815; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 816; CHECK-NEXT: psrld $16, %xmm0 817; CHECK-NEXT: callq __extendhfsf2@PLT 818; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 819; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 820; CHECK-NEXT: psrld $16, %xmm0 821; CHECK-NEXT: callq __extendhfsf2@PLT 822; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 823; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 824; CHECK-NEXT: callq fmodf@PLT 825; CHECK-NEXT: callq __truncsfhf2@PLT 826; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 827; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 828; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 829; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] 830; CHECK-NEXT: punpcklqdq (%rsp), %xmm1 # 16-byte Folded Reload 831; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 832; CHECK-NEXT: movdqa %xmm1, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 833; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 834; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 835; CHECK-NEXT: callq __extendhfsf2@PLT 836; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 837; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 838; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 839; CHECK-NEXT: callq __extendhfsf2@PLT 840; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 841; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 842; CHECK-NEXT: callq fmodf@PLT 843; CHECK-NEXT: callq __truncsfhf2@PLT 844; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 845; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 846; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 847; CHECK-NEXT: callq __extendhfsf2@PLT 848; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 849; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 850; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 851; CHECK-NEXT: callq __extendhfsf2@PLT 852; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 853; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 854; CHECK-NEXT: callq fmodf@PLT 855; CHECK-NEXT: callq __truncsfhf2@PLT 856; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 857; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 858; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 859; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 860; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 861; CHECK-NEXT: callq __extendhfsf2@PLT 862; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 
863; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 864; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 865; CHECK-NEXT: callq __extendhfsf2@PLT 866; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 867; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 868; CHECK-NEXT: callq fmodf@PLT 869; CHECK-NEXT: callq __truncsfhf2@PLT 870; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 871; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 872; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 873; CHECK-NEXT: callq __extendhfsf2@PLT 874; CHECK-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 875; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 876; CHECK-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] 877; CHECK-NEXT: callq __extendhfsf2@PLT 878; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 879; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 880; CHECK-NEXT: callq fmodf@PLT 881; CHECK-NEXT: callq __truncsfhf2@PLT 882; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 883; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 884; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 885; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 886; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 887; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 888; CHECK-NEXT: psrlq $48, %xmm0 889; CHECK-NEXT: callq __extendhfsf2@PLT 890; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 891; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 892; CHECK-NEXT: psrlq $48, %xmm0 893; CHECK-NEXT: callq __extendhfsf2@PLT 894; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 895; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 896; CHECK-NEXT: callq fmodf@PLT 897; CHECK-NEXT: callq __truncsfhf2@PLT 898; 
CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 899; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 900; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 901; CHECK-NEXT: callq __extendhfsf2@PLT 902; CHECK-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 903; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 904; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 905; CHECK-NEXT: callq __extendhfsf2@PLT 906; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 907; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 908; CHECK-NEXT: callq fmodf@PLT 909; CHECK-NEXT: callq __truncsfhf2@PLT 910; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 911; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 912; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 913; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 914; CHECK-NEXT: callq __extendhfsf2@PLT 915; CHECK-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 916; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 917; CHECK-NEXT: callq __extendhfsf2@PLT 918; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 919; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 920; CHECK-NEXT: callq fmodf@PLT 921; CHECK-NEXT: callq __truncsfhf2@PLT 922; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 923; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 924; CHECK-NEXT: psrld $16, %xmm0 925; CHECK-NEXT: callq __extendhfsf2@PLT 926; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 927; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 928; CHECK-NEXT: psrld $16, %xmm0 929; CHECK-NEXT: callq __extendhfsf2@PLT 930; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 931; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 932; CHECK-NEXT: callq fmodf@PLT 933; CHECK-NEXT: callq __truncsfhf2@PLT 934; CHECK-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload 
935; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 936; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 937; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] 938; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 939; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 940; CHECK-NEXT: movdqa %xmm1, 48(%rbx) 941; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 942; CHECK-NEXT: movaps %xmm0, 32(%rbx) 943; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 944; CHECK-NEXT: movaps %xmm0, 16(%rbx) 945; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 946; CHECK-NEXT: movaps %xmm0, (%rbx) 947; CHECK-NEXT: addq $176, %rsp 948; CHECK-NEXT: popq %rbx 949; CHECK-NEXT: retq 950 %frem = frem <32 x half> %a0, %a1 951 store <32 x half> %frem, ptr%p3 952 ret void 953} 954
; frem_v16f16: frem on <16 x half>, with the result stored through %p3.
; The expected x86_64-linux-gnu output below handles one half lane at a time:
; the lane of each operand is widened via __extendhfsf2, the remainder is taken
; with fmodf, and the result is narrowed again via __truncsfhf2. The lanes are
; then re-packed with punpcklwd/punpckldq/punpcklqdq and written as two 16-byte
; stores, to 16(%rbx) and (%rbx).
; The assertion lines are script-generated (see the NOTE at the top of this
; file); regenerate them instead of editing them by hand.
955define void @frem_v16f16(<16 x half> %a0, <16 x half> %a1, ptr%p3) nounwind { 956; CHECK-LABEL: frem_v16f16: 957; CHECK: # %bb.0: 958; CHECK-NEXT: pushq %rbx 959; CHECK-NEXT: subq $112, %rsp 960; CHECK-NEXT: movq %rdi, %rbx 961; CHECK-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 962; CHECK-NEXT: movdqa %xmm2, (%rsp) # 16-byte Spill 963; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 964; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 965; CHECK-NEXT: movdqa %xmm2, %xmm0 966; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 967; CHECK-NEXT: callq __extendhfsf2@PLT 968; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 969; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 970; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 971; CHECK-NEXT: callq __extendhfsf2@PLT 972; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte 
Reload 973; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 974; CHECK-NEXT: callq fmodf@PLT 975; CHECK-NEXT: callq __truncsfhf2@PLT 976; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 977; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 978; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 979; CHECK-NEXT: callq __extendhfsf2@PLT 980; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 981; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 982; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 983; CHECK-NEXT: callq __extendhfsf2@PLT 984; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 985; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 986; CHECK-NEXT: callq fmodf@PLT 987; CHECK-NEXT: callq __truncsfhf2@PLT 988; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 989; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 990; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 991; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 992; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 993; CHECK-NEXT: callq __extendhfsf2@PLT 994; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 995; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 996; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 997; CHECK-NEXT: callq __extendhfsf2@PLT 998; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 999; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1000; CHECK-NEXT: callq fmodf@PLT 1001; CHECK-NEXT: callq __truncsfhf2@PLT 1002; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1003; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1004; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 1005; CHECK-NEXT: callq __extendhfsf2@PLT 1006; CHECK-NEXT: movss %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1007; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1008; CHECK-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] 1009; CHECK-NEXT: callq __extendhfsf2@PLT 1010; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1011; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1012; CHECK-NEXT: callq fmodf@PLT 1013; CHECK-NEXT: callq __truncsfhf2@PLT 1014; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1015; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1016; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1017; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 1018; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1019; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 1020; CHECK-NEXT: psrlq $48, %xmm0 1021; CHECK-NEXT: callq __extendhfsf2@PLT 1022; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1023; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1024; CHECK-NEXT: psrlq $48, %xmm0 1025; CHECK-NEXT: callq __extendhfsf2@PLT 1026; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1027; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1028; CHECK-NEXT: callq fmodf@PLT 1029; CHECK-NEXT: callq __truncsfhf2@PLT 1030; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1031; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1032; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1033; CHECK-NEXT: callq __extendhfsf2@PLT 1034; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1035; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1036; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1037; CHECK-NEXT: callq __extendhfsf2@PLT 1038; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1039; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1040; CHECK-NEXT: callq fmodf@PLT 1041; CHECK-NEXT: callq 
__truncsfhf2@PLT 1042; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1043; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1044; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1045; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1046; CHECK-NEXT: callq __extendhfsf2@PLT 1047; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1048; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1049; CHECK-NEXT: callq __extendhfsf2@PLT 1050; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1051; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1052; CHECK-NEXT: callq fmodf@PLT 1053; CHECK-NEXT: callq __truncsfhf2@PLT 1054; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1055; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 1056; CHECK-NEXT: psrld $16, %xmm0 1057; CHECK-NEXT: callq __extendhfsf2@PLT 1058; CHECK-NEXT: movd %xmm0, (%rsp) # 4-byte Folded Spill 1059; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1060; CHECK-NEXT: psrld $16, %xmm0 1061; CHECK-NEXT: callq __extendhfsf2@PLT 1062; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 1063; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1064; CHECK-NEXT: callq fmodf@PLT 1065; CHECK-NEXT: callq __truncsfhf2@PLT 1066; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 1067; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1068; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1069; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] 1070; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1071; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1072; CHECK-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1073; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1074; CHECK-NEXT: psrldq {{.*#+}} xmm0 = 
xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1075; CHECK-NEXT: callq __extendhfsf2@PLT 1076; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1077; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1078; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1079; CHECK-NEXT: callq __extendhfsf2@PLT 1080; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1081; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1082; CHECK-NEXT: callq fmodf@PLT 1083; CHECK-NEXT: callq __truncsfhf2@PLT 1084; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1085; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1086; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 1087; CHECK-NEXT: callq __extendhfsf2@PLT 1088; CHECK-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 1089; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1090; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 1091; CHECK-NEXT: callq __extendhfsf2@PLT 1092; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 1093; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1094; CHECK-NEXT: callq fmodf@PLT 1095; CHECK-NEXT: callq __truncsfhf2@PLT 1096; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1097; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1098; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1099; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1100; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1101; CHECK-NEXT: callq __extendhfsf2@PLT 1102; CHECK-NEXT: movd %xmm0, (%rsp) # 4-byte Folded Spill 1103; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1104; CHECK-NEXT: psrldq {{.*#+}} xmm0 = 
xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1105; CHECK-NEXT: callq __extendhfsf2@PLT 1106; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 1107; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1108; CHECK-NEXT: callq fmodf@PLT 1109; CHECK-NEXT: callq __truncsfhf2@PLT 1110; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill 1111; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1112; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 1113; CHECK-NEXT: callq __extendhfsf2@PLT 1114; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1115; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1116; CHECK-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] 1117; CHECK-NEXT: callq __extendhfsf2@PLT 1118; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1119; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1120; CHECK-NEXT: callq fmodf@PLT 1121; CHECK-NEXT: callq __truncsfhf2@PLT 1122; CHECK-NEXT: punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload 1123; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1124; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1125; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 1126; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1127; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1128; CHECK-NEXT: psrlq $48, %xmm0 1129; CHECK-NEXT: callq __extendhfsf2@PLT 1130; CHECK-NEXT: movd %xmm0, (%rsp) # 4-byte Folded Spill 1131; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1132; CHECK-NEXT: psrlq $48, %xmm0 1133; CHECK-NEXT: callq __extendhfsf2@PLT 1134; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 1135; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1136; CHECK-NEXT: callq fmodf@PLT 1137; CHECK-NEXT: callq __truncsfhf2@PLT 1138; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill 1139; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1140; 
CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1141; CHECK-NEXT: callq __extendhfsf2@PLT 1142; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1143; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1144; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1145; CHECK-NEXT: callq __extendhfsf2@PLT 1146; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1147; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1148; CHECK-NEXT: callq fmodf@PLT 1149; CHECK-NEXT: callq __truncsfhf2@PLT 1150; CHECK-NEXT: punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload 1151; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1152; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill 1153; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1154; CHECK-NEXT: callq __extendhfsf2@PLT 1155; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1156; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1157; CHECK-NEXT: callq __extendhfsf2@PLT 1158; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1159; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1160; CHECK-NEXT: callq fmodf@PLT 1161; CHECK-NEXT: callq __truncsfhf2@PLT 1162; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1163; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1164; CHECK-NEXT: psrld $16, %xmm0 1165; CHECK-NEXT: callq __extendhfsf2@PLT 1166; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1167; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1168; CHECK-NEXT: psrld $16, %xmm0 1169; CHECK-NEXT: callq __extendhfsf2@PLT 1170; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1171; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1172; CHECK-NEXT: callq fmodf@PLT 1173; CHECK-NEXT: callq __truncsfhf2@PLT 1174; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 1175; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 
= xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1176; CHECK-NEXT: punpckldq (%rsp), %xmm1 # 16-byte Folded Reload 1177; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] 1178; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1179; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1180; CHECK-NEXT: movdqa %xmm1, 16(%rbx) 1181; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1182; CHECK-NEXT: movaps %xmm0, (%rbx) 1183; CHECK-NEXT: addq $112, %rsp 1184; CHECK-NEXT: popq %rbx 1185; CHECK-NEXT: retq 1186 %frem = frem <16 x half> %a0, %a1 1187 store <16 x half> %frem, ptr%p3 1188 ret void 1189} 1190
; frem_v8f16: frem on <8 x half>, with the result stored through %p3.
; The expected x86_64-linux-gnu output below handles one half lane at a time:
; the lane of each operand is widened via __extendhfsf2, the remainder is taken
; with fmodf, and the result is narrowed again via __truncsfhf2. The lanes are
; then re-packed with punpcklwd/punpckldq/punpcklqdq and written with a single
; 16-byte store to (%rbx).
; The assertion lines are script-generated (see the NOTE at the top of this
; file); regenerate them instead of editing them by hand.
1191define void @frem_v8f16(<8 x half> %a0, <8 x half> %a1, ptr%p3) nounwind { 1192; CHECK-LABEL: frem_v8f16: 1193; CHECK: # %bb.0: 1194; CHECK-NEXT: pushq %rbx 1195; CHECK-NEXT: subq $80, %rsp 1196; CHECK-NEXT: movq %rdi, %rbx 1197; CHECK-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1198; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1199; CHECK-NEXT: movdqa %xmm1, %xmm0 1200; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1201; CHECK-NEXT: callq __extendhfsf2@PLT 1202; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1203; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1204; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1205; CHECK-NEXT: callq __extendhfsf2@PLT 1206; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1207; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1208; CHECK-NEXT: callq fmodf@PLT 1209; CHECK-NEXT: callq __truncsfhf2@PLT 1210; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1211; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1212; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 1213; CHECK-NEXT: callq __extendhfsf2@PLT 1214; 
CHECK-NEXT: movss %xmm0, (%rsp) # 4-byte Spill 1215; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1216; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 1217; CHECK-NEXT: callq __extendhfsf2@PLT 1218; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 1219; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1220; CHECK-NEXT: callq fmodf@PLT 1221; CHECK-NEXT: callq __truncsfhf2@PLT 1222; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1223; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1224; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1225; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1226; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1227; CHECK-NEXT: callq __extendhfsf2@PLT 1228; CHECK-NEXT: movd %xmm0, (%rsp) # 4-byte Folded Spill 1229; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1230; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1231; CHECK-NEXT: callq __extendhfsf2@PLT 1232; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 1233; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1234; CHECK-NEXT: callq fmodf@PLT 1235; CHECK-NEXT: callq __truncsfhf2@PLT 1236; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill 1237; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1238; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 1239; CHECK-NEXT: callq __extendhfsf2@PLT 1240; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1241; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1242; CHECK-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] 1243; CHECK-NEXT: callq __extendhfsf2@PLT 1244; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1245; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1246; CHECK-NEXT: callq fmodf@PLT 1247; CHECK-NEXT: callq __truncsfhf2@PLT 
1248; CHECK-NEXT: punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload 1249; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1250; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1251; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 1252; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1253; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1254; CHECK-NEXT: psrlq $48, %xmm0 1255; CHECK-NEXT: callq __extendhfsf2@PLT 1256; CHECK-NEXT: movd %xmm0, (%rsp) # 4-byte Folded Spill 1257; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1258; CHECK-NEXT: psrlq $48, %xmm0 1259; CHECK-NEXT: callq __extendhfsf2@PLT 1260; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload 1261; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1262; CHECK-NEXT: callq fmodf@PLT 1263; CHECK-NEXT: callq __truncsfhf2@PLT 1264; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill 1265; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1266; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1267; CHECK-NEXT: callq __extendhfsf2@PLT 1268; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1269; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1270; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1271; CHECK-NEXT: callq __extendhfsf2@PLT 1272; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1273; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1274; CHECK-NEXT: callq fmodf@PLT 1275; CHECK-NEXT: callq __truncsfhf2@PLT 1276; CHECK-NEXT: punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload 1277; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 1278; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill 1279; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1280; CHECK-NEXT: callq __extendhfsf2@PLT 1281; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1282; CHECK-NEXT: movaps 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1283; CHECK-NEXT: callq __extendhfsf2@PLT 1284; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1285; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1286; CHECK-NEXT: callq fmodf@PLT 1287; CHECK-NEXT: callq __truncsfhf2@PLT 1288; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1289; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1290; CHECK-NEXT: psrld $16, %xmm0 1291; CHECK-NEXT: callq __extendhfsf2@PLT 1292; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill 1293; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 1294; CHECK-NEXT: psrld $16, %xmm0 1295; CHECK-NEXT: callq __extendhfsf2@PLT 1296; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload 1297; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero 1298; CHECK-NEXT: callq fmodf@PLT 1299; CHECK-NEXT: callq __truncsfhf2@PLT 1300; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 1301; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1302; CHECK-NEXT: punpckldq (%rsp), %xmm1 # 16-byte Folded Reload 1303; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] 1304; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1305; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1306; CHECK-NEXT: movdqa %xmm1, (%rbx) 1307; CHECK-NEXT: addq $80, %rsp 1308; CHECK-NEXT: popq %rbx 1309; CHECK-NEXT: retq 1310 %frem = frem <8 x half> %a0, %a1 1311 store <8 x half> %frem, ptr%p3 1312 ret void 1313} 1314 ; frem on <4 x x86_fp80>, result stored through %p3. The expected assembly below makes four separate fmodl calls (presumably one per vector element); before each call the two operands are moved with fldt/fstpt into (%rsp) and a positive offset from %rsp. The first three results are spilled, the fourth is stored straight to 30(%rbx), and the spilled results are then reloaded and stored to 20(%rbx), 10(%rbx) and (%rbx), where %rbx holds the pointer copied from %rdi. The assertions are autogenerated by utils/update_llc_test_checks.py - regenerate them rather than editing by hand. 1315define void @frem_v4f80(<4 x x86_fp80> %a0, <4 x x86_fp80> %a1, ptr%p3) nounwind { 1316; CHECK-LABEL: frem_v4f80: 1317; CHECK: # %bb.0: 1318; CHECK-NEXT: pushq %rbx 1319; CHECK-NEXT: subq $128, %rsp 1320; CHECK-NEXT: movq %rdi, %rbx 1321; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) 1322; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill 1323; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) 1324; CHECK-NEXT: 
fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill 1325; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) 1326; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill 1327; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) 1328; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill 1329; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) 1330; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill 1331; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) 1332; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill 1333; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) 1334; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) 1335; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp) 1336; CHECK-NEXT: fstpt (%rsp) 1337; CHECK-NEXT: callq fmodl@PLT 1338; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill 1339; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload 1340; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp) 1341; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload 1342; CHECK-NEXT: fstpt (%rsp) 1343; CHECK-NEXT: callq fmodl@PLT 1344; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill 1345; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload 1346; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp) 1347; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload 1348; CHECK-NEXT: fstpt (%rsp) 1349; CHECK-NEXT: callq fmodl@PLT 1350; CHECK-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill 1351; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload 1352; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp) 1353; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload 1354; CHECK-NEXT: fstpt (%rsp) 1355; CHECK-NEXT: callq fmodl@PLT 1356; CHECK-NEXT: fstpt 30(%rbx) 1357; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload 1358; CHECK-NEXT: fstpt 20(%rbx) 1359; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload 1360; CHECK-NEXT: fstpt 10(%rbx) 1361; CHECK-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload 1362; CHECK-NEXT: fstpt (%rbx) 
1363; CHECK-NEXT: addq $128, %rsp 1364; CHECK-NEXT: popq %rbx 1365; CHECK-NEXT: retq 1366 %frem = frem <4 x x86_fp80> %a0, %a1 1367 store <4 x x86_fp80> %frem, ptr%p3 1368 ret void 1369} 1370