; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s

; Codegen test for the bfloat type on x86_64-linux-gnu (no extra target
; features are passed to llc). Each function below pairs a small piece of IR
; with the exact assembly FileCheck expects. The assertion lines are tool
; output; presumably they should be regenerated with the script named on the
; first line instead of being edited by hand.

; Scalar bfloat add through memory: load from %pa and %pb, add, store to %pc.
; Expected code zero-extends each 16-bit load, shifts it left by 16 and moves
; it into an XMM register, adds with addss, calls __truncsfbf2 on the sum, and
; stores the low 16 bits of the result. %pc is kept in %rbx across the call.
define void @add(ptr %pa, ptr %pb, ptr %pc) nounwind {
; CHECK-LABEL: add:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdx, %rbx
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: movzwl (%rsi), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movw %ax, (%rbx)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %a = load bfloat, ptr %pa
  %b = load bfloat, ptr %pb
  %add = fadd bfloat %a, %b
  store bfloat %add, ptr %pc
  ret void
}

; Scalar bfloat add with operands and result passed by value. Expected code
; takes both operands from %xmm0/%xmm1, shifts each left by 16 via %eax, adds
; with addss, and returns whatever __truncsfbf2 leaves in %xmm0. The
; pushq/popq %rax pair only adjusts the stack around the call.
define bfloat @add2(bfloat %a, bfloat %b) nounwind {
; CHECK-LABEL: add2:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movd %xmm1, %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
  %add = fadd bfloat %a, %b
  ret bfloat %add
}

; double -> bfloat -> add -> double, through memory. Each loaded double is
; narrowed with a call to __truncdfbf2; the first narrowed value is shifted
; left by 16 and spilled to the stack across the second call, then folded
; into the addss as a memory operand. The sum is widened with cvtss2sd and
; stored to %pc (held in %r14).
; Review note - the expected output has no __truncsfbf2 call between the
; addss and the cvtss2sd; confirm that skipping the bfloat rounding of the
; sum is the intended lowering.
define void @add_double(ptr %pa, ptr %pb, ptr %pc) nounwind {
; CHECK-LABEL: add_double:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movq %rdx, %r14
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq __truncdfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: callq __truncdfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: movsd %xmm0, (%r14)
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: retq
  %la = load double, ptr %pa
  %a = fptrunc double %la to bfloat
  %lb = load double, ptr %pb
  %b = fptrunc double %lb to bfloat
  %add = fadd bfloat %a, %b
  %dadd = fpext bfloat %add to double
  store double %dadd, ptr %pc
  ret void
}

; Same computation as @add_double but with the doubles passed and returned by
; value. %db (in %xmm1) is spilled before the first __truncdfbf2 call and
; reloaded for the second; the first narrowed value is spilled as a 4-byte
; integer and folded into the addss.
; Review note - as in @add_double, no __truncsfbf2 call appears between the
; addss and the cvtss2sd; confirm this is intended.
define double @add_double2(double %da, double %db) nounwind {
; CHECK-LABEL: add_double2:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: callq __truncdfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: callq __truncdfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: retq
  %a = fptrunc double %da to bfloat
  %b = fptrunc double %db to bfloat
  %add = fadd bfloat %a, %b
  %dadd = fpext bfloat %add to double
  ret double %dadd
}

; Adds the constant 1.0 to a bfloat loaded from %pa and stores the result to
; %pc. The constant operand of the addss is expected to come from a
; RIP-relative constant-pool label (matched by the LCPI pattern); the sum is
; then passed to __truncsfbf2 and its low 16 bits are stored.
define void @add_constant(ptr %pa, ptr %pc) nounwind {
; CHECK-LABEL: add_constant:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movw %ax, (%rbx)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %a = load bfloat, ptr %pa
  %add = fadd bfloat %a, 1.0
  store bfloat %add, ptr %pc
  ret void
}

; By-value variant of @add_constant: the operand arrives in %xmm0, is shifted
; left by 16 via %eax, added to the constant-pool value, and the result of
; __truncsfbf2 is returned in %xmm0.
define bfloat @add_constant2(bfloat %a) nounwind {
; CHECK-LABEL: add_constant2:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
  %add = fadd bfloat %a, 1.0
  ret bfloat %add
}

; Storing the bfloat constant 1.0 is expected to become a single 16-bit
; immediate store of 0x3F80 (16256), with no floating-point instructions.
define void @store_constant(ptr %pc) nounwind {
; CHECK-LABEL: store_constant:
; CHECK: # %bb.0:
; CHECK-NEXT: movw $16256, (%rdi) # imm = 0x3F80
; CHECK-NEXT: retq
  store bfloat 1.0, ptr %pc
  ret void
}

; An fpext to float immediately followed by an fptrunc back to bfloat is
; expected to fold away: the output is a plain 16-bit load from %pa and a
; 16-bit store to %pc, with no shift and no library call.
define void @fold_ext_trunc(ptr %pa, ptr %pc) nounwind {
; CHECK-LABEL: fold_ext_trunc:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: retq
  %a = load bfloat, ptr %pa
  %ext = fpext bfloat %a to float
  %trunc = fptrunc float %ext to bfloat
  store bfloat %trunc, ptr %pc
  ret void
}

; By-value variant of @fold_ext_trunc: the extend/truncate pair is expected to
; fold to nothing, leaving only the return instruction.
define bfloat @fold_ext_trunc2(bfloat %a) nounwind {
; CHECK-LABEL: fold_ext_trunc2:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
  %ext = fpext bfloat %a to float
  %trunc = fptrunc float %ext to bfloat
  ret bfloat %trunc
}

; Vector add of <8 x bfloat>. The expected output is fully scalarized:
;  - each 64-bit half of the inputs is moved to a general-purpose register
;    (the upper halves via pshufd) and the four 16-bit lanes are isolated with
;    shifts or an 0xFFFF0000 mask;
;  - eight addss instructions compute the lane sums; seven are spilled to the
;    stack and the last stays in %xmm0 for the first call;
;  - eight calls to __truncsfbf2 narrow the sums, and the 16-bit results are
;    packed pairwise with shll/movzwl/orl, then shlq/orq, and finally combined
;    into %xmm0 with punpcklqdq.
; %rbx, %r14 and %rbp hold partial results across the calls.
define <8 x bfloat> @addv(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: addv:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $32, %rsp
; CHECK-NEXT: movq %xmm1, %rax
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq $32, %rcx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: movd %ecx, %xmm2
; CHECK-NEXT: movq %xmm0, %rcx
; CHECK-NEXT: movq %rcx, %rdx
; CHECK-NEXT: shrq $32, %rdx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: movd %edx, %xmm3
; CHECK-NEXT: addss %xmm2, %xmm3
; CHECK-NEXT: movss %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movq %rax, %rdx
; CHECK-NEXT: shrq $48, %rdx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: movd %edx, %xmm2
; CHECK-NEXT: movq %rcx, %rdx
; CHECK-NEXT: shrq $48, %rdx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: movd %edx, %xmm3
; CHECK-NEXT: addss %xmm2, %xmm3
; CHECK-NEXT: movss %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl %eax, %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: movd %edx, %xmm2
; CHECK-NEXT: movl %ecx, %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: movd %edx, %xmm3
; CHECK-NEXT: addss %xmm2, %xmm3
; CHECK-NEXT: movss %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
; CHECK-NEXT: movd %eax, %xmm2
; CHECK-NEXT: andl $-65536, %ecx # imm = 0xFFFF0000
; CHECK-NEXT: movd %ecx, %xmm3
; CHECK-NEXT: addss %xmm2, %xmm3
; CHECK-NEXT: movss %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-NEXT: movq %xmm1, %rax
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq $32, %rcx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: movd %ecx, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: movq %xmm0, %rcx
; CHECK-NEXT: movq %rcx, %rdx
; CHECK-NEXT: shrq $32, %rdx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: movd %edx, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movq %rax, %rdx
; CHECK-NEXT: shrq $48, %rdx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: movd %edx, %xmm0
; CHECK-NEXT: movq %rcx, %rdx
; CHECK-NEXT: shrq $48, %rdx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: movd %edx, %xmm1
; CHECK-NEXT: addss %xmm0, %xmm1
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl %eax, %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: movd %edx, %xmm0
; CHECK-NEXT: movl %ecx, %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: movd %edx, %xmm1
; CHECK-NEXT: addss %xmm0, %xmm1
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: andl $-65536, %eax # imm = 0xFFFF0000
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: andl $-65536, %ecx # imm = 0xFFFF0000
; CHECK-NEXT: movd %ecx, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %ebx
; CHECK-NEXT: shll $16, %ebx
; CHECK-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movzwl %ax, %r14d
; CHECK-NEXT: orl %ebx, %r14d
; CHECK-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %ebp
; CHECK-NEXT: shll $16, %ebp
; CHECK-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movzwl %ax, %ebx
; CHECK-NEXT: orl %ebp, %ebx
; CHECK-NEXT: shlq $32, %rbx
; CHECK-NEXT: orq %r14, %rbx
; CHECK-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %ebp
; CHECK-NEXT: shll $16, %ebp
; CHECK-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movzwl %ax, %r14d
; CHECK-NEXT: orl %ebp, %r14d
; CHECK-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %ebp
; CHECK-NEXT: shll $16, %ebp
; CHECK-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: callq __truncsfbf2@PLT
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: orl %ebp, %eax
; CHECK-NEXT: shlq $32, %rax
; CHECK-NEXT: orq %r14, %rax
; CHECK-NEXT: movq %rax, %xmm0
; CHECK-NEXT: movq %rbx, %xmm1
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
  %add = fadd <8 x bfloat> %a, %b
  ret <8 x bfloat> %add
}