1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s
3
; Scalar bfloat add with both operands and the result in memory.
; The IR loads two bfloat values from %pa and %pb, adds them, and stores the
; sum to %pc. The assertions below expect each operand to be read as a
; zero-extended 16-bit integer and shifted left by 16 to form the bits of a
; float, the add to be a single-precision addss, and the conversion back to
; bfloat to be a call to __truncsfbf2 whose low 16 result bits are stored.
; %pc (incoming in %rdx) is kept in %rbx across that call.
4define void @add(ptr %pa, ptr %pb, ptr %pc) nounwind {
5; CHECK-LABEL: add:
6; CHECK:       # %bb.0:
7; CHECK-NEXT:    pushq %rbx
8; CHECK-NEXT:    movq %rdx, %rbx
9; CHECK-NEXT:    movzwl (%rdi), %eax
10; CHECK-NEXT:    shll $16, %eax
11; CHECK-NEXT:    movd %eax, %xmm1
12; CHECK-NEXT:    movzwl (%rsi), %eax
13; CHECK-NEXT:    shll $16, %eax
14; CHECK-NEXT:    movd %eax, %xmm0
15; CHECK-NEXT:    addss %xmm1, %xmm0
16; CHECK-NEXT:    callq __truncsfbf2@PLT
17; CHECK-NEXT:    movd %xmm0, %eax
18; CHECK-NEXT:    movw %ax, (%rbx)
19; CHECK-NEXT:    popq %rbx
20; CHECK-NEXT:    retq
21  %a = load bfloat, ptr %pa
22  %b = load bfloat, ptr %pb
23  %add = fadd bfloat %a, %b
24  store bfloat %add, ptr %pc
25  ret void
26}
27
; Scalar bfloat add with operands and result passed by value.
; The assertions below expect %a and %b to arrive in %xmm0 and %xmm1, each to
; be moved to a GPR and shifted left by 16 to form float bits, added with
; addss, and the value returned by __truncsfbf2 to be returned unchanged.
; NOTE(review): the pushq/popq %rax pair presumably only keeps the stack
; aligned across the call - confirm.
28define bfloat @add2(bfloat %a, bfloat %b) nounwind {
29; CHECK-LABEL: add2:
30; CHECK:       # %bb.0:
31; CHECK-NEXT:    pushq %rax
32; CHECK-NEXT:    movd %xmm1, %eax
33; CHECK-NEXT:    shll $16, %eax
34; CHECK-NEXT:    movd %eax, %xmm1
35; CHECK-NEXT:    movd %xmm0, %eax
36; CHECK-NEXT:    shll $16, %eax
37; CHECK-NEXT:    movd %eax, %xmm0
38; CHECK-NEXT:    addss %xmm1, %xmm0
39; CHECK-NEXT:    callq __truncsfbf2@PLT
40; CHECK-NEXT:    popq %rax
41; CHECK-NEXT:    retq
42  %add = fadd bfloat %a, %b
43  ret bfloat %add
44}
45
; double -> bfloat -> add -> double round trip through memory.
; The IR loads two doubles, truncates each to bfloat, adds the bfloat values,
; extends the sum back to double and stores it to %pc. The assertions below
; expect one __truncdfbf2 call per operand, with the first converted operand
; spilled to the stack as float bits (result shifted left by 16) and folded
; back into the addss, followed by cvtss2sd and an 8-byte store.
; NOTE(review): the expected output has no __truncsfbf2 call between the addss
; and the cvtss2sd, so the sum is not visibly rounded to bfloat precision
; before it is extended - confirm this is the intended lowering.
46define void @add_double(ptr %pa, ptr %pb, ptr %pc) nounwind {
47; CHECK-LABEL: add_double:
48; CHECK:       # %bb.0:
49; CHECK-NEXT:    pushq %r14
50; CHECK-NEXT:    pushq %rbx
51; CHECK-NEXT:    pushq %rax
52; CHECK-NEXT:    movq %rdx, %r14
53; CHECK-NEXT:    movq %rsi, %rbx
54; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
55; CHECK-NEXT:    callq __truncdfbf2@PLT
56; CHECK-NEXT:    movd %xmm0, %eax
57; CHECK-NEXT:    shll $16, %eax
58; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
59; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
60; CHECK-NEXT:    callq __truncdfbf2@PLT
61; CHECK-NEXT:    movd %xmm0, %eax
62; CHECK-NEXT:    shll $16, %eax
63; CHECK-NEXT:    movd %eax, %xmm0
64; CHECK-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
65; CHECK-NEXT:    cvtss2sd %xmm0, %xmm0
66; CHECK-NEXT:    movsd %xmm0, (%r14)
67; CHECK-NEXT:    addq $8, %rsp
68; CHECK-NEXT:    popq %rbx
69; CHECK-NEXT:    popq %r14
70; CHECK-NEXT:    retq
71  %la = load double, ptr %pa
72  %a = fptrunc double %la to bfloat
73  %lb = load double, ptr %pb
74  %b = fptrunc double %lb to bfloat
75  %add = fadd bfloat %a, %b
76  %dadd = fpext bfloat %add to double
77  store double %dadd, ptr %pc
78  ret void
79}
80
; Register-argument variant of the double -> bfloat -> add -> double round trip.
; The IR truncates both double arguments to bfloat, adds them, and returns the
; sum extended to double. The assertions below expect %db (in %xmm1) to be
; spilled before the first __truncdfbf2 call and reloaded for the second, the
; first converted operand to be spilled as float bits and folded into the
; addss, and the result to be produced by cvtss2sd.
; NOTE(review): as in the expected output above the add, no __truncsfbf2 call
; appears between the addss and the cvtss2sd - confirm the missing bfloat
; rounding of the sum is intended.
81define double @add_double2(double %da, double %db) nounwind {
82; CHECK-LABEL: add_double2:
83; CHECK:       # %bb.0:
84; CHECK-NEXT:    subq $24, %rsp
85; CHECK-NEXT:    movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
86; CHECK-NEXT:    callq __truncdfbf2@PLT
87; CHECK-NEXT:    movd %xmm0, %eax
88; CHECK-NEXT:    shll $16, %eax
89; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
90; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
91; CHECK-NEXT:    # xmm0 = mem[0],zero
92; CHECK-NEXT:    callq __truncdfbf2@PLT
93; CHECK-NEXT:    movd %xmm0, %eax
94; CHECK-NEXT:    shll $16, %eax
95; CHECK-NEXT:    movd %eax, %xmm0
96; CHECK-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
97; CHECK-NEXT:    cvtss2sd %xmm0, %xmm0
98; CHECK-NEXT:    addq $24, %rsp
99; CHECK-NEXT:    retq
100  %a = fptrunc double %da to bfloat
101  %b = fptrunc double %db to bfloat
102  %add = fadd bfloat %a, %b
103  %dadd = fpext bfloat %add to double
104  ret double %dadd
105}
106
; bfloat add of a loaded value and the constant 1.0, result stored to memory.
; The assertions below expect the loaded operand to be widened to float bits
; (16-bit zero-extending load, shift left by 16), the constant operand to be
; read by addss from a RIP-relative constant-pool entry, and the sum to be
; converted back with __truncsfbf2 before its low 16 bits are stored to %pc,
; which is kept in %rbx across the call.
107define void @add_constant(ptr %pa, ptr %pc) nounwind {
108; CHECK-LABEL: add_constant:
109; CHECK:       # %bb.0:
110; CHECK-NEXT:    pushq %rbx
111; CHECK-NEXT:    movq %rsi, %rbx
112; CHECK-NEXT:    movzwl (%rdi), %eax
113; CHECK-NEXT:    shll $16, %eax
114; CHECK-NEXT:    movd %eax, %xmm0
115; CHECK-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
116; CHECK-NEXT:    callq __truncsfbf2@PLT
117; CHECK-NEXT:    movd %xmm0, %eax
118; CHECK-NEXT:    movw %ax, (%rbx)
119; CHECK-NEXT:    popq %rbx
120; CHECK-NEXT:    retq
121  %a = load bfloat, ptr %pa
122  %add = fadd bfloat %a, 1.0
123  store bfloat %add, ptr %pc
124  ret void
125}
126
; bfloat add of a by-value argument and the constant 1.0, returned by value.
; The assertions below expect %a (in %xmm0) to be moved through a GPR and
; shifted left by 16 to form float bits, the constant to be folded into addss
; as a RIP-relative constant-pool operand, and the value returned by
; __truncsfbf2 to be returned unchanged.
127define bfloat @add_constant2(bfloat %a) nounwind {
128; CHECK-LABEL: add_constant2:
129; CHECK:       # %bb.0:
130; CHECK-NEXT:    pushq %rax
131; CHECK-NEXT:    movd %xmm0, %eax
132; CHECK-NEXT:    shll $16, %eax
133; CHECK-NEXT:    movd %eax, %xmm0
134; CHECK-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
135; CHECK-NEXT:    callq __truncsfbf2@PLT
136; CHECK-NEXT:    popq %rax
137; CHECK-NEXT:    retq
138  %add = fadd bfloat %a, 1.0
139  ret bfloat %add
140}
141
; Store of the bfloat constant 1.0.
; The assertions below expect a single 16-bit immediate store of 0x3F80
; (16256) to %pc, with no constant-pool load and no libcall.
142define void @store_constant(ptr %pc) nounwind {
143; CHECK-LABEL: store_constant:
144; CHECK:       # %bb.0:
145; CHECK-NEXT:    movw $16256, (%rdi) # imm = 0x3F80
146; CHECK-NEXT:    retq
147  store bfloat 1.0, ptr %pc
148  ret void
149}
150
; fpext bfloat -> float immediately followed by fptrunc float -> bfloat, with
; the value loaded from %pa and stored to %pc.
; The assertions below expect the conversion pair to disappear entirely: the
; output is a plain 16-bit load and store with no shift and no __truncsfbf2
; call.
151define void @fold_ext_trunc(ptr %pa, ptr %pc) nounwind {
152; CHECK-LABEL: fold_ext_trunc:
153; CHECK:       # %bb.0:
154; CHECK-NEXT:    movzwl (%rdi), %eax
155; CHECK-NEXT:    movw %ax, (%rsi)
156; CHECK-NEXT:    retq
157  %a = load bfloat, ptr %pa
158  %ext = fpext bfloat %a to float
159  %trunc = fptrunc float %ext to bfloat
160  store bfloat %trunc, ptr %pc
161  ret void
162}
163
; By-value variant of the fpext/fptrunc round trip.
; The assertions below expect the function body to be a bare retq, i.e. the
; argument is returned as-is with no conversion code.
164define bfloat @fold_ext_trunc2(bfloat %a) nounwind {
165; CHECK-LABEL: fold_ext_trunc2:
166; CHECK:       # %bb.0:
167; CHECK-NEXT:    retq
168  %ext = fpext bfloat %a to float
169  %trunc = fptrunc float %ext to bfloat
170  ret bfloat %trunc
171}
172
; Vector bfloat add on <8 x bfloat> operands passed and returned in %xmm0/%xmm1.
; The assertions below expect the operation to be fully scalarized:
;   * each 64-bit half of both vectors is moved to a GPR (the upper halves are
;     first brought down with pshufd [2,3,2,3]);
;   * every 16-bit lane is isolated and positioned as float bits, using
;     shrq $32 / shrq $48 followed by shll $16, a plain shll $16 for lane 0 of
;     the half, or andl $0xFFFF0000 for lane 1 of the half;
;   * eight addss instructions produce the float sums, seven of which are
;     spilled to the stack while the last stays in %xmm0 for the first call;
;   * eight __truncsfbf2 calls convert the sums back, and the 16-bit results
;     are packed pairwise with shll/movzwl/orl, combined into 64-bit values
;     with shlq $32/orq, and merged into %xmm0 with punpcklqdq.
; %rbx, %r14 and %rbp hold partial packed results across the calls.
173define <8 x bfloat> @addv(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
174; CHECK-LABEL: addv:
175; CHECK:       # %bb.0:
176; CHECK-NEXT:    pushq %rbp
177; CHECK-NEXT:    pushq %r14
178; CHECK-NEXT:    pushq %rbx
179; CHECK-NEXT:    subq $32, %rsp
180; CHECK-NEXT:    movq %xmm1, %rax
181; CHECK-NEXT:    movq %rax, %rcx
182; CHECK-NEXT:    shrq $32, %rcx
183; CHECK-NEXT:    shll $16, %ecx
184; CHECK-NEXT:    movd %ecx, %xmm2
185; CHECK-NEXT:    movq %xmm0, %rcx
186; CHECK-NEXT:    movq %rcx, %rdx
187; CHECK-NEXT:    shrq $32, %rdx
188; CHECK-NEXT:    shll $16, %edx
189; CHECK-NEXT:    movd %edx, %xmm3
190; CHECK-NEXT:    addss %xmm2, %xmm3
191; CHECK-NEXT:    movss %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
192; CHECK-NEXT:    movq %rax, %rdx
193; CHECK-NEXT:    shrq $48, %rdx
194; CHECK-NEXT:    shll $16, %edx
195; CHECK-NEXT:    movd %edx, %xmm2
196; CHECK-NEXT:    movq %rcx, %rdx
197; CHECK-NEXT:    shrq $48, %rdx
198; CHECK-NEXT:    shll $16, %edx
199; CHECK-NEXT:    movd %edx, %xmm3
200; CHECK-NEXT:    addss %xmm2, %xmm3
201; CHECK-NEXT:    movss %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
202; CHECK-NEXT:    movl %eax, %edx
203; CHECK-NEXT:    shll $16, %edx
204; CHECK-NEXT:    movd %edx, %xmm2
205; CHECK-NEXT:    movl %ecx, %edx
206; CHECK-NEXT:    shll $16, %edx
207; CHECK-NEXT:    movd %edx, %xmm3
208; CHECK-NEXT:    addss %xmm2, %xmm3
209; CHECK-NEXT:    movss %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
210; CHECK-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
211; CHECK-NEXT:    movd %eax, %xmm2
212; CHECK-NEXT:    andl $-65536, %ecx # imm = 0xFFFF0000
213; CHECK-NEXT:    movd %ecx, %xmm3
214; CHECK-NEXT:    addss %xmm2, %xmm3
215; CHECK-NEXT:    movss %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
216; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
217; CHECK-NEXT:    movq %xmm1, %rax
218; CHECK-NEXT:    movq %rax, %rcx
219; CHECK-NEXT:    shrq $32, %rcx
220; CHECK-NEXT:    shll $16, %ecx
221; CHECK-NEXT:    movd %ecx, %xmm1
222; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
223; CHECK-NEXT:    movq %xmm0, %rcx
224; CHECK-NEXT:    movq %rcx, %rdx
225; CHECK-NEXT:    shrq $32, %rdx
226; CHECK-NEXT:    shll $16, %edx
227; CHECK-NEXT:    movd %edx, %xmm0
228; CHECK-NEXT:    addss %xmm1, %xmm0
229; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
230; CHECK-NEXT:    movq %rax, %rdx
231; CHECK-NEXT:    shrq $48, %rdx
232; CHECK-NEXT:    shll $16, %edx
233; CHECK-NEXT:    movd %edx, %xmm0
234; CHECK-NEXT:    movq %rcx, %rdx
235; CHECK-NEXT:    shrq $48, %rdx
236; CHECK-NEXT:    shll $16, %edx
237; CHECK-NEXT:    movd %edx, %xmm1
238; CHECK-NEXT:    addss %xmm0, %xmm1
239; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
240; CHECK-NEXT:    movl %eax, %edx
241; CHECK-NEXT:    shll $16, %edx
242; CHECK-NEXT:    movd %edx, %xmm0
243; CHECK-NEXT:    movl %ecx, %edx
244; CHECK-NEXT:    shll $16, %edx
245; CHECK-NEXT:    movd %edx, %xmm1
246; CHECK-NEXT:    addss %xmm0, %xmm1
247; CHECK-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
248; CHECK-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
249; CHECK-NEXT:    movd %eax, %xmm1
250; CHECK-NEXT:    andl $-65536, %ecx # imm = 0xFFFF0000
251; CHECK-NEXT:    movd %ecx, %xmm0
252; CHECK-NEXT:    addss %xmm1, %xmm0
253; CHECK-NEXT:    callq __truncsfbf2@PLT
254; CHECK-NEXT:    movd %xmm0, %ebx
255; CHECK-NEXT:    shll $16, %ebx
256; CHECK-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
257; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
258; CHECK-NEXT:    callq __truncsfbf2@PLT
259; CHECK-NEXT:    movd %xmm0, %eax
260; CHECK-NEXT:    movzwl %ax, %r14d
261; CHECK-NEXT:    orl %ebx, %r14d
262; CHECK-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
263; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
264; CHECK-NEXT:    callq __truncsfbf2@PLT
265; CHECK-NEXT:    movd %xmm0, %ebp
266; CHECK-NEXT:    shll $16, %ebp
267; CHECK-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
268; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
269; CHECK-NEXT:    callq __truncsfbf2@PLT
270; CHECK-NEXT:    movd %xmm0, %eax
271; CHECK-NEXT:    movzwl %ax, %ebx
272; CHECK-NEXT:    orl %ebp, %ebx
273; CHECK-NEXT:    shlq $32, %rbx
274; CHECK-NEXT:    orq %r14, %rbx
275; CHECK-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
276; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
277; CHECK-NEXT:    callq __truncsfbf2@PLT
278; CHECK-NEXT:    movd %xmm0, %ebp
279; CHECK-NEXT:    shll $16, %ebp
280; CHECK-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
281; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
282; CHECK-NEXT:    callq __truncsfbf2@PLT
283; CHECK-NEXT:    movd %xmm0, %eax
284; CHECK-NEXT:    movzwl %ax, %r14d
285; CHECK-NEXT:    orl %ebp, %r14d
286; CHECK-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
287; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
288; CHECK-NEXT:    callq __truncsfbf2@PLT
289; CHECK-NEXT:    movd %xmm0, %ebp
290; CHECK-NEXT:    shll $16, %ebp
291; CHECK-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
292; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
293; CHECK-NEXT:    callq __truncsfbf2@PLT
294; CHECK-NEXT:    movd %xmm0, %eax
295; CHECK-NEXT:    movzwl %ax, %eax
296; CHECK-NEXT:    orl %ebp, %eax
297; CHECK-NEXT:    shlq $32, %rax
298; CHECK-NEXT:    orq %r14, %rax
299; CHECK-NEXT:    movq %rax, %xmm0
300; CHECK-NEXT:    movq %rbx, %xmm1
301; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
302; CHECK-NEXT:    addq $32, %rsp
303; CHECK-NEXT:    popq %rbx
304; CHECK-NEXT:    popq %r14
305; CHECK-NEXT:    popq %rbp
306; CHECK-NEXT:    retq
307  %add = fadd <8 x bfloat> %a, %b
308  ret <8 x bfloat> %add
309}
310