; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s

; --------------------------------------------------------------------------------
; fadd tests
; --------------------------------------------------------------------------------

; fneg(a + b) with no other use of the add folds into a single sub: -a - b.
; GCN-LABEL: {{^}}v_fneg_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[RESULT]]
define void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; The add has a second (store) use, so the fneg cannot fold into it and is a separate xor.
; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %add, float addrspace(1)* %out
  ret void
}

; The add feeds a second fmul use, so the fneg stays a separate xor of the add result.
; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  %use1 = fmul float %add, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; fneg(fneg(a) + b) = a - b, emitted as a single subrev.
; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_subrev_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; fneg(a + fneg(b)) = b - a, emitted as a single subrev with swapped operands.
; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_subrev_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %add = fadd float %a, %fneg.b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; fneg(fneg(a) + fneg(b)) = a + b, so all negates cancel into a plain add.
; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %add = fadd float %fneg.a, %fneg.b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; fneg.a is also stored, so it is materialized with an xor while the
; fneg(add) still folds to a subrev.
; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_subrev_f32_e32 [[NEG_ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_A]]
define void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; fneg.a has a second fmul use that takes it as a source modifier,
; so no extra instruction is needed for either negate.
; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_subrev_f32_e32 [[NEG_ADD:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fmul tests
; --------------------------------------------------------------------------------

; fneg(a * b) folds into the mul as a negate source modifier on b.
; GCN-LABEL: {{^}}v_fneg_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: buffer_store_dword [[RESULT]]
define void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; The mul has a second (store) use, so the fneg cannot fold and is a separate xor.
; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[ADD]]
define void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %mul, float addrspace(1)* %out
  ret void
}

; The mul feeds a second fmul use, so the fneg stays a separate xor of the mul result.
; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[MUL]]
define void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = fmul float %mul, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; fneg(fneg(a) * b) = a * b, so both negates cancel into a plain mul.
; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; fneg(a * fneg(b)) = a * b, so both negates cancel into a plain mul.
; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = fmul float %a, %fneg.b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; fneg(fneg(a) * fneg(b)) = -(a * b): one negate survives as a source modifier.
; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = fmul float %fneg.a, %fneg.b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; fneg.a is also stored, so it is materialized with an xor while the
; two negates around the mul still cancel into a plain mul.
; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[B]], [[A]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[NEG_A]]
define void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; fneg.a has a second fmul use that takes it as a source modifier,
; so no extra instruction is needed for either negate.
; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[B]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[MUL]]
define void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fma tests
; --------------------------------------------------------------------------------

; fneg(fma(a, b, c)) folds to fma(a, -b, -c) via source modifiers.
; GCN-LABEL: {{^}}v_fneg_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NEXT: buffer_store_dword [[RESULT]]
define void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; The fma has a second (store) use, so the fneg cannot fold and is a separate xor.
; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN-NEXT: buffer_store_dword [[NEG_FMA]]
; GCN-NEXT: buffer_store_dword [[FMA]]
define void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fma, float addrspace(1)* %out
  ret void
}

; The fma feeds a second fmul use, so the fneg stays a separate xor of the fma result.
; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]
; GCN-NEXT: buffer_store_dword [[NEG_FMA]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  %use1 = fmul float %fma, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; fneg(fma(-a, b, c)) = fma(a, b, -c): the mul negates cancel, leaving -c.
; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NEXT: buffer_store_dword [[FMA]]
define void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; fneg(fma(a, -b, c)) = fma(a, b, -c): the mul negates cancel, leaving -c.
; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NEXT: buffer_store_dword [[FMA]]
define void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; fneg(fma(-a, -b, c)) = fma(a, -b, -c): one mul negate survives plus -c.
; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NEXT: buffer_store_dword [[FMA]]
define void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; fneg(fma(-a, b, -c)) = fma(a, b, c): every negate cancels.
; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-NEXT: buffer_store_dword [[FMA]]
define void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.c = fsub float -0.000000e+00, %c
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; fneg(fma(a, b, -c)) = fma(a, -b, c): the addend negate cancels, mul picks one up.
; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
; GCN-NEXT: buffer_store_dword [[FMA]]
define void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.c = fsub float -0.000000e+00, %c
  %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; fneg.a is also stored, so it is materialized with an xor while the
; fneg(fma) still folds to fma(a, b, -c).
; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NEXT: buffer_store_dword [[FMA]]
; GCN-NEXT: buffer_store_dword [[NEG_A]]
define void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; fneg.a has a second fmul use that takes it as a source modifier, so the
; fneg(fma) folds to fma(a, b, -c) with no extra negate instruction.
; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NEXT: buffer_store_dword [[NEG_FMA]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fsub float -0.000000e+00, %fma
  %use1 = fmul float %fneg.a, %d
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

605; --------------------------------------------------------------------------------
606; fmad tests
607; --------------------------------------------------------------------------------
608
609; GCN-LABEL: {{^}}v_fneg_fmad_f32:
610; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
611; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
612; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
613; GCN: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
614; GCN-NEXT: buffer_store_dword [[RESULT]]
615define void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
616  %tid = call i32 @llvm.amdgcn.workitem.id.x()
617  %tid.ext = sext i32 %tid to i64
618  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
619  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
620  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
621  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
622  %a = load volatile float, float addrspace(1)* %a.gep
623  %b = load volatile float, float addrspace(1)* %b.gep
624  %c = load volatile float, float addrspace(1)* %c.gep
625  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
626  %fneg = fsub float -0.000000e+00, %fma
627  store float %fneg, float addrspace(1)* %out.gep
628  ret void
629}
630
631; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
632; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
633; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
634; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
635; GCN-DAG: v_mac_f32_e32 [[C]], [[B]], [[A]]
636; GCN-DAG: v_xor_b32_e32 [[NEG_C:v[0-9]+]], 0x80000000, [[C]]
637; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]
638; GCN-NEXT: buffer_store_dword [[NEG_C]]
639; GCN-NEXT: buffer_store_dword [[MUL]]
640define void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
641  %tid = call i32 @llvm.amdgcn.workitem.id.x()
642  %tid.ext = sext i32 %tid to i64
643  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
644  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
645  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
646  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
647  %a = load volatile float, float addrspace(1)* %a.gep
648  %b = load volatile float, float addrspace(1)* %b.gep
649  %c = load volatile float, float addrspace(1)* %c.gep
650  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
651  %fneg = fsub float -0.000000e+00, %fma
652  %use1 = fmul float %fma, 4.0
653  store volatile float %fneg, float addrspace(1)* %out
654  store volatile float %use1, float addrspace(1)* %out
655  ret void
656}
657
658; --------------------------------------------------------------------------------
659; fp_extend tests
660; --------------------------------------------------------------------------------
661
662; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
663; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
664; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
665; GCN: buffer_store_dwordx2 [[RESULT]]
666define void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
667  %tid = call i32 @llvm.amdgcn.workitem.id.x()
668  %tid.ext = sext i32 %tid to i64
669  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
670  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
671  %a = load volatile float, float addrspace(1)* %a.gep
672  %fpext = fpext float %a to double
673  %fneg = fsub double -0.000000e+00, %fpext
674  store double %fneg, double addrspace(1)* %out.gep
675  ret void
676}
677
678; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
679; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
680; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
681; GCN: buffer_store_dwordx2 [[RESULT]]
682define void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
683  %tid = call i32 @llvm.amdgcn.workitem.id.x()
684  %tid.ext = sext i32 %tid to i64
685  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
686  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
687  %a = load volatile float, float addrspace(1)* %a.gep
688  %fneg.a = fsub float -0.000000e+00, %a
689  %fpext = fpext float %fneg.a to double
690  %fneg = fsub double -0.000000e+00, %fpext
691  store double %fneg, double addrspace(1)* %out.gep
692  ret void
693}
694
695; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
696; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
697; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
698; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
699; GCN: buffer_store_dwordx2 [[RESULT]]
700; GCN: buffer_store_dword [[FNEG_A]]
701define void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
702  %tid = call i32 @llvm.amdgcn.workitem.id.x()
703  %tid.ext = sext i32 %tid to i64
704  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
705  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
706  %a = load volatile float, float addrspace(1)* %a.gep
707  %fneg.a = fsub float -0.000000e+00, %a
708  %fpext = fpext float %fneg.a to double
709  %fneg = fsub double -0.000000e+00, %fpext
710  store volatile double %fneg, double addrspace(1)* %out.gep
711  store volatile float %fneg.a, float addrspace(1)* undef
712  ret void
713}
714
715; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
716; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
717; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
718; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
719; GCN: buffer_store_dwordx2 v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
720; GCN: buffer_store_dwordx2 v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
721define void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
722  %tid = call i32 @llvm.amdgcn.workitem.id.x()
723  %tid.ext = sext i32 %tid to i64
724  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
725  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
726  %a = load volatile float, float addrspace(1)* %a.gep
727  %fpext = fpext float %a to double
728  %fneg = fsub double -0.000000e+00, %fpext
729  store volatile double %fneg, double addrspace(1)* %out.gep
730  store volatile double %fpext, double addrspace(1)* undef
731  ret void
732}
733
734; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
735; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
736; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
737; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
738; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
739; GCN: buffer_store_dwordx2 v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
740; GCN: buffer_store_dwordx2 [[MUL]]
741define void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
742  %tid = call i32 @llvm.amdgcn.workitem.id.x()
743  %tid.ext = sext i32 %tid to i64
744  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
745  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
746  %a = load volatile float, float addrspace(1)* %a.gep
747  %fpext = fpext float %a to double
748  %fneg = fsub double -0.000000e+00, %fpext
749  %mul = fmul double %fpext, 4.0
750  store volatile double %fneg, double addrspace(1)* %out.gep
751  store volatile double %mul, double addrspace(1)* %out.gep
752  ret void
753}
754
755; FIXME: Source modifiers not folded for f16->f32
756; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
757define void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
758  %tid = call i32 @llvm.amdgcn.workitem.id.x()
759  %tid.ext = sext i32 %tid to i64
760  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
761  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
762  %a = load volatile half, half addrspace(1)* %a.gep
763  %fpext = fpext half %a to float
764  %fneg = fsub float -0.000000e+00, %fpext
765  store volatile float %fneg, float addrspace(1)* %out.gep
766  store volatile float %fpext, float addrspace(1)* %out.gep
767  ret void
768}
769
770; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
771define void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
772  %tid = call i32 @llvm.amdgcn.workitem.id.x()
773  %tid.ext = sext i32 %tid to i64
774  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
775  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
776  %a = load volatile half, half addrspace(1)* %a.gep
777  %fpext = fpext half %a to float
778  %fneg = fsub float -0.000000e+00, %fpext
779  %mul = fmul float %fpext, 4.0
780  store volatile float %fneg, float addrspace(1)* %out.gep
781  store volatile float %mul, float addrspace(1)* %out.gep
782  ret void
783}
784
785; --------------------------------------------------------------------------------
786; fp_round tests
787; --------------------------------------------------------------------------------
788
789; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32:
790; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
791; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
792; GCN: buffer_store_dword [[RESULT]]
793define void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
794  %tid = call i32 @llvm.amdgcn.workitem.id.x()
795  %tid.ext = sext i32 %tid to i64
796  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
797  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
798  %a = load volatile double, double addrspace(1)* %a.gep
799  %fpround = fptrunc double %a to float
800  %fneg = fsub float -0.000000e+00, %fpround
801  store float %fneg, float addrspace(1)* %out.gep
802  ret void
803}
804
805; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32:
806; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
807; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
808; GCN: buffer_store_dword [[RESULT]]
809define void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
810  %tid = call i32 @llvm.amdgcn.workitem.id.x()
811  %tid.ext = sext i32 %tid to i64
812  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
813  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
814  %a = load volatile double, double addrspace(1)* %a.gep
815  %fneg.a = fsub double -0.000000e+00, %a
816  %fpround = fptrunc double %fneg.a to float
817  %fneg = fsub float -0.000000e+00, %fpround
818  store float %fneg, float addrspace(1)* %out.gep
819  ret void
820}
821
822; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
823; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
824; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
825; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
826; GCN-DAG: v_mov_b32_e32 v[[NEG_A_LO:[0-9]+]], v[[A_LO]]
827; GCN: buffer_store_dword [[RESULT]]
828; GCN: buffer_store_dwordx2 v{{\[}}[[NEG_A_LO]]:[[NEG_A_HI]]{{\]}}
829define void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
830  %tid = call i32 @llvm.amdgcn.workitem.id.x()
831  %tid.ext = sext i32 %tid to i64
832  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
833  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
834  %a = load volatile double, double addrspace(1)* %a.gep
835  %fneg.a = fsub double -0.000000e+00, %a
836  %fpround = fptrunc double %fneg.a to float
837  %fneg = fsub float -0.000000e+00, %fpround
838  store volatile float %fneg, float addrspace(1)* %out.gep
839  store volatile double %fneg.a, double addrspace(1)* undef
840  ret void
841}
842
843; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
844; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
845; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
846; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}
847; GCN: buffer_store_dword [[RESULT]]
848; GCN: buffer_store_dwordx2 [[USE1]]
849define void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 {
850  %tid = call i32 @llvm.amdgcn.workitem.id.x()
851  %tid.ext = sext i32 %tid to i64
852  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
853  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
854  %a = load volatile double, double addrspace(1)* %a.gep
855  %fneg.a = fsub double -0.000000e+00, %a
856  %fpround = fptrunc double %fneg.a to float
857  %fneg = fsub float -0.000000e+00, %fpround
858  %use1 = fmul double %fneg.a, %c
859  store volatile float %fneg, float addrspace(1)* %out.gep
860  store volatile double %use1, double addrspace(1)* undef
861  ret void
862}
863
864; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16:
865; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
866; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
867; GCN: buffer_store_short [[RESULT]]
868define void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
869  %tid = call i32 @llvm.amdgcn.workitem.id.x()
870  %tid.ext = sext i32 %tid to i64
871  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
872  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
873  %a = load volatile float, float addrspace(1)* %a.gep
874  %fpround = fptrunc float %a to half
875  %fneg = fsub half -0.000000e+00, %fpround
876  store half %fneg, half addrspace(1)* %out.gep
877  ret void
878}
879
880; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16:
881; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
882; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
883; GCN: buffer_store_short [[RESULT]]
884define void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
885  %tid = call i32 @llvm.amdgcn.workitem.id.x()
886  %tid.ext = sext i32 %tid to i64
887  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
888  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
889  %a = load volatile float, float addrspace(1)* %a.gep
890  %fneg.a = fsub float -0.000000e+00, %a
891  %fpround = fptrunc float %fneg.a to half
892  %fneg = fsub half -0.000000e+00, %fpround
893  store half %fneg, half addrspace(1)* %out.gep
894  ret void
895}
896
897; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
898; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
899; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
900; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
901; GCN: buffer_store_dword [[NEG]]
902; GCN: buffer_store_dword [[CVT]]
903define void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
904  %tid = call i32 @llvm.amdgcn.workitem.id.x()
905  %tid.ext = sext i32 %tid to i64
906  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
907  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
908  %a = load volatile double, double addrspace(1)* %a.gep
909  %fpround = fptrunc double %a to float
910  %fneg = fsub float -0.000000e+00, %fpround
911  store volatile float %fneg, float addrspace(1)* %out.gep
912  store volatile float %fpround, float addrspace(1)* %out.gep
913  ret void
914}
915
916; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
917; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
918; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
919; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
920; GCN: buffer_store_short [[RESULT]]
921; GCN: buffer_store_dword [[NEG_A]]
922define void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
923  %tid = call i32 @llvm.amdgcn.workitem.id.x()
924  %tid.ext = sext i32 %tid to i64
925  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
926  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
927  %a = load volatile float, float addrspace(1)* %a.gep
928  %fneg.a = fsub float -0.000000e+00, %a
929  %fpround = fptrunc float %fneg.a to half
930  %fneg = fsub half -0.000000e+00, %fpround
931  store volatile half %fneg, half addrspace(1)* %out.gep
932  store volatile float %fneg.a, float addrspace(1)* undef
933  ret void
934}
935
936; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
937; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
938; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
939; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
940; GCN: buffer_store_short [[RESULT]]
941; GCN: buffer_store_dword [[USE1]]
942define void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
943  %tid = call i32 @llvm.amdgcn.workitem.id.x()
944  %tid.ext = sext i32 %tid to i64
945  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
946  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
947  %a = load volatile float, float addrspace(1)* %a.gep
948  %fneg.a = fsub float -0.000000e+00, %a
949  %fpround = fptrunc float %fneg.a to half
950  %fneg = fsub half -0.000000e+00, %fpround
951  %use1 = fmul float %fneg.a, %c
952  store volatile half %fneg, half addrspace(1)* %out.gep
953  store volatile float %use1, float addrspace(1)* undef
954  ret void
955}
956
957; --------------------------------------------------------------------------------
958; rcp tests
959; --------------------------------------------------------------------------------
960
961; GCN-LABEL: {{^}}v_fneg_rcp_f32:
962; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
963; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
964; GCN: buffer_store_dword [[RESULT]]
965define void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
966  %tid = call i32 @llvm.amdgcn.workitem.id.x()
967  %tid.ext = sext i32 %tid to i64
968  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
969  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
970  %a = load volatile float, float addrspace(1)* %a.gep
971  %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
972  %fneg = fsub float -0.000000e+00, %rcp
973  store float %fneg, float addrspace(1)* %out.gep
974  ret void
975}
976
977; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
978; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
979; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
980; GCN: buffer_store_dword [[RESULT]]
981define void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
982  %tid = call i32 @llvm.amdgcn.workitem.id.x()
983  %tid.ext = sext i32 %tid to i64
984  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
985  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
986  %a = load volatile float, float addrspace(1)* %a.gep
987  %fneg.a = fsub float -0.000000e+00, %a
988  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
989  %fneg = fsub float -0.000000e+00, %rcp
990  store float %fneg, float addrspace(1)* %out.gep
991  ret void
992}
993
994; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
995; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
996; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
997; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
998; GCN: buffer_store_dword [[RESULT]]
999; GCN: buffer_store_dword [[NEG_A]]
1000define void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1001  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1002  %tid.ext = sext i32 %tid to i64
1003  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1004  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1005  %a = load volatile float, float addrspace(1)* %a.gep
1006  %fneg.a = fsub float -0.000000e+00, %a
1007  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1008  %fneg = fsub float -0.000000e+00, %rcp
1009  store volatile float %fneg, float addrspace(1)* %out.gep
1010  store volatile float %fneg.a, float addrspace(1)* undef
1011  ret void
1012}
1013
1014; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
1015; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1016; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
1017; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1018; GCN: buffer_store_dword [[RESULT]]
1019; GCN: buffer_store_dword [[MUL]]
1020define void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
1021  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1022  %tid.ext = sext i32 %tid to i64
1023  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1024  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1025  %a = load volatile float, float addrspace(1)* %a.gep
1026  %fneg.a = fsub float -0.000000e+00, %a
1027  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1028  %fneg = fsub float -0.000000e+00, %rcp
1029  %use1 = fmul float %fneg.a, %c
1030  store volatile float %fneg, float addrspace(1)* %out.gep
1031  store volatile float %use1, float addrspace(1)* undef
1032  ret void
1033}
1034
1035; --------------------------------------------------------------------------------
1036; rcp_legacy tests
1037; --------------------------------------------------------------------------------
1038
1039; GCN-LABEL: {{^}}v_fneg_rcp_legacy_f32:
1040; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1041; GCN: v_rcp_legacy_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1042; GCN: buffer_store_dword [[RESULT]]
1043define void @v_fneg_rcp_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1044  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1045  %tid.ext = sext i32 %tid to i64
1046  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1047  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1048  %a = load volatile float, float addrspace(1)* %a.gep
1049  %rcp = call float @llvm.amdgcn.rcp.legacy(float %a)
1050  %fneg = fsub float -0.000000e+00, %rcp
1051  store float %fneg, float addrspace(1)* %out.gep
1052  ret void
1053}
1054
1055; --------------------------------------------------------------------------------
1056; fmul_legacy tests
1057; --------------------------------------------------------------------------------
1058
1059; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
1060; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1061; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1062; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
1063; GCN-NEXT: buffer_store_dword [[RESULT]]
1064define void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1065  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1066  %tid.ext = sext i32 %tid to i64
1067  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1068  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1069  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1070  %a = load volatile float, float addrspace(1)* %a.gep
1071  %b = load volatile float, float addrspace(1)* %b.gep
1072  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1073  %fneg = fsub float -0.000000e+00, %mul
1074  store float %fneg, float addrspace(1)* %out.gep
1075  ret void
1076}
1077
1078; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
1079; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1080; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1081; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
1082; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
1083; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
1084; GCN: buffer_store_dword [[ADD]]
1085define void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1086  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1087  %tid.ext = sext i32 %tid to i64
1088  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1089  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1090  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1091  %a = load volatile float, float addrspace(1)* %a.gep
1092  %b = load volatile float, float addrspace(1)* %b.gep
1093  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1094  %fneg = fsub float -0.000000e+00, %mul
1095  store volatile float %fneg, float addrspace(1)* %out
1096  store volatile float %mul, float addrspace(1)* %out
1097  ret void
1098}
1099
1100; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
1101; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1102; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1103; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
1104; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
1105; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]
1106; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
1107; GCN: buffer_store_dword [[MUL]]
1108define void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1109  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1110  %tid.ext = sext i32 %tid to i64
1111  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1112  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1113  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1114  %a = load volatile float, float addrspace(1)* %a.gep
1115  %b = load volatile float, float addrspace(1)* %b.gep
1116  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
1117  %fneg = fsub float -0.000000e+00, %mul
1118  %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
1119  store volatile float %fneg, float addrspace(1)* %out
1120  store volatile float %use1, float addrspace(1)* %out
1121  ret void
1122}
1123
1124; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
1125; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1126; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1127; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
1128; GCN-NEXT: buffer_store_dword [[ADD]]
1129define void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1130  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1131  %tid.ext = sext i32 %tid to i64
1132  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1133  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1134  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1135  %a = load volatile float, float addrspace(1)* %a.gep
1136  %b = load volatile float, float addrspace(1)* %b.gep
1137  %fneg.a = fsub float -0.000000e+00, %a
1138  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1139  %fneg = fsub float -0.000000e+00, %mul
1140  store volatile float %fneg, float addrspace(1)* %out
1141  ret void
1142}
1143
1144; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
1145; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1146; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1147; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
1148; GCN-NEXT: buffer_store_dword [[ADD]]
1149define void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1150  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1151  %tid.ext = sext i32 %tid to i64
1152  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1153  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1154  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1155  %a = load volatile float, float addrspace(1)* %a.gep
1156  %b = load volatile float, float addrspace(1)* %b.gep
1157  %fneg.b = fsub float -0.000000e+00, %b
1158  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
1159  %fneg = fsub float -0.000000e+00, %mul
1160  store volatile float %fneg, float addrspace(1)* %out
1161  ret void
1162}
1163
1164; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
1165; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1166; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1167; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1168; GCN-NEXT: buffer_store_dword [[ADD]]
1169define void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1170  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1171  %tid.ext = sext i32 %tid to i64
1172  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1173  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1174  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1175  %a = load volatile float, float addrspace(1)* %a.gep
1176  %b = load volatile float, float addrspace(1)* %b.gep
1177  %fneg.a = fsub float -0.000000e+00, %a
1178  %fneg.b = fsub float -0.000000e+00, %b
1179  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
1180  %fneg = fsub float -0.000000e+00, %mul
1181  store volatile float %fneg, float addrspace(1)* %out
1182  ret void
1183}
1184
; Tests the fold when fneg(a) has an extra store use: the combined
; fneg(fmul_legacy(-a, b)) still simplifies to a plain a * b (no source
; modifiers), while -a must additionally be materialized via a sign-bit
; v_xor (0x80000000) for its own store.
1185; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
1186; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1187; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1188; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
1189; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[B]], [[A]]
1190; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
1191; GCN: buffer_store_dword [[NEG_A]]
1192define void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
1193  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1194  %tid.ext = sext i32 %tid to i64
1195  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1196  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1197  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  ; Volatile loads keep the two inputs distinct and un-CSE'd.
1198  %a = load volatile float, float addrspace(1)* %a.gep
1199  %b = load volatile float, float addrspace(1)* %b.gep
1200  %fneg.a = fsub float -0.000000e+00, %a
1201  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1202  %fneg = fsub float -0.000000e+00, %mul
  ; Both stores target %out (%out.gep is unused); the second store is the
  ; extra use of %fneg.a that forces the v_xor above.
1203  store volatile float %fneg, float addrspace(1)* %out
1204  store volatile float %fneg.a, float addrspace(1)* %out
1205  ret void
1206}
1207
; Tests the fold when fneg(a) has a second arithmetic use: the first product
; folds as -((-a) * b) = a * b (no modifiers), while the second multiply with
; the scalar argument %c keeps -a as a per-use source modifier instead of a
; separately materialized negation.
1208; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
1209; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1210; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1211; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[B]], [[A]]
1212; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
1213; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
1214; GCN: buffer_store_dword [[MUL]]
1215define void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
1216  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1217  %tid.ext = sext i32 %tid to i64
1218  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1219  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1220  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1221  %a = load volatile float, float addrspace(1)* %a.gep
1222  %b = load volatile float, float addrspace(1)* %b.gep
1223  %fneg.a = fsub float -0.000000e+00, %a
1224  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
1225  %fneg = fsub float -0.000000e+00, %mul
  ; Second use of %fneg.a; %c arrives as a kernel argument, hence the SGPR
  ; operand (s[0-9]+) in the check above.
1226  %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
  ; NOTE(review): both stores go to %out; %out.gep is dead — confirm intent.
1227  store volatile float %fneg, float addrspace(1)* %out
1228  store volatile float %use1, float addrspace(1)* %out
1229  ret void
1230}
1231
1232; --------------------------------------------------------------------------------
1233; sin tests
1234; --------------------------------------------------------------------------------
1235
; Tests that fneg(sin(a)) is folded into the sin lowering's input scaling:
; since sin(-x) = -sin(x), the negation becomes a source modifier on the
; multiply by 0x3e22f983 (~0.15915494, i.e. 1/(2*pi)) that precedes
; v_fract/v_sin, rather than a separate negate of the result.
1236; GCN-LABEL: {{^}}v_fneg_sin_f32:
1237; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1238; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
1239; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[K]], -[[A]]
1240; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
1241; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
1242; GCN: buffer_store_dword [[RESULT]]
1243define void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1244  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1245  %tid.ext = sext i32 %tid to i64
1246  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1247  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1248  %a = load volatile float, float addrspace(1)* %a.gep
  ; Generic llvm.sin lowering (scale + fract + v_sin), unlike the
  ; direct-intrinsic test below.
1249  %sin = call float @llvm.sin.f32(float %a)
1250  %fneg = fsub float -0.000000e+00, %sin
1251  store float %fneg, float addrspace(1)* %out.gep
1252  ret void
1253}
1254
; Tests that fneg of the raw llvm.amdgcn.sin intrinsic folds directly into a
; source modifier on v_sin (sin(-x) = -sin(x)), producing a single
; v_sin_f32_e64 with a negated input and no standalone negate.
1255; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
1256; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1257; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
1258; GCN: buffer_store_dword [[RESULT]]
1259define void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
1260  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1261  %tid.ext = sext i32 %tid to i64
1262  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1263  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1264  %a = load volatile float, float addrspace(1)* %a.gep
1265  %sin = call float @llvm.amdgcn.sin.f32(float %a)
1266  %fneg = fsub float -0.000000e+00, %sin
1267  store float %fneg, float addrspace(1)* %out.gep
1268  ret void
1269}
1270
1271declare i32 @llvm.amdgcn.workitem.id.x() #1
1272declare float @llvm.fma.f32(float, float, float) #1
1273declare float @llvm.fmuladd.f32(float, float, float) #1
1274declare float @llvm.sin.f32(float) #1
1275
1276declare float @llvm.amdgcn.sin.f32(float) #1
1277declare float @llvm.amdgcn.rcp.f32(float) #1
1278declare float @llvm.amdgcn.rcp.legacy(float) #1
1279declare float @llvm.amdgcn.fmul.legacy(float, float) #1
1280
1281attributes #0 = { nounwind }
1282attributes #1 = { nounwind readnone }
1283