1; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink -amdgpu-enable-ocl-mangling-mismatch-workaround=0 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
3; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s
4; RUN: opt -S -passes='default<O1>' -mtriple=amdgcn-- -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
; RUN: opt -S -passes='default<O1>' -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink -amdgpu-enable-ocl-mangling-mismatch-workaround=0 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
6; RUN: opt -S -passes='default<O1>' -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s
7
8; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
9; GCN-POSTLINK: call fast float @_Z3sinf(
10; GCN-POSTLINK: call fast float @_Z3cosf(
11; GCN-PRELINK: call fast float @_Z6sincosfPf(
12; GCN-NATIVE: call fast float @_Z10native_sinf(
13; GCN-NATIVE: call fast float @_Z10native_cosf(
; sin(x) and cos(x) of the same scalar input: per the CHECKs above, prelink
; folds the pair into one sincos call; native mode maps each to its native form.
define amdgpu_kernel void @test_sincos(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3sinf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  %call2 = call fast float @_Z3cosf(float %tmp)
  %arrayidx3 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  store float %call2, float addrspace(1)* %arrayidx3, align 4
  ret void
}
24
25declare float @_Z3sinf(float)
26
27declare float @_Z3cosf(float)
28
29; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2
30; GCN-POSTLINK: call fast <2 x float> @_Z3sinDv2_f(
31; GCN-POSTLINK: call fast <2 x float> @_Z3cosDv2_f(
32; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_(
33; GCN-NATIVE: call fast <2 x float> @_Z10native_sinDv2_f(
34; GCN-NATIVE: call fast <2 x float> @_Z10native_cosDv2_f(
; <2 x float> variant of the sin/cos pairing test (prelink -> vector sincos).
define amdgpu_kernel void @test_sincos_v2(<2 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <2 x float>, <2 x float> addrspace(1)* %a, align 8
  %call = call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp)
  store <2 x float> %call, <2 x float> addrspace(1)* %a, align 8
  %call2 = call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i64 1
  store <2 x float> %call2, <2 x float> addrspace(1)* %arrayidx3, align 8
  ret void
}
45
46declare <2 x float> @_Z3sinDv2_f(<2 x float>)
47
48declare <2 x float> @_Z3cosDv2_f(<2 x float>)
49
50; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3
51; GCN-POSTLINK: call fast <3 x float> @_Z3sinDv3_f(
52; GCN-POSTLINK: call fast <3 x float> @_Z3cosDv3_f(
53; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_(
54; GCN-NATIVE: call fast <3 x float> @_Z10native_sinDv3_f(
55; GCN-NATIVE: call fast <3 x float> @_Z10native_cosDv3_f(
; <3 x float> variant; loads/stores go through <4 x float> with shufflevector
; (the usual OpenCL float3-as-float4 storage layout).
define amdgpu_kernel void @test_sincos_v3(<3 x float> addrspace(1)* nocapture %a) {
entry:
  %castToVec4 = bitcast <3 x float> addrspace(1)* %a to <4 x float> addrspace(1)*
  %loadVec4 = load <4 x float>, <4 x float> addrspace(1)* %castToVec4, align 16
  %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
  %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  store <4 x float> %extractVec6, <4 x float> addrspace(1)* %castToVec4, align 16
  %call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
  %arrayidx12 = getelementptr inbounds <3 x float>, <3 x float> addrspace(1)* %a, i64 1
  %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %storetmp14 = bitcast <3 x float> addrspace(1)* %arrayidx12 to <4 x float> addrspace(1)*
  store <4 x float> %extractVec13, <4 x float> addrspace(1)* %storetmp14, align 16
  ret void
}
71
72declare <3 x float> @_Z3sinDv3_f(<3 x float>)
73
74declare <3 x float> @_Z3cosDv3_f(<3 x float>)
75
76; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4
77; GCN-POSTLINK: call fast <4 x float> @_Z3sinDv4_f(
78; GCN-POSTLINK: call fast <4 x float> @_Z3cosDv4_f(
79; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_(
80; GCN-NATIVE: call fast <4 x float> @_Z10native_sinDv4_f(
81; GCN-NATIVE: call fast <4 x float> @_Z10native_cosDv4_f(
; <4 x float> variant of the sin/cos pairing test.
define amdgpu_kernel void @test_sincos_v4(<4 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
  %call = call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp)
  store <4 x float> %call, <4 x float> addrspace(1)* %a, align 16
  %call2 = call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a, i64 1
  store <4 x float> %call2, <4 x float> addrspace(1)* %arrayidx3, align 16
  ret void
}
92
93declare <4 x float> @_Z3sinDv4_f(<4 x float>)
94
95declare <4 x float> @_Z3cosDv4_f(<4 x float>)
96
97; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8
98; GCN-POSTLINK: call fast <8 x float> @_Z3sinDv8_f(
99; GCN-POSTLINK: call fast <8 x float> @_Z3cosDv8_f(
100; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_(
101; GCN-NATIVE: call fast <8 x float> @_Z10native_sinDv8_f(
102; GCN-NATIVE: call fast <8 x float> @_Z10native_cosDv8_f(
; <8 x float> variant of the sin/cos pairing test.
define amdgpu_kernel void @test_sincos_v8(<8 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <8 x float>, <8 x float> addrspace(1)* %a, align 32
  %call = call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp)
  store <8 x float> %call, <8 x float> addrspace(1)* %a, align 32
  %call2 = call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %a, i64 1
  store <8 x float> %call2, <8 x float> addrspace(1)* %arrayidx3, align 32
  ret void
}
113
114declare <8 x float> @_Z3sinDv8_f(<8 x float>)
115
116declare <8 x float> @_Z3cosDv8_f(<8 x float>)
117
118; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16
119; GCN-POSTLINK: call fast <16 x float> @_Z3sinDv16_f(
120; GCN-POSTLINK: call fast <16 x float> @_Z3cosDv16_f(
121; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_(
122; GCN-NATIVE: call fast <16 x float> @_Z10native_sinDv16_f(
123; GCN-NATIVE: call fast <16 x float> @_Z10native_cosDv16_f(
; <16 x float> variant of the sin/cos pairing test (widest vector form).
define amdgpu_kernel void @test_sincos_v16(<16 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <16 x float>, <16 x float> addrspace(1)* %a, align 64
  %call = call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp)
  store <16 x float> %call, <16 x float> addrspace(1)* %a, align 64
  %call2 = call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %a, i64 1
  store <16 x float> %call2, <16 x float> addrspace(1)* %arrayidx3, align 64
  ret void
}
134
135declare <16 x float> @_Z3sinDv16_f(<16 x float>)
136
137declare <16 x float> @_Z3cosDv16_f(<16 x float>)
138
139; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_recip
140; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
; native_recip(3.0) constant-folds to 1/3 (the 0x3FD5555560000000 store above).
define amdgpu_kernel void @test_native_recip(float addrspace(1)* nocapture %a) {
entry:
  %call = call fast float @_Z12native_recipf(float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
147
148declare float @_Z12native_recipf(float)
149
150; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_recip
151; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
; half_recip(3.0) constant-folds to 1/3, same as the native_recip case.
define amdgpu_kernel void @test_half_recip(float addrspace(1)* nocapture %a) {
entry:
  %call = call fast float @_Z10half_recipf(float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
158
159declare float @_Z10half_recipf(float)
160
161; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_divide
162; GCN: fmul fast float %tmp, 0x3FD5555560000000
; native_divide(x, 3.0) becomes a multiply by the reciprocal constant.
define amdgpu_kernel void @test_native_divide(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
170
171declare float @_Z13native_divideff(float, float)
172
173; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_divide
174; GCN: fmul fast float %tmp, 0x3FD5555560000000
; half_divide(x, 3.0) becomes a multiply by the reciprocal constant.
define amdgpu_kernel void @test_half_divide(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
182
183declare float @_Z11half_divideff(float, float)
184
185; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0f
186; GCN: store float 1.000000e+00, float addrspace(1)* %a
; pow(x, 0.0) folds to the constant 1.0.
define amdgpu_kernel void @test_pow_0f(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
194
195declare float @_Z3powff(float, float)
196
197; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0i
198; GCN: store float 1.000000e+00, float addrspace(1)* %a
; Integer-valued-exponent variant of pow(x, 0) — same float-constant body as
; the _0f case; both must fold to 1.0.
define amdgpu_kernel void @test_pow_0i(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
206
207; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1f
208; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
209; GCN: store float %tmp, float addrspace(1)* %a, align 4
; pow(x, 1.0) folds to x itself (the raw load is stored back).
define amdgpu_kernel void @test_pow_1f(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
218
219; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1i
220; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
221; GCN: store float %tmp, float addrspace(1)* %a, align 4
; Integer-valued-exponent variant of pow(x, 1); folds to x.
define amdgpu_kernel void @test_pow_1i(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
230
231; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2f
232; GCN: %tmp = load float, float addrspace(1)* %a, align 4
233; GCN: %__pow2 = fmul fast float %tmp, %tmp
; pow(x, 2.0) folds to x*x (%__pow2 fmul in the CHECKs).
define amdgpu_kernel void @test_pow_2f(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
241
242; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2i
243; GCN: %tmp = load float, float addrspace(1)* %a, align 4
244; GCN: %__pow2 = fmul fast float %tmp, %tmp
; Integer-valued-exponent variant of pow(x, 2); folds to x*x.
define amdgpu_kernel void @test_pow_2i(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
252
253; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1f
254; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
255; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
; pow(x, -1.0) folds to the reciprocal fdiv 1.0/x.
define amdgpu_kernel void @test_pow_m1f(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
264
265; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1i
266; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
267; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
; Integer-valued-exponent variant of pow(x, -1); folds to 1.0/x.
define amdgpu_kernel void @test_pow_m1i(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
276
277; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half
278; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 5.000000e-01)
279; GCN-PRELINK: %__pow2sqrt = call fast float @_Z4sqrtf(float %tmp)
; pow(x, 0.5): untouched post-link, folded to sqrt(x) at prelink.
define amdgpu_kernel void @test_pow_half(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 5.000000e-01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
288
289; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf
290; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float -5.000000e-01)
291; GCN-PRELINK: %__pow2rsqrt = call fast float @_Z5rsqrtf(float %tmp)
; pow(x, -0.5): untouched post-link, folded to rsqrt(x) at prelink.
define amdgpu_kernel void @test_pow_mhalf(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float -5.000000e-01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
300
301; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_c
302; GCN: %__powx2 = fmul fast float %tmp, %tmp
303; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
304; GCN: %__powx22 = fmul fast float %__powx2, %tmp
305; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
306; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
; pow(x, 11) with a small integral exponent: expanded into the fmul chain
; x^2, x^4, x^5, x^8, x^11 shown in the CHECKs above (repeated squaring).
define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.100000e+01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
315
316; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr_c
317; GCN: %__powx2 = fmul fast float %tmp, %tmp
318; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
319; GCN: %__powx22 = fmul fast float %__powx2, %tmp
320; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
321; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
; powr(x, 11): same repeated-squaring expansion as pow with a constant exponent.
define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z4powrff(float %tmp, float 1.100000e+01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
330
331declare float @_Z4powrff(float, float)
332
333; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown_c
334; GCN: %__powx2 = fmul fast float %tmp, %tmp
335; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
336; GCN: %__powx22 = fmul fast float %__powx2, %tmp
337; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
338; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
; pown(x, 11) with an i32 constant exponent: same repeated-squaring expansion.
define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z4pownfi(float %tmp, i32 11)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
347
348declare float @_Z4pownfi(float, i32)
349
350; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
351; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 1.013000e+03)
352; GCN-PRELINK: %__fabs = call fast float @_Z4fabsf(float %tmp)
353; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %__fabs)
354; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03
355; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx)
356; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
357; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648
358; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
359; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
360; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
361; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
; pow(x, 1013.0), exponent too large for the multiply chain: prelink lowers it
; to exp2(log2(|x|)*y) with the sign of x reattached via bitwise ops (CHECKs above).
define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.013000e+03)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
369
370; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr
371; GCN-POSTLINK: call fast float @_Z4powrff(float %tmp, float %tmp1)
372; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %tmp)
373; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1
374; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx)
375; GCN-PRELINK: store float %__exp2, float addrspace(1)* %a, align 4
376; GCN-NATIVE:  %__log2 = call fast float @_Z11native_log2f(float %tmp)
377; GCN-NATIVE:  %__ylogx = fmul fast float %__log2, %tmp1
378; GCN-NATIVE:  %__exp2 = call fast float @_Z11native_exp2f(float %__ylogx)
379; GCN-NATIVE:  store float %__exp2, float addrspace(1)* %a, align 4
; powr(x, y) with a runtime exponent: lowered to exp2(log2(x)*y); no sign
; handling is needed (powr assumes a non-negative base), and native mode uses
; the native log2/exp2 variants.
define amdgpu_kernel void @test_powr(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %call = call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
389
390; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown
391; GCN-POSTLINK: call fast float @_Z4pownfi(float %tmp, i32 %conv)
392; GCN-PRELINK: %conv = fptosi float %tmp1 to i32
393; GCN-PRELINK: %__fabs = call fast float @_Z4fabsf(float %tmp)
394; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %__fabs)
395; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float
396; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F
397; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx)
398; GCN-PRELINK: %__yeven = shl i32 %conv, 31
399; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
400; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]]
401; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
402; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
403; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
404; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
; pown(x, n) with a runtime i32 exponent: prelink lowers to
; exp2(log2(|x|)*n) and recovers the result sign from n's parity
; (the shl-by-31 / and / or sequence in the CHECKs above).
define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %conv = fptosi float %tmp1 to i32
  %call = call fast float @_Z4pownfi(float %tmp, i32 %conv)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
415
416; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
417; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
418; GCN: store float %tmp, float addrspace(1)* %a, align 4
; rootn(x, 1) folds to x itself.
define amdgpu_kernel void @test_rootn_1(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
427
428declare float @_Z5rootnfi(float, i32)
429
430; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2
431; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 2)
432; GCN-PRELINK: %__rootn2sqrt = call fast float @_Z4sqrtf(float %tmp)
; rootn(x, 2): untouched post-link, folded to sqrt(x) at prelink.
define amdgpu_kernel void @test_rootn_2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 2)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
440
441; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3
442; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 3)
443; GCN-PRELINK: %__rootn2cbrt = call fast float @_Z4cbrtf(float %tmp)
; rootn(x, 3): untouched post-link, folded to cbrt(x) at prelink.
define amdgpu_kernel void @test_rootn_3(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 3)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
451
452; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m1
453; GCN: fdiv fast float 1.000000e+00, %tmp
; rootn(x, -1) folds to the reciprocal fdiv 1.0/x.
define amdgpu_kernel void @test_rootn_m1(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 -1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
461
462; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2
463; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 -2)
464; GCN-PRELINK: %__rootn2rsqrt = call fast float @_Z5rsqrtf(float %tmp)
; rootn(x, -2): untouched post-link, folded to rsqrt(x) at prelink.
define amdgpu_kernel void @test_rootn_m2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 -2)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
472
473; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_0x
474; GCN: store float %y, float addrspace(1)* %a
; fma(0, x, y) folds to y (fast-math allows dropping the 0*x term).
define amdgpu_kernel void @test_fma_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
482
483declare float @_Z3fmafff(float, float, float)
484
485; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x0
486; GCN: store float %y, float addrspace(1)* %a
; fma(x, 0, y) folds to y (zero in the other multiplicand position).
define amdgpu_kernel void @test_fma_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
494
495; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_0x
496; GCN: store float %y, float addrspace(1)* %a
; mad(0, x, y) folds to y, mirroring the fma case.
define amdgpu_kernel void @test_mad_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
504
505declare float @_Z3madfff(float, float, float)
506
507; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_x0
508; GCN: store float %y, float addrspace(1)* %a
; mad(x, 0, y) folds to y.
define amdgpu_kernel void @test_mad_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
516
517; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x1y
518; GCN: %fmaadd = fadd fast float %tmp, %y
; fma(x, 1, y) folds to the add x + y (%fmaadd).
define amdgpu_kernel void @test_fma_x1y(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
526
527; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_1xy
528; GCN: %fmaadd = fadd fast float %tmp, %y
; fma(1, x, y) folds to the add x + y.
define amdgpu_kernel void @test_fma_1xy(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
536
537; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_xy0
538; GCN: %fmamul = fmul fast float %tmp1, %tmp
; fma(x, y, 0) folds to the multiply x * y (%fmamul).
define amdgpu_kernel void @test_fma_xy0(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %tmp1 = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
548
549; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp
550; GCN-NATIVE: call fast float @_Z10native_expf(float %tmp)
; With -amdgpu-use-native, exp is rewritten to native_exp.
define amdgpu_kernel void @test_use_native_exp(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3expf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
558
559declare float @_Z3expf(float)
560
561; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2
562; GCN-NATIVE: call fast float @_Z11native_exp2f(float %tmp)
; With -amdgpu-use-native, exp2 is rewritten to native_exp2.
define amdgpu_kernel void @test_use_native_exp2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z4exp2f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
570
571declare float @_Z4exp2f(float)
572
573; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10
574; GCN-NATIVE: call fast float @_Z12native_exp10f(float %tmp)
; With -amdgpu-use-native, exp10 is rewritten to native_exp10.
define amdgpu_kernel void @test_use_native_exp10(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5exp10f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
582
583declare float @_Z5exp10f(float)
584
585; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log
586; GCN-NATIVE: call fast float @_Z10native_logf(float %tmp)
; With -amdgpu-use-native, log is rewritten to native_log.
define amdgpu_kernel void @test_use_native_log(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3logf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
594
595declare float @_Z3logf(float)
596
597; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2
598; GCN-NATIVE: call fast float @_Z11native_log2f(float %tmp)
; With -amdgpu-use-native, log2 is rewritten to native_log2.
define amdgpu_kernel void @test_use_native_log2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z4log2f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
606
607declare float @_Z4log2f(float)
608
609; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10
610; GCN-NATIVE: call fast float @_Z12native_log10f(float %tmp)
; With -amdgpu-use-native, log10 is rewritten to native_log10.
define amdgpu_kernel void @test_use_native_log10(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5log10f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
618
619declare float @_Z5log10f(float)
620
621; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr
622; GCN-NATIVE: %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
623; GCN-NATIVE: %__log2 = call fast float @_Z11native_log2f(float %tmp)
624; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
625; GCN-NATIVE: %__exp2 = call fast float @_Z11native_exp2f(float %__ylogx)
626; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
; Native mode lowers powr(x, y) via native_log2/native_exp2 (CHECKs above).
define amdgpu_kernel void @test_use_native_powr(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %call = call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
636
637; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
638; GCN-NATIVE: call fast float @_Z11native_sqrtf(float %tmp)
; With -amdgpu-use-native, f32 sqrt is rewritten to native_sqrt.
define amdgpu_kernel void @test_use_native_sqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z4sqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
646
647; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
648; GCN: call fast double @_Z4sqrtd(double %tmp)
; Negative test: f64 sqrt must NOT be converted to a native call (no f64
; native_sqrt); the original call is expected to survive in all modes.
define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) {
entry:
  %tmp = load double, double addrspace(1)* %a, align 8
  %call = call fast double @_Z4sqrtd(double %tmp)
  store double %call, double addrspace(1)* %a, align 8
  ret void
}
656
657declare float @_Z4sqrtf(float)
658declare double @_Z4sqrtd(double)
659
660; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt
661; GCN-NATIVE: call fast float @_Z12native_rsqrtf(float %tmp)
; With -amdgpu-use-native, rsqrt is rewritten to native_rsqrt.
define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5rsqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
669
670declare float @_Z5rsqrtf(float)
671
672; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan
673; GCN-NATIVE: call fast float @_Z10native_tanf(float %tmp)
; With -amdgpu-use-native, tan is rewritten to native_tan.
define amdgpu_kernel void @test_use_native_tan(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3tanf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
681
682declare float @_Z3tanf(float)
683
684; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos
685; GCN-NATIVE: call float @_Z10native_sinf(float %tmp)
686; GCN-NATIVE: call float @_Z10native_cosf(float %tmp)
; Native mode splits sincos(x, &c) back into native_sin / native_cos calls;
; the cos result reaches the out-pointer through the addrspacecast below.
define amdgpu_kernel void @test_use_native_sincos(float addrspace(1)* %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = addrspacecast float addrspace(1)* %arrayidx1 to float*
  %call = call fast float @_Z6sincosfPf(float %tmp, float* %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
696
697declare float @_Z6sincosfPf(float, float*)
698
699%opencl.pipe_t = type opaque
700%opencl.reserve_id_t = type opaque
701
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND:[0-9]+]]
; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
; In prelink mode, generic pipe reads with constant size/alignment arguments
; (here: 4-byte packets, align 4) are specialized to the size-suffixed
; builtins __read_pipe_2_4 / __read_pipe_4_4, which drop the size/align
; operands and take a typed (i32*) packet pointer.  This also defines the
; [[$NOUNWIND]] attribute-group capture used by later pipe tests.
define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  %tmp2 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4)
  %tmp4 = call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4)
  ret void
}
715
716declare i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32)
717
718declare %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32)
719
720declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32)
721
722declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32)
723
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
; Mirror of test_read_pipe for the write direction: the generic
; __write_pipe_2 / __write_pipe_4 calls with constant 4-byte size/align are
; specialized to __write_pipe_2_4 / __write_pipe_4_4 in prelink mode.
define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  %tmp2 = call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0
  %tmp4 = call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0
  ret void
}
737
738declare i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32) local_unnamed_addr
739
740declare %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) local_unnamed_addr
741
742declare i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32) local_unnamed_addr
743
744declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32) local_unnamed_addr
745
746%struct.S = type { [100 x i32] }
747
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size
; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}} i32 400, i32 4) #[[$NOUNWIND]]
; Exercises every size-specialized variant of __read_pipe_2: each power-of-two
; packet size from 1 through 128 bytes is rewritten to __read_pipe_2_<size>
; with a correspondingly typed packet pointer (i8*, i16*, ..., <16 x i64>*).
; The final call uses a 400-byte %struct.S packet, which has no specialized
; form and therefore remains a generic __read_pipe_2 call with explicit
; size/align operands.  Each packet pointer is prepared by bitcasting to i8
; in addrspace(1) and addrspacecasting to a generic i8*.
define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 {
entry:
  %tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8*
  %tmp1 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0
  %tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)*
  %tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8*
  %tmp4 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0
  %tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)*
  %tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8*
  %tmp7 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0
  %tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)*
  %tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8*
  %tmp10 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0
  %tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)*
  %tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8*
  %tmp13 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0
  %tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)*
  %tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8*
  %tmp16 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0
  %tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)*
  %tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8*
  %tmp19 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0
  %tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)*
  %tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8*
  %tmp22 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0
  %tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)*
  %tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8*
  %tmp25 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0
  ret void
}
788
; Module-level checks: in prelink mode the pass-created/retained declarations
; must carry the readonly attribute group, and the two captured attribute
; groups must resolve to the expected attribute sets.
; GCN-PRELINK: declare float @_Z4fabsf(float) local_unnamed_addr #[[$NOUNWIND_READONLY:[0-9]+]]
; GCN-PRELINK: declare float @_Z4cbrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY]]
; GCN-PRELINK: declare float @_Z11native_sqrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY]]

; GCN-PRELINK: attributes #[[$NOUNWIND]] = { nounwind }
; GCN-PRELINK: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind readonly }
; Attribute group #0 (nounwind) is applied to the pipe builtin call sites above.
attributes #0 = { nounwind }
796