; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink -amdgpu-enable-ocl-mangling-mismatch-workaround=0 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s
; RUN: opt -S -passes='default<O1>' -mtriple=amdgcn-- -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
; RUN: opt -S -passes='default<O1>' -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink -amdgpu-enable-ocl-mangling-mismatch-workaround=0 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
; RUN: opt -S -passes='default<O1>' -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
; GCN-POSTLINK: call fast float @_Z3sinf(
; GCN-POSTLINK: call fast float @_Z3cosf(
; GCN-PRELINK: call fast float @_Z6sincosfPf(
; GCN-NATIVE: call fast float @_Z10native_sinf(
; GCN-NATIVE: call fast float @_Z10native_cosf(
define amdgpu_kernel void @test_sincos(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3sinf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  %call2 = call fast float @_Z3cosf(float %tmp)
  %arrayidx3 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  store float %call2, float addrspace(1)* %arrayidx3, align 4
  ret void
}

declare float @_Z3sinf(float)

declare float @_Z3cosf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2
; GCN-POSTLINK: call fast <2 x float> @_Z3sinDv2_f(
; GCN-POSTLINK: call fast <2 x float> @_Z3cosDv2_f(
; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_(
; GCN-NATIVE: call fast <2 x float> @_Z10native_sinDv2_f(
; GCN-NATIVE: call fast <2 x float> @_Z10native_cosDv2_f(
define amdgpu_kernel void @test_sincos_v2(<2 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <2 x float>, <2 x float> addrspace(1)* %a, align 8
  %call = call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp)
  store <2 x float> %call, <2 x float> addrspace(1)* %a, align 8
  %call2 = call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i64 1
  store <2 x float> %call2, <2 x float> addrspace(1)* %arrayidx3, align 8
  ret void
}

declare <2 x float> @_Z3sinDv2_f(<2 x float>)

declare <2 x float> @_Z3cosDv2_f(<2 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3
; GCN-POSTLINK: call fast <3 x float> @_Z3sinDv3_f(
; GCN-POSTLINK: call fast <3 x float> @_Z3cosDv3_f(
; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_(
; GCN-NATIVE: call fast <3 x float> @_Z10native_sinDv3_f(
; GCN-NATIVE: call fast <3 x float> @_Z10native_cosDv3_f(
define amdgpu_kernel void @test_sincos_v3(<3 x float> addrspace(1)* nocapture %a) {
entry:
  %castToVec4 = bitcast <3 x float> addrspace(1)* %a to <4 x float> addrspace(1)*
  %loadVec4 = load <4 x float>, <4 x float> addrspace(1)* %castToVec4, align 16
  %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
  %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  store <4 x float> %extractVec6, <4 x float> addrspace(1)* %castToVec4, align 16
  %call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
  %arrayidx12 = getelementptr inbounds <3 x float>, <3 x float> addrspace(1)* %a, i64 1
  %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %storetmp14 = bitcast <3 x float> addrspace(1)* %arrayidx12 to <4 x float> addrspace(1)*
  store <4 x float> %extractVec13, <4 x float> addrspace(1)* %storetmp14, align 16
  ret void
}

declare <3 x float> @_Z3sinDv3_f(<3 x float>)

declare <3 x float> @_Z3cosDv3_f(<3 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4
; GCN-POSTLINK: call fast <4 x float> @_Z3sinDv4_f(
; GCN-POSTLINK: call fast <4 x float> @_Z3cosDv4_f(
; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_(
; GCN-NATIVE: call fast <4 x float> @_Z10native_sinDv4_f(
; GCN-NATIVE: call fast <4 x float> @_Z10native_cosDv4_f(
define amdgpu_kernel void @test_sincos_v4(<4 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
  %call = call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp)
  store <4 x float> %call, <4 x float> addrspace(1)* %a, align 16
  %call2 = call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a, i64 1
  store <4 x float> %call2, <4 x float> addrspace(1)* %arrayidx3, align 16
  ret void
}

declare <4 x float> @_Z3sinDv4_f(<4 x float>)

declare <4 x float> @_Z3cosDv4_f(<4 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8
; GCN-POSTLINK: call fast <8 x float> @_Z3sinDv8_f(
; GCN-POSTLINK: call fast <8 x float> @_Z3cosDv8_f(
; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_(
; GCN-NATIVE: call fast <8 x float> @_Z10native_sinDv8_f(
; GCN-NATIVE: call fast <8 x float> @_Z10native_cosDv8_f(
define amdgpu_kernel void @test_sincos_v8(<8 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <8 x float>, <8 x float> addrspace(1)* %a, align 32
  %call = call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp)
  store <8 x float> %call, <8 x float> addrspace(1)* %a, align 32
  %call2 = call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %a, i64 1
  store <8 x float> %call2, <8 x float> addrspace(1)* %arrayidx3, align 32
  ret void
}

declare <8 x float> @_Z3sinDv8_f(<8 x float>)

declare <8 x float> @_Z3cosDv8_f(<8 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16
; GCN-POSTLINK: call fast <16 x float> @_Z3sinDv16_f(
; GCN-POSTLINK: call fast <16 x float> @_Z3cosDv16_f(
; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_(
; GCN-NATIVE: call fast <16 x float> @_Z10native_sinDv16_f(
; GCN-NATIVE: call fast <16 x float> @_Z10native_cosDv16_f(
define amdgpu_kernel void @test_sincos_v16(<16 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <16 x float>, <16 x float> addrspace(1)* %a, align 64
  %call = call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp)
  store <16 x float> %call, <16 x float> addrspace(1)* %a, align 64
  %call2 = call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %a, i64 1
  store <16 x float> %call2, <16 x float> addrspace(1)* %arrayidx3, align 64
  ret void
}

declare <16 x float> @_Z3sinDv16_f(<16 x float>)

declare <16 x float> @_Z3cosDv16_f(<16 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_recip
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
define amdgpu_kernel void @test_native_recip(float addrspace(1)* nocapture %a) {
entry:
  %call = call fast float @_Z12native_recipf(float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z12native_recipf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_recip
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
define amdgpu_kernel void @test_half_recip(float addrspace(1)* nocapture %a) {
entry:
  %call = call fast float @_Z10half_recipf(float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z10half_recipf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_divide
; GCN: fmul fast float %tmp, 0x3FD5555560000000
define amdgpu_kernel void @test_native_divide(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z13native_divideff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_divide
; GCN: fmul fast float %tmp, 0x3FD5555560000000
define amdgpu_kernel void @test_half_divide(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z11half_divideff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0f
; GCN: store float 1.000000e+00, float addrspace(1)* %a
define amdgpu_kernel void @test_pow_0f(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3powff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0i
; GCN: store float 1.000000e+00, float addrspace(1)* %a
define amdgpu_kernel void @test_pow_0i(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1f
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_pow_1f(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1i
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_pow_1i(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2f
; GCN: %tmp = load float, float addrspace(1)* %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2f(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2i
; GCN: %tmp = load float, float addrspace(1)* %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2i(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1f
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1f(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1i
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1i(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half
; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 5.000000e-01)
; GCN-PRELINK: %__pow2sqrt = call fast float @_Z4sqrtf(float %tmp)
define amdgpu_kernel void @test_pow_half(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 5.000000e-01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf
; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float -5.000000e-01)
; GCN-PRELINK: %__pow2rsqrt = call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_pow_mhalf(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float -5.000000e-01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.100000e+01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z4powrff(float %tmp, float 1.100000e+01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4powrff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z4pownfi(float %tmp, i32 11)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4pownfi(float, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 1.013000e+03)
; GCN-PRELINK: %__fabs = call fast float @_Z4fabsf(float %tmp)
; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03
; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.013000e+03)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr
; GCN-POSTLINK: call fast float @_Z4powrff(float %tmp, float %tmp1)
; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %tmp)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: store float %__exp2, float addrspace(1)* %a, align 4
; GCN-NATIVE: %__log2 = call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-NATIVE: %__exp2 = call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_powr(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %call = call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown
; GCN-POSTLINK: call fast float @_Z4pownfi(float %tmp, i32 %conv)
; GCN-PRELINK: %conv = fptosi float %tmp1 to i32
; GCN-PRELINK: %__fabs = call fast float @_Z4fabsf(float %tmp)
; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F
; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %__yeven = shl i32 %conv, 31
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]]
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %conv = fptosi float %tmp1 to i32
  %call = call fast float @_Z4pownfi(float %tmp, i32 %conv)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_rootn_1(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5rootnfi(float, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2
; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 2)
; GCN-PRELINK: %__rootn2sqrt = call fast float @_Z4sqrtf(float %tmp)
define amdgpu_kernel void @test_rootn_2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 2)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3
; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 3)
; GCN-PRELINK: %__rootn2cbrt = call fast float @_Z4cbrtf(float %tmp)
define amdgpu_kernel void @test_rootn_3(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 3)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m1
; GCN: fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_rootn_m1(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 -1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2
; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 -2)
; GCN-PRELINK: %__rootn2rsqrt = call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_rootn_m2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 -2)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_0x
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_fma_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3fmafff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x0
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_fma_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_0x
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_mad_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3madfff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_x0
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_mad_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x1y
; GCN: %fmaadd = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_x1y(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_1xy
; GCN: %fmaadd = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_1xy(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_xy0
; GCN: %fmamul = fmul fast float %tmp1, %tmp
define amdgpu_kernel void @test_fma_xy0(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %tmp1 = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp
; GCN-NATIVE: call fast float @_Z10native_expf(float %tmp)
define amdgpu_kernel void @test_use_native_exp(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3expf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3expf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2
; GCN-NATIVE: call fast float @_Z11native_exp2f(float %tmp)
define amdgpu_kernel void @test_use_native_exp2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z4exp2f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4exp2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10
; GCN-NATIVE: call fast float @_Z12native_exp10f(float %tmp)
define amdgpu_kernel void @test_use_native_exp10(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5exp10f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5exp10f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log
; GCN-NATIVE: call fast float @_Z10native_logf(float %tmp)
define amdgpu_kernel void @test_use_native_log(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3logf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3logf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2
; GCN-NATIVE: call fast float @_Z11native_log2f(float %tmp)
define amdgpu_kernel void @test_use_native_log2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z4log2f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4log2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10
; GCN-NATIVE: call fast float @_Z12native_log10f(float %tmp)
define amdgpu_kernel void @test_use_native_log10(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5log10f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5log10f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr
; GCN-NATIVE: %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
; GCN-NATIVE: %__log2 = call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-NATIVE: %__exp2 = call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_use_native_powr(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %call = call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
; GCN-NATIVE: call fast float @_Z11native_sqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_sqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z4sqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
; GCN: call fast double @_Z4sqrtd(double %tmp)
define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) {
entry:
  %tmp = load double, double addrspace(1)* %a, align 8
  %call = call fast double @_Z4sqrtd(double %tmp)
  store double %call, double addrspace(1)* %a, align 8
  ret void
}

declare float @_Z4sqrtf(float)
declare double @_Z4sqrtd(double)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt
; GCN-NATIVE: call fast float @_Z12native_rsqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z5rsqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5rsqrtf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan
; GCN-NATIVE: call fast float @_Z10native_tanf(float %tmp)
define amdgpu_kernel void @test_use_native_tan(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = call fast float @_Z3tanf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3tanf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos
; GCN-NATIVE: call float @_Z10native_sinf(float %tmp)
; GCN-NATIVE: call float @_Z10native_cosf(float %tmp)
define amdgpu_kernel void @test_use_native_sincos(float addrspace(1)* %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = addrspacecast float addrspace(1)* %arrayidx1 to float*
  %call = call fast float @_Z6sincosfPf(float %tmp, float* %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z6sincosfPf(float, float*)

%opencl.pipe_t = type opaque
%opencl.reserve_id_t = type opaque

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND:[0-9]+]]
; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  %tmp2 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4)
  %tmp4 = call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4)
  ret void
}

declare i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32)

declare %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32)

declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32)

declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  %tmp2 = call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0
  %tmp4 = call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0
  ret void
}

declare i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32) local_unnamed_addr

declare %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) local_unnamed_addr

declare i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32) local_unnamed_addr

declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32) local_unnamed_addr

%struct.S = type { [100 x i32] }

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size
; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}} i32 400, i32 4) #[[$NOUNWIND]]
define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 {
entry:
  %tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8*
  %tmp1 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0
  %tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)*
  %tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8*
  %tmp4 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0
  %tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)*
  %tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8*
  %tmp7 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0
  %tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)*
  %tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8*
  %tmp10 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0
  %tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)*
  %tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8*
  %tmp13 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0
  %tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)*
  %tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8*
  %tmp16 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0
  %tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)*
  %tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8*
  %tmp19 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0
  %tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)*
  %tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8*
  %tmp22 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0
  %tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)*
  %tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8*
  %tmp25 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0
  ret void
}

; GCN-PRELINK: declare float @_Z4fabsf(float) local_unnamed_addr #[[$NOUNWIND_READONLY:[0-9]+]]
; GCN-PRELINK: declare float @_Z4cbrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY]]
; GCN-PRELINK: declare float @_Z11native_sqrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY]]

; GCN-PRELINK: attributes #[[$NOUNWIND]] = { nounwind }
; GCN-PRELINK: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind readonly }
attributes #0 = { nounwind }