; RUN: opt -mtriple=amdgcn-amd-amdhsa -basic-aa -load-store-vectorizer -S -o - %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

declare i64 @_Z12get_local_idj(i32)

declare i64 @_Z12get_group_idj(i32)

declare double @llvm.fmuladd.f64(double, double, double)

; The two float accesses below address %c[%add10] and %c[%add10 | 1], i.e.
; two adjacent elements whose offsets are built from different (factorized vs.
; non-factorized) integer arithmetic. The LoadStoreVectorizer is expected to
; prove they are consecutive and merge each scalar load/store pair into a
; single <2 x float> access.
; CHECK-LABEL: @factorizedVsNonfactorizedAccess(
; CHECK: load <2 x float>
; CHECK: store <2 x float>
define amdgpu_kernel void @factorizedVsNonfactorizedAccess(float addrspace(1)* nocapture %c) {
entry:
  %call = tail call i64 @_Z12get_local_idj(i32 0)
  %call1 = tail call i64 @_Z12get_group_idj(i32 0)
  %div = lshr i64 %call, 4
  %div2 = lshr i64 %call1, 3
  %mul = shl i64 %div2, 7
  %rem = shl i64 %call, 3
  %mul3 = and i64 %rem, 120
  %add = or i64 %mul, %mul3
  %rem4 = shl i64 %call1, 7
  %mul5 = and i64 %rem4, 896
  %mul6 = shl nuw nsw i64 %div, 3
  %add7 = add nuw i64 %mul5, %mul6
  %mul9 = shl i64 %add7, 10
  %add10 = add i64 %mul9, %add
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %c, i64 %add10
  %load1 = load float, float addrspace(1)* %arrayidx, align 4
  %conv = fpext float %load1 to double
  %mul11 = fmul double %conv, 0x3FEAB481D8F35506
  %conv12 = fptrunc double %mul11 to float
  %conv18 = fpext float %conv12 to double
  %storeval1 = tail call double @llvm.fmuladd.f64(double 0x3FF4FFAFBBEC946A, double 0.000000e+00, double %conv18)
  %cstoreval1 = fptrunc double %storeval1 to float
  store float %cstoreval1, float addrspace(1)* %arrayidx, align 4

  ; Second access: element at %add10 | 1. Since %add10's low bit is known
  ; clear only through the arithmetic above, the `or` acts as `add 1`,
  ; making this element adjacent to the first.
  %add23 = or i64 %add10, 1
  %arrayidx24 = getelementptr inbounds float, float addrspace(1)* %c, i64 %add23
  %load2 = load float, float addrspace(1)* %arrayidx24, align 4
  %conv25 = fpext float %load2 to double
  %mul26 = fmul double %conv25, 0x3FEAB481D8F35506
  %conv27 = fptrunc double %mul26 to float
  %conv34 = fpext float %conv27 to double
  %storeval2 = tail call double @llvm.fmuladd.f64(double 0x3FF4FFAFBBEC946A, double 0.000000e+00, double %conv34)
  %cstoreval2 = fptrunc double %storeval2 to float
  store float %cstoreval2, float addrspace(1)* %arrayidx24, align 4
  ret void
}