; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -dce -S -o - %s | FileCheck %s

; Exercises the load-store vectorizer on scalar i32 loads whose addresses are
; produced by `select` instructions. Every function below builds several
; selected pointers, loads through each of them, packs the loaded values into
; a vector and stores it to %out; the FileCheck directive inside each function
; expects a single vector load in the output.

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; Simplest shape: three selects share the condition %cnd. Their true operands
; are %a, %a + 1 and %a + 2 (in i32 elements) and their false operands are
; %b, %b + 1 and %b + 2, all formed with constant-index GEPs.
define void @base_case(i1 %cnd, i32 addrspace(1)* %a, i32 addrspace(1)* %b, <3 x i32> addrspace(1)* %out) {
; CHECK-LABEL: @base_case
; CHECK: load <3 x i32>
entry:
  %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 1
  %gep2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 2
  %gep4 = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 1
  %gep5 = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 2
  %selected = select i1 %cnd, i32 addrspace(1)* %a, i32 addrspace(1)* %b
  %selected14 = select i1 %cnd, i32 addrspace(1)* %gep1, i32 addrspace(1)* %gep4
  %selected25 = select i1 %cnd, i32 addrspace(1)* %gep2, i32 addrspace(1)* %gep5
  %val0 = load i32, i32 addrspace(1)* %selected, align 4
  %val1 = load i32, i32 addrspace(1)* %selected14, align 4
  %val2 = load i32, i32 addrspace(1)* %selected25, align 4
  %t0 = insertelement <3 x i32> undef, i32 %val0, i32 0
  %t1 = insertelement <3 x i32> %t0, i32 %val1, i32 1
  %t2 = insertelement <3 x i32> %t1, i32 %val2, i32 2
  store <3 x i32> %t2, <3 x i32> addrspace(1)* %out
  ret void
}

; Variable offsets: byte offsets are derived from %base through a mix of
; shl/add/zext/mul, and applied with i8 GEPs on bitcast copies of %a and %b.
; The first select chooses between already-bitcast i32 pointers, while the
; second select chooses between i8 pointers and is bitcast afterwards. Each
; selected pointer is then advanced by a constant i32 GEP (10 and 9 elements
; respectively) before being loaded.
define void @scev_targeting_complex_case(i1 %cnd, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %base, <2 x i32> addrspace(1)* %out) {
; CHECK-LABEL: @scev_targeting_complex_case
; CHECK: load <2 x i32>
entry:
  %base.x4 = shl i32 %base, 2
  %base.x4.p1 = add i32 %base.x4, 1
  %base.x4.p2 = add i32 %base.x4, 2
  %base.x4.p3 = add i32 %base.x4, 3
  %zext.x4 = zext i32 %base.x4 to i64
  %zext.x4.p1 = zext i32 %base.x4.p1 to i64
  %zext.x4.p2 = zext i32 %base.x4.p2 to i64
  %zext.x4.p3 = zext i32 %base.x4.p3 to i64
  %base.x16 = mul i64 %zext.x4, 4
  %base.x16.p4 = shl i64 %zext.x4.p1, 2
  %base.x16.p8 = shl i64 %zext.x4.p2, 2
  %base.x16.p12 = mul i64 %zext.x4.p3, 4
  %a.pi8 = bitcast i32 addrspace(1)* %a to i8 addrspace(1)*
  %b.pi8 = bitcast i32 addrspace(1)* %b to i8 addrspace(1)*
  %gep.a.base.x16 = getelementptr inbounds i8, i8 addrspace(1)* %a.pi8, i64 %base.x16
  %gep.b.base.x16.p4 = getelementptr inbounds i8, i8 addrspace(1)* %b.pi8, i64 %base.x16.p4
  %gep.a.base.x16.p8 = getelementptr inbounds i8, i8 addrspace(1)* %a.pi8, i64 %base.x16.p8
  %gep.b.base.x16.p12 = getelementptr inbounds i8, i8 addrspace(1)* %b.pi8, i64 %base.x16.p12
  %a.base.x16 = bitcast i8 addrspace(1)* %gep.a.base.x16 to i32 addrspace(1)*
  %b.base.x16.p4 = bitcast i8 addrspace(1)* %gep.b.base.x16.p4 to i32 addrspace(1)*
  %selected.base.x16.p0.or.4 = select i1 %cnd, i32 addrspace(1)* %a.base.x16, i32 addrspace(1)* %b.base.x16.p4
  %gep.selected.base.x16.p8.or.12 = select i1 %cnd, i8 addrspace(1)* %gep.a.base.x16.p8, i8 addrspace(1)* %gep.b.base.x16.p12
  %selected.base.x16.p8.or.12 = bitcast i8 addrspace(1)* %gep.selected.base.x16.p8.or.12 to i32 addrspace(1)*
  %selected.base.x16.p40.or.44 = getelementptr inbounds i32, i32 addrspace(1)* %selected.base.x16.p0.or.4, i64 10
  %selected.base.x16.p44.or.48 = getelementptr inbounds i32, i32 addrspace(1)* %selected.base.x16.p8.or.12, i64 9
  %val0 = load i32, i32 addrspace(1)* %selected.base.x16.p40.or.44, align 4
  %val1 = load i32, i32 addrspace(1)* %selected.base.x16.p44.or.48, align 4
  %t0 = insertelement <2 x i32> undef, i32 %val0, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 %val1, i32 1
  store <2 x i32> %t1, <2 x i32> addrspace(1)* %out
  ret void
}

; Two levels of selects: the outer pair is keyed on %cnd0 and its false
; operands are themselves selects keyed on %cnd1. Indices are sign-extended
; i32 values derived from %base; only some of the adds carry nsw.
; NOTE(review): the two GEPs named %gep.b.* are based on %a, and the %b
; argument is never used in this function -- confirm whether that is
; intentional before changing it, since it may affect what the pass can prove.
define void @nested_selects(i1 %cnd0, i1 %cnd1, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %base, <2 x i32> addrspace(1)* %out) {
; CHECK-LABEL: @nested_selects
; CHECK: load <2 x i32>
entry:
  %base.p1 = add nsw i32 %base, 1
  %base.p2 = add i32 %base, 2
  %base.p3 = add nsw i32 %base, 3
  %base.x4 = mul i32 %base, 4
  %base.x4.p5 = add i32 %base.x4, 5
  %base.x4.p6 = add i32 %base.x4, 6
  %sext = sext i32 %base to i64
  %sext.p1 = sext i32 %base.p1 to i64
  %sext.p2 = sext i32 %base.p2 to i64
  %sext.p3 = sext i32 %base.p3 to i64
  %sext.x4.p5 = sext i32 %base.x4.p5 to i64
  %sext.x4.p6 = sext i32 %base.x4.p6 to i64
  %gep.a.base = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %sext
  %gep.a.base.p1 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %sext.p1
  %gep.a.base.p2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %sext.p2
  %gep.a.base.p3 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %sext.p3
  %gep.b.base.x4.p5 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %sext.x4.p5
  %gep.b.base.x4.p6 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %sext.x4.p6
  %selected.1.L = select i1 %cnd1, i32 addrspace(1)* %gep.a.base.p2, i32 addrspace(1)* %gep.b.base.x4.p5
  %selected.1.R = select i1 %cnd1, i32 addrspace(1)* %gep.a.base.p3, i32 addrspace(1)* %gep.b.base.x4.p6
  %selected.0.L = select i1 %cnd0, i32 addrspace(1)* %gep.a.base, i32 addrspace(1)* %selected.1.L
  %selected.0.R = select i1 %cnd0, i32 addrspace(1)* %gep.a.base.p1, i32 addrspace(1)* %selected.1.R
  %val0 = load i32, i32 addrspace(1)* %selected.0.L, align 4
  %val1 = load i32, i32 addrspace(1)* %selected.0.R, align 4
  %t0 = insertelement <2 x i32> undef, i32 %val0, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 %val1, i32 1
  store <2 x i32> %t1, <2 x i32> addrspace(1)* %out
  ret void
}