; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s

; Loop-vectorizer test for i32 loads of the first field of a struct that is
; indexed by the loop induction variable. The vector width is forced to 4, so
; a vectorized contiguous access shows up as a <4 x i32> load in the output.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

%struct.coordinate = type { i32, i32 }

; Make sure that we don't generate a wide load when accessing the struct.
; struct coordinate {
;  int x;
;  int y;
; };
;
;
; int foo(struct coordinate *A, int n) {
;
;   int sum = 0;
;   for (int i = 0; i < n; ++i)
;     sum += A[i].x;
;
;   return sum;
; }

;CHECK-LABEL: @foo(
;CHECK-NOT: load <4 x i32>
;CHECK: ret
define i32 @foo(%struct.coordinate* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
entry:
  ; Skip the loop entirely when n <= 0.
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  ; Field 0 of a two-i32 struct: successive iterations read every other i32,
  ; so the loaded elements are not adjacent in memory.
  %x = getelementptr inbounds %struct.coordinate, %struct.coordinate* %A, i64 %indvars.iv, i32 0
  %0 = load i32, i32* %x, align 4
  %add = add nsw i32 %0, %sum.05
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares the incremented index, truncated to i32, against n.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
  ret i32 %sum.0.lcssa
}

%struct.lit = type { i32 }

; Verify that we still vectorize the access if the struct has the same size as
; the loaded element.
; struct lit {
;  int x;
; };
;
;
; int bar(struct lit *A, int n) {
;
;   int sum = 0;
;   for (int i = 0; i < n; ++i)
;     sum += A[i].x;
;
;   return sum;
; }

;CHECK-LABEL: @bar(
;CHECK: load <4 x i32>
;CHECK: ret
define i32 @bar(%struct.lit* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
entry:
  ; Skip the loop entirely when n <= 0.
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  ; The struct holds a single i32, so successive iterations read adjacent
  ; i32 values.
  %x = getelementptr inbounds %struct.lit, %struct.lit* %A, i64 %indvars.iv, i32 0
  %0 = load i32, i32* %x, align 4
  %add = add nsw i32 %0, %sum.05
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares the incremented index, truncated to i32, against n.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
  ret i32 %sum.0.lcssa
}