; RUN: opt -codegenprepare -load-store-vectorizer %s -S -o - | FileCheck %s
; RUN: opt -load-store-vectorizer %s -S -o - | FileCheck %s

; Regression test for the LoadStoreVectorizer pass. Both functions below read
; two neighbouring 4-byte elements of @global_pointer, and in both the
; FileCheck directives require that no scalar 32-bit integer load remains and
; that a single <2 x i32> load is emitted. The test is run twice: once with
; codegenprepare scheduled ahead of the vectorizer and once with the
; vectorizer alone.

target triple = "x86_64--"

; Layout note: the inner struct's field 0 is 4*4*4*16 = 1024 floats, i.e.
; 4096 bytes, so field 1 starts at byte offset 4096. That is the constant
; used by the byte-offset address computation in @test.codegenprepared.
%union = type { { [4 x [4 x [4 x [16 x float]]]], [4 x [4 x [4 x [16 x float]]]], [10 x [10 x [4 x float]]] } }

@global_pointer = external unnamed_addr global { %union, [2000 x i8] }, align 4

; The addresses are computed with struct/array GEPs in the entry block, while
; the loads that use them sit in a different basic block.
; Function Attrs: convergent nounwind
define void @test(i32 %base) #0 {
; CHECK-LABEL: @test(
; CHECK-NOT: load i32
; CHECK: load <2 x i32>
; CHECK-NOT: load i32
entry:
  ; Clear the low two bits of %base, so %mul331 is a multiple of 4.
  %mul331 = and i32 %base, -4
  ; Index of the first element: %mul331 + 4, widened to i64 only after the
  ; 32-bit add.
  %add350.4 = add i32 4, %mul331
  %idx351.4 = zext i32 %add350.4 to i64
  ; Address of field 1, element [0][0][0][%idx351.4], reinterpreted as i32*.
  %arrayidx352.4 = getelementptr inbounds { %union, [2000 x i8] }, { %union, [2000 x i8] }* @global_pointer, i64 0, i32 0, i32 0, i32 1, i64 0, i64 0, i64 0, i64 %idx351.4
  %tmp296.4 = bitcast float* %arrayidx352.4 to i32*
  ; Index of the second element: %mul331 + 5. Because %add350.4 has its low
  ; two bits clear, this equals %add350.4 + 1 without wrapping, so the two
  ; zero-extended indices differ by exactly one.
  ; NOTE(review): presumably this is the fact the pass must establish to
  ; treat the two accesses as adjacent -- confirm against the pass.
  %add350.5 = add i32 5, %mul331
  %idx351.5 = zext i32 %add350.5 to i64
  %arrayidx352.5 = getelementptr inbounds { %union, [2000 x i8] }, { %union, [2000 x i8] }* @global_pointer, i64 0, i32 0, i32 0, i32 1, i64 0, i64 0, i64 0, i64 %idx351.5
  %tmp296.5 = bitcast float* %arrayidx352.5 to i32*
  ; The loads are only reached when %base is below 1000 (unsigned compare).
  %cnd = icmp ult i32 %base, 1000
  br i1 %cnd, label %loads, label %exit

loads:
  ; If and only if the loads are in a different BB from the GEPs, codegenprepare
  ; would try to turn the GEPs into math, which makes LoadStoreVectorizer's job
  ; harder.
  %tmp297.4 = load i32, i32* %tmp296.4, align 4, !tbaa !0
  %tmp297.5 = load i32, i32* %tmp296.5, align 4, !tbaa !0
  br label %exit

exit:
  ret void
}

; Same index computation as @test, but each address is formed in the %loads
; block as explicit byte arithmetic: (index * 4) added to the global's base
; address, followed by a constant 4096-byte offset.
; NOTE(review): judging by the function name and the %sunkaddr value names,
; this looks like the result of running codegenprepare on @test -- confirm.
; Function Attrs: convergent nounwind
define void @test.codegenprepared(i32 %base) #0 {
; CHECK-LABEL: @test.codegenprepared(
; CHECK-NOT: load i32
; CHECK: load <2 x i32>
; CHECK-NOT: load i32
entry:
  ; Multiple-of-4 base index, then the two consecutive element indices.
  %mul331 = and i32 %base, -4
  %add350.4 = add i32 4, %mul331
  %idx351.4 = zext i32 %add350.4 to i64
  %add350.5 = add i32 5, %mul331
  %idx351.5 = zext i32 %add350.5 to i64
  %cnd = icmp ult i32 %base, 1000
  br i1 %cnd, label %loads, label %exit

loads:                                            ; preds = %entry
  ; First address: global base + %idx351.4 * 4 bytes + 4096 bytes.
  %sunkaddr = mul i64 %idx351.4, 4
  %sunkaddr1 = getelementptr inbounds i8, i8* bitcast ({ %union, [2000 x i8] }* @global_pointer to i8*), i64 %sunkaddr
  %sunkaddr2 = getelementptr inbounds i8, i8* %sunkaddr1, i64 4096
  %0 = bitcast i8* %sunkaddr2 to i32*
  %tmp297.4 = load i32, i32* %0, align 4, !tbaa !0
  ; Second address: global base + %idx351.5 * 4 bytes + 4096 bytes.
  %sunkaddr3 = mul i64 %idx351.5, 4
  %sunkaddr4 = getelementptr inbounds i8, i8* bitcast ({ %union, [2000 x i8] }* @global_pointer to i8*), i64 %sunkaddr3
  %sunkaddr5 = getelementptr inbounds i8, i8* %sunkaddr4, i64 4096
  %1 = bitcast i8* %sunkaddr5 to i32*
  %tmp297.5 = load i32, i32* %1, align 4, !tbaa !0
  br label %exit

exit:                                             ; preds = %loads, %entry
  ret void
}

attributes #0 = { convergent nounwind }

; TBAA access tag attached to all four loads: a "float" access at offset 0.
; Its type node is parented by "omnipotent char", under the "Simple C++ TBAA"
; root.
!0 = !{!1, !1, i64 0}
!1 = !{!"float", !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C++ TBAA"}