1; RUN: opt -codegenprepare -load-store-vectorizer %s -S -o - | FileCheck %s
2; RUN: opt                 -load-store-vectorizer %s -S -o - | FileCheck %s
3
; Module-level declarations shared by both test functions below.
4target triple = "x86_64--"
5
; %union wraps a single struct with three float-array fields. Each of the first
; two fields holds 4*4*4*16 = 1024 floats (4096 bytes), so field 1 starts 4096
; bytes past the start of the struct.
6%union = type { { [4 x [4 x [4 x [16 x float]]]], [4 x [4 x [4 x [16 x float]]]], [10 x [10 x [4 x float]]] } }
7
; External, 4-byte-aligned global that both functions address into: a %union
; followed by a 2000-byte array.
8@global_pointer = external unnamed_addr global { %union, [2000 x i8] }, align 4
9
10; Function Attrs: convergent nounwind
; @test loads two i32 values from adjacent float slots of field 1 of
; @global_pointer, at element indices (%base & -4) + 4 and (%base & -4) + 5.
; The addresses are formed by GEPs in %entry while the loads themselves live in
; %loads. The checks below require the two scalar loads to be merged into one
; <2 x i32> load, with no scalar i32 load remaining in the function.
11define void @test(i32 %base) #0 {
12; CHECK-LABEL: @test(
13; CHECK-NOT: load i32
14; CHECK: load <2 x i32>
15; CHECK-NOT: load i32
16entry:
17  %mul331 = and i32 %base, -4 ; clear the low two bits of %base
18  %add350.4 = add i32 4, %mul331 ; first element index
19  %idx351.4 = zext i32 %add350.4 to i64
20  %arrayidx352.4 = getelementptr inbounds { %union, [2000 x i8] }, { %union, [2000 x i8] }* @global_pointer, i64 0, i32 0, i32 0, i32 1, i64 0, i64 0, i64 0, i64 %idx351.4
21  %tmp296.4 = bitcast float* %arrayidx352.4 to i32* ; view the float slot as i32
22  %add350.5 = add i32 5, %mul331 ; second element index, one past the first
23  %idx351.5 = zext i32 %add350.5 to i64
24  %arrayidx352.5 = getelementptr inbounds { %union, [2000 x i8] }, { %union, [2000 x i8] }* @global_pointer, i64 0, i32 0, i32 0, i32 1, i64 0, i64 0, i64 0, i64 %idx351.5
25  %tmp296.5 = bitcast float* %arrayidx352.5 to i32* ; view the float slot as i32
26  %cnd = icmp ult i32 %base, 1000 ; unsigned compare; guards the block holding the loads
27  br i1 %cnd, label %loads, label %exit
28
29loads:
30  ; Only when the loads are in a different basic block from the GEPs does
31  ; codegenprepare try to turn the GEPs into math, which makes the
32  ; LoadStoreVectorizer's job harder.
33  %tmp297.4 = load i32, i32* %tmp296.4, align 4, !tbaa !0
34  %tmp297.5 = load i32, i32* %tmp296.5, align 4, !tbaa !0
35  br label %exit
36
37exit:
38  ret void
39}
40
41; Function Attrs: convergent nounwind
; @test.codegenprepared performs the same two loads as @test, but the address
; arithmetic is already written out in %loads as a multiply plus byte-wise (i8)
; GEPs instead of structured GEPs in %entry.
; NOTE(review): presumably this is the form codegenprepare produces from @test
; (going by the function name and the %sunkaddr value names) - confirm.
; The checks again require a single <2 x i32> load and no scalar i32 load.
42define void @test.codegenprepared(i32 %base) #0 {
43; CHECK-LABEL: @test.codegenprepared(
44; CHECK-NOT: load i32
45; CHECK: load <2 x i32>
46; CHECK-NOT: load i32
47entry:
48  %mul331 = and i32 %base, -4 ; clear the low two bits of %base
49  %add350.4 = add i32 4, %mul331 ; first element index
50  %idx351.4 = zext i32 %add350.4 to i64
51  %add350.5 = add i32 5, %mul331 ; second element index, one past the first
52  %idx351.5 = zext i32 %add350.5 to i64
53  %cnd = icmp ult i32 %base, 1000 ; unsigned compare; guards the block holding the loads
54  br i1 %cnd, label %loads, label %exit
55
56loads:                                            ; preds = %entry
57  %sunkaddr = mul i64 %idx351.4, 4 ; element index -> byte offset (4 bytes per element)
58  %sunkaddr1 = getelementptr inbounds i8, i8* bitcast ({ %union, [2000 x i8] }* @global_pointer to i8*), i64 %sunkaddr
59  %sunkaddr2 = getelementptr inbounds i8, i8* %sunkaddr1, i64 4096 ; skip the 4096-byte field 0 to reach field 1
60  %0 = bitcast i8* %sunkaddr2 to i32*
61  %tmp297.4 = load i32, i32* %0, align 4, !tbaa !0
62  %sunkaddr3 = mul i64 %idx351.5, 4 ; element index -> byte offset (4 bytes per element)
63  %sunkaddr4 = getelementptr inbounds i8, i8* bitcast ({ %union, [2000 x i8] }* @global_pointer to i8*), i64 %sunkaddr3
64  %sunkaddr5 = getelementptr inbounds i8, i8* %sunkaddr4, i64 4096 ; same 4096-byte field offset as above
65  %1 = bitcast i8* %sunkaddr5 to i32*
66  %tmp297.5 = load i32, i32* %1, align 4, !tbaa !0
67  br label %exit
68
69exit:                                             ; preds = %loads, %entry
70  ret void
71}
72
; Attribute group referenced as #0 by both test functions.
73attributes #0 = { convergent nounwind }
74
; TBAA metadata attached to every load in this file via !tbaa !0.
; !0 is the access tag (base type !1, access type !1, offset 0); !1 is the
; "float" type node, whose parent !2 is "omnipotent char" under the root !3.
75!0 = !{!1, !1, i64 0}
76!1 = !{!"float", !2, i64 0}
77!2 = !{!"omnipotent char", !3, i64 0}
78!3 = !{!"Simple C++ TBAA"}
79