1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -force-vector-width=4 -loop-vectorize -mcpu=haswell < %s | FileCheck %s
3
; Module-level target configuration shared by every test in this file:
; an x86-64 Linux triple (the opt invocation at the top additionally passes
; -mcpu=haswell).
; NOTE(review): the trailing "ni:1" data layout component presumably marks
; address space 1 as non-integral -- confirm against the LangRef.
4target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
5target triple = "x86_64-unknown-linux-gnu"
6
7;; This file includes tests for avoiding the need for a masked.load.
8;; The conditional loads here don't need a masked.load due to deref facts,
9;; and can instead use a plain vector load.
10
; External helper with no body in this file; each test passes its stack
; buffer to it before entering the loop. The pointer parameter is declared
; nocapture and nofree.
11declare void @init(i32* nocapture nofree)
12
13;; For ease of explanation, this one uses an explicit range check as the
14;; predicate, but there are better lowering options specifically for
15;; this test (i.e. reducing the iteration space of the vector copy), so the
16;; following tests are written more generically.
17define i32 @test_explicit_pred(i64 %len) {
18; CHECK-LABEL: @test_explicit_pred(
19; CHECK-NEXT:  entry:
20; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
21; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
22; CHECK-NEXT:    call void @init(i32* [[BASE]])
23; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
24; CHECK:       vector.ph:
25; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[LEN:%.*]], i32 0
26; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
27; CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0
28; CHECK-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> poison, <4 x i32> zeroinitializer
29; CHECK-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0
30; CHECK-NEXT:    [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT9]], <4 x i64> poison, <4 x i32> zeroinitializer
31; CHECK-NEXT:    [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0
32; CHECK-NEXT:    [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT11]], <4 x i64> poison, <4 x i32> zeroinitializer
33; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
34; CHECK:       vector.body:
35; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
36; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
37; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ]
38; CHECK-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ]
39; CHECK-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ]
40; CHECK-NEXT:    [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP39:%.*]], [[VECTOR_BODY]] ]
41; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
42; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], <i64 4, i64 4, i64 4, i64 4>
43; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], <i64 4, i64 4, i64 4, i64 4>
44; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
45; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
46; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
47; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
48; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
49; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
50; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
51; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
52; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
53; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
54; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
55; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
56; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
57; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
58; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
59; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
60; CHECK-NEXT:    [[TMP16:%.*]] = icmp slt <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
61; CHECK-NEXT:    [[TMP17:%.*]] = icmp slt <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT8]]
62; CHECK-NEXT:    [[TMP18:%.*]] = icmp slt <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT10]]
63; CHECK-NEXT:    [[TMP19:%.*]] = icmp slt <4 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT12]]
64; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
65; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
66; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
67; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
68; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i32, i32* [[TMP20]], i32 0
69; CHECK-NEXT:    [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>*
70; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP25]], align 4
71; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr i32, i32* [[TMP20]], i32 4
72; CHECK-NEXT:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
73; CHECK-NEXT:    [[WIDE_LOAD13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP27]], align 4
74; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i32, i32* [[TMP20]], i32 8
75; CHECK-NEXT:    [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
76; CHECK-NEXT:    [[WIDE_LOAD14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
77; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr i32, i32* [[TMP20]], i32 12
78; CHECK-NEXT:    [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <4 x i32>*
79; CHECK-NEXT:    [[WIDE_LOAD15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP31]], align 4
80; CHECK-NEXT:    [[TMP32:%.*]] = xor <4 x i1> [[TMP16]], <i1 true, i1 true, i1 true, i1 true>
81; CHECK-NEXT:    [[TMP33:%.*]] = xor <4 x i1> [[TMP17]], <i1 true, i1 true, i1 true, i1 true>
82; CHECK-NEXT:    [[TMP34:%.*]] = xor <4 x i1> [[TMP18]], <i1 true, i1 true, i1 true, i1 true>
83; CHECK-NEXT:    [[TMP35:%.*]] = xor <4 x i1> [[TMP19]], <i1 true, i1 true, i1 true, i1 true>
84; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP16]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer
85; CHECK-NEXT:    [[PREDPHI16:%.*]] = select <4 x i1> [[TMP17]], <4 x i32> [[WIDE_LOAD13]], <4 x i32> zeroinitializer
86; CHECK-NEXT:    [[PREDPHI17:%.*]] = select <4 x i1> [[TMP18]], <4 x i32> [[WIDE_LOAD14]], <4 x i32> zeroinitializer
87; CHECK-NEXT:    [[PREDPHI18:%.*]] = select <4 x i1> [[TMP19]], <4 x i32> [[WIDE_LOAD15]], <4 x i32> zeroinitializer
88; CHECK-NEXT:    [[TMP36]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
89; CHECK-NEXT:    [[TMP37]] = add <4 x i32> [[VEC_PHI4]], [[PREDPHI16]]
90; CHECK-NEXT:    [[TMP38]] = add <4 x i32> [[VEC_PHI5]], [[PREDPHI17]]
91; CHECK-NEXT:    [[TMP39]] = add <4 x i32> [[VEC_PHI6]], [[PREDPHI18]]
92; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
93; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], <i64 4, i64 4, i64 4, i64 4>
94; CHECK-NEXT:    [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
95; CHECK-NEXT:    br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
96; CHECK:       middle.block:
97; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP37]], [[TMP36]]
98; CHECK-NEXT:    [[BIN_RDX19:%.*]] = add <4 x i32> [[TMP38]], [[BIN_RDX]]
99; CHECK-NEXT:    [[BIN_RDX20:%.*]] = add <4 x i32> [[TMP39]], [[BIN_RDX19]]
100; CHECK-NEXT:    [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX20]])
101; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
102; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
103; CHECK:       scalar.ph:
104; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
105; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ]
106; CHECK-NEXT:    br label [[LOOP:%.*]]
107; CHECK:       loop:
108; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
109; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
110; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
111; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp slt i64 [[IV]], [[LEN]]
112; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
113; CHECK:       pred:
114; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
115; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
116; CHECK-NEXT:    br label [[LATCH]]
117; CHECK:       latch:
118; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
119; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
120; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
121; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
122; CHECK:       loop_exit:
123; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ]
124; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
125;
; Input IR: a scalar loop that conditionally sums elements of a 4096 x i32
; stack buffer, guarded by a signed comparison of the induction variable
; against %len. The expected output above uses plain <4 x i32> loads followed
; by selects for the guarded access.
126entry:
  ; Stack buffer of 4096 i32s, handed to @init before the loop runs.
127  %alloca = alloca [4096 x i32]
128  %base = bitcast [4096 x i32]* %alloca to i32*
129  call void @init(i32* %base)
130  br label %loop
131loop:
  ; %iv counts up from 0 in steps of 1; %accum carries the running sum.
132  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
133  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
134  %iv.next = add i64 %iv, 1
  ; Predicate: the load below only executes when %iv is signed-less-than %len.
135  %earlycnd = icmp slt i64 %iv, %len
136  br i1 %earlycnd, label %pred, label %latch
137pred:
  ; Conditional load of element %iv of the stack buffer.
138  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
139  %val = load i32, i32* %addr
140  br label %latch
141latch:
  ; %val.phi is 0 when the load was skipped, otherwise the loaded value.
142  %val.phi = phi i32 [0, %loop], [%val, %pred]
143  %accum.next = add i32 %accum, %val.phi
  ; Leave once %iv exceeds 4094 (unsigned), i.e. after the %iv == 4095
  ; iteration, so %iv never indexes past the 4096-element buffer.
144  %exit = icmp ugt i64 %iv, 4094
145  br i1 %exit, label %loop_exit, label %loop
146
147loop_exit:
148  ret i32 %accum.next
149}
150
151;; Similar to the above, but without an analyzable condition.
152define i32 @test_explicit_pred_generic(i64 %len, i1* %test_base) {
153; CHECK-LABEL: @test_explicit_pred_generic(
154; CHECK-NEXT:  entry:
155; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
156; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
157; CHECK-NEXT:    call void @init(i32* [[BASE]])
158; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
159; CHECK:       vector.ph:
160; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
161; CHECK:       vector.body:
162; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
163; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
164; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
165; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
166; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
167; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
168; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
169; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
170; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
171; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
172; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
173; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
174; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
175; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
176; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
177; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
178; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
179; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
180; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
181; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
182; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
183; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
184; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
185; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
186; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
187; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
188; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
189; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
190; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
191; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
192; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
193; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
194; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
195; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
196; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
197; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
198; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
199; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
200; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
201; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
202; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
203; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
204; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
205; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
206; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
207; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
208; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
209; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
210; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
211; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
212; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
213; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
214; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
215; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
216; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
217; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
218; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
219; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
220; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
221; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
222; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
223; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
224; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
225; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
226; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
227; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
228; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
229; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
230; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
231; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
232; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
233; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
234; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
235; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
236; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
237; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
238; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
239; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
240; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP71]], align 4
241; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
242; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
243; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP73]], align 4
244; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
245; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
246; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP75]], align 4
247; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
248; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
249; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
250; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
251; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer
252; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_LOAD4]], <4 x i32> zeroinitializer
253; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_LOAD5]], <4 x i32> zeroinitializer
254; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_LOAD6]], <4 x i32> zeroinitializer
255; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
256; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
257; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
258; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
259; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
260; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
261; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
262; CHECK:       middle.block:
263; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
264; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
265; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
266; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
267; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
268; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
269; CHECK:       scalar.ph:
270; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
271; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
272; CHECK-NEXT:    br label [[LOOP:%.*]]
273; CHECK:       loop:
274; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
275; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
276; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
277; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
278; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
279; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
280; CHECK:       pred:
281; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
282; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
283; CHECK-NEXT:    br label [[LATCH]]
284; CHECK:       latch:
285; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
286; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
287; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
288; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
289; CHECK:       loop_exit:
290; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
291; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
292;
; Input IR: a scalar loop that conditionally sums elements of a 4096 x i32
; stack buffer, where the guard is an i1 loaded from %test_base at index %iv
; rather than a comparison on the induction variable. %len is not used in the
; body. The expected output above gathers the i1 predicates with scalar loads
; and still uses plain <4 x i32> loads plus selects for the guarded access.
293entry:
  ; Stack buffer of 4096 i32s, handed to @init before the loop runs.
294  %alloca = alloca [4096 x i32]
295  %base = bitcast [4096 x i32]* %alloca to i32*
296  call void @init(i32* %base)
297  br label %loop
298loop:
  ; %iv counts up from 0 in steps of 1; %accum carries the running sum.
299  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
300  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
301  %iv.next = add i64 %iv, 1
  ; Predicate: an i1 read from memory at %test_base + %iv decides whether the
  ; load in %pred executes.
302  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
303  %earlycnd = load i1, i1* %test_addr
304  br i1 %earlycnd, label %pred, label %latch
305pred:
  ; Conditional load of element %iv of the stack buffer.
306  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
307  %val = load i32, i32* %addr
308  br label %latch
309latch:
  ; %val.phi is 0 when the load was skipped, otherwise the loaded value.
310  %val.phi = phi i32 [0, %loop], [%val, %pred]
311  %accum.next = add i32 %accum, %val.phi
  ; Leave once %iv exceeds 4094 (unsigned), i.e. after the %iv == 4095
  ; iteration, so %iv never indexes past the 4096-element buffer.
312  %exit = icmp ugt i64 %iv, 4094
313  br i1 %exit, label %loop_exit, label %loop
314
315loop_exit:
316  ret i32 %accum.next
317}
318
319; Trivial case where the address loaded from is loop invariant (and yes,
320; there are better lowerings, this is a test of robustness of vectorization,
321; nothing more.)
322; TODO: currently shows predication which can be removed
323define i32 @test_invariant_address(i64 %len, i1* %test_base) {
324; CHECK-LABEL: @test_invariant_address(
325; CHECK-NEXT:  entry:
326; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
327; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
328; CHECK-NEXT:    call void @init(i32* [[BASE]])
329; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
330; CHECK:       vector.ph:
331; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
332; CHECK:       vector.body:
333; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
334; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP100:%.*]], [[VECTOR_BODY]] ]
335; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP101:%.*]], [[VECTOR_BODY]] ]
336; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP102:%.*]], [[VECTOR_BODY]] ]
337; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP103:%.*]], [[VECTOR_BODY]] ]
338; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
339; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
340; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
341; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
342; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
343; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
344; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
345; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
346; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
347; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
348; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
349; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
350; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
351; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
352; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
353; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
354; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
355; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
356; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
357; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
358; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
359; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
360; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
361; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
362; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
363; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
364; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
365; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
366; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
367; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
368; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
369; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
370; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
371; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
372; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
373; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
374; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
375; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
376; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
377; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
378; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
379; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
380; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
381; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
382; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
383; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
384; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
385; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
386; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
387; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
388; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
389; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
390; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
391; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
392; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
393; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
394; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
395; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
396; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
397; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
398; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
399; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
400; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
401; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
402; CHECK-NEXT:    [[TMP64:%.*]] = load i32, i32* [[BASE]], align 4
403; CHECK-NEXT:    [[TMP65:%.*]] = load i32, i32* [[BASE]], align 4
404; CHECK-NEXT:    [[TMP66:%.*]] = load i32, i32* [[BASE]], align 4
405; CHECK-NEXT:    [[TMP67:%.*]] = load i32, i32* [[BASE]], align 4
406; CHECK-NEXT:    [[TMP68:%.*]] = insertelement <4 x i32> poison, i32 [[TMP64]], i32 0
407; CHECK-NEXT:    [[TMP69:%.*]] = insertelement <4 x i32> [[TMP68]], i32 [[TMP65]], i32 1
408; CHECK-NEXT:    [[TMP70:%.*]] = insertelement <4 x i32> [[TMP69]], i32 [[TMP66]], i32 2
409; CHECK-NEXT:    [[TMP71:%.*]] = insertelement <4 x i32> [[TMP70]], i32 [[TMP67]], i32 3
410; CHECK-NEXT:    [[TMP72:%.*]] = load i32, i32* [[BASE]], align 4
411; CHECK-NEXT:    [[TMP73:%.*]] = load i32, i32* [[BASE]], align 4
412; CHECK-NEXT:    [[TMP74:%.*]] = load i32, i32* [[BASE]], align 4
413; CHECK-NEXT:    [[TMP75:%.*]] = load i32, i32* [[BASE]], align 4
414; CHECK-NEXT:    [[TMP76:%.*]] = insertelement <4 x i32> poison, i32 [[TMP72]], i32 0
415; CHECK-NEXT:    [[TMP77:%.*]] = insertelement <4 x i32> [[TMP76]], i32 [[TMP73]], i32 1
416; CHECK-NEXT:    [[TMP78:%.*]] = insertelement <4 x i32> [[TMP77]], i32 [[TMP74]], i32 2
417; CHECK-NEXT:    [[TMP79:%.*]] = insertelement <4 x i32> [[TMP78]], i32 [[TMP75]], i32 3
418; CHECK-NEXT:    [[TMP80:%.*]] = load i32, i32* [[BASE]], align 4
419; CHECK-NEXT:    [[TMP81:%.*]] = load i32, i32* [[BASE]], align 4
420; CHECK-NEXT:    [[TMP82:%.*]] = load i32, i32* [[BASE]], align 4
421; CHECK-NEXT:    [[TMP83:%.*]] = load i32, i32* [[BASE]], align 4
422; CHECK-NEXT:    [[TMP84:%.*]] = insertelement <4 x i32> poison, i32 [[TMP80]], i32 0
423; CHECK-NEXT:    [[TMP85:%.*]] = insertelement <4 x i32> [[TMP84]], i32 [[TMP81]], i32 1
424; CHECK-NEXT:    [[TMP86:%.*]] = insertelement <4 x i32> [[TMP85]], i32 [[TMP82]], i32 2
425; CHECK-NEXT:    [[TMP87:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP83]], i32 3
426; CHECK-NEXT:    [[TMP88:%.*]] = load i32, i32* [[BASE]], align 4
427; CHECK-NEXT:    [[TMP89:%.*]] = load i32, i32* [[BASE]], align 4
428; CHECK-NEXT:    [[TMP90:%.*]] = load i32, i32* [[BASE]], align 4
429; CHECK-NEXT:    [[TMP91:%.*]] = load i32, i32* [[BASE]], align 4
430; CHECK-NEXT:    [[TMP92:%.*]] = insertelement <4 x i32> poison, i32 [[TMP88]], i32 0
431; CHECK-NEXT:    [[TMP93:%.*]] = insertelement <4 x i32> [[TMP92]], i32 [[TMP89]], i32 1
432; CHECK-NEXT:    [[TMP94:%.*]] = insertelement <4 x i32> [[TMP93]], i32 [[TMP90]], i32 2
433; CHECK-NEXT:    [[TMP95:%.*]] = insertelement <4 x i32> [[TMP94]], i32 [[TMP91]], i32 3
434; CHECK-NEXT:    [[TMP96:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
435; CHECK-NEXT:    [[TMP97:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
436; CHECK-NEXT:    [[TMP98:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
437; CHECK-NEXT:    [[TMP99:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
438; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[TMP71]], <4 x i32> zeroinitializer
439; CHECK-NEXT:    [[PREDPHI4:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP79]], <4 x i32> zeroinitializer
440; CHECK-NEXT:    [[PREDPHI5:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP87]], <4 x i32> zeroinitializer
441; CHECK-NEXT:    [[PREDPHI6:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP95]], <4 x i32> zeroinitializer
442; CHECK-NEXT:    [[TMP100]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
443; CHECK-NEXT:    [[TMP101]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI4]]
444; CHECK-NEXT:    [[TMP102]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI5]]
445; CHECK-NEXT:    [[TMP103]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]]
446; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
447; CHECK-NEXT:    [[TMP104:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
448; CHECK-NEXT:    br i1 [[TMP104]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
449; CHECK:       middle.block:
450; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP101]], [[TMP100]]
451; CHECK-NEXT:    [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP102]], [[BIN_RDX]]
452; CHECK-NEXT:    [[BIN_RDX8:%.*]] = add <4 x i32> [[TMP103]], [[BIN_RDX7]]
453; CHECK-NEXT:    [[TMP105:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]])
454; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
455; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
456; CHECK:       scalar.ph:
457; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
458; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP105]], [[MIDDLE_BLOCK]] ]
459; CHECK-NEXT:    br label [[LOOP:%.*]]
460; CHECK:       loop:
461; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
462; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
463; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
464; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
465; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
466; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
467; CHECK:       pred:
468; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[BASE]], align 4
469; CHECK-NEXT:    br label [[LATCH]]
470; CHECK:       latch:
471; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
472; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
473; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
474; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
475; CHECK:       loop_exit:
476; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP105]], [[MIDDLE_BLOCK]] ]
477; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
478;
479entry:
480  %alloca = alloca [4096 x i32]
481  %base = bitcast [4096 x i32]* %alloca to i32*
482  call void @init(i32* %base)
483  br label %loop
484loop:
485  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
486  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
487  %iv.next = add i64 %iv, 1
488  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
489  %earlycnd = load i1, i1* %test_addr
490  br i1 %earlycnd, label %pred, label %latch
491pred:
492  %val = load i32, i32* %base
493  br label %latch
494latch:
495  %val.phi = phi i32 [0, %loop], [%val, %pred]
496  %accum.next = add i32 %accum, %val.phi
497  %exit = icmp ugt i64 %iv, 4094
498  br i1 %exit, label %loop_exit, label %loop
499
500loop_exit:
501  ret i32 %accum.next
502}
503
504; Overlapping loads - Fails alignment checking, not dereferenceability
; Each iteration whose guard is true loads a 4-byte i32 from %base + 2*%iv
; bytes (the base is indexed as i16*), so consecutive loads overlap by two
; bytes and the address advances by only half of the load's `align 4`.
; The furthest access (%iv = 4095) covers bytes 8190..8193 of the 16384-byte
; alloca, so every access stays in bounds.
; The CHECK lines below expect the conditional load to be kept as per-lane
; predicated scalar loads (pred.load.if* / pred.load.continue* blocks); no
; wide or masked vector load of %base appears in the vector body.
; NOTE(review): %len is not referenced anywhere in this function's body.
505define i32 @test_step_narrower_than_access(i64 %len, i1* %test_base) {
506; CHECK-LABEL: @test_step_narrower_than_access(
507; CHECK-NEXT:  entry:
508; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
509; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
510; CHECK-NEXT:    call void @init(i32* [[BASE]])
511; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
512; CHECK:       vector.ph:
513; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
514; CHECK:       vector.body:
515; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE33:%.*]] ]
516; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP180:%.*]], [[PRED_LOAD_CONTINUE33]] ]
517; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP181:%.*]], [[PRED_LOAD_CONTINUE33]] ]
518; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP182:%.*]], [[PRED_LOAD_CONTINUE33]] ]
519; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP183:%.*]], [[PRED_LOAD_CONTINUE33]] ]
520; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
521; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
522; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
523; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
524; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
525; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
526; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
527; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
528; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
529; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
530; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
531; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
532; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
533; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
534; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
535; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
536; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
537; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
538; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
539; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
540; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
541; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
542; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
543; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
544; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
545; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
546; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
547; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
548; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
549; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
550; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
551; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
552; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
553; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
554; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
555; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
556; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
557; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
558; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
559; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
560; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
561; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
562; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
563; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
564; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
565; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
566; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
567; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
568; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
569; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
570; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
571; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
572; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
573; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
574; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
575; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
576; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
577; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
578; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
579; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
580; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
581; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
582; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
583; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
584; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <4 x i1> [[TMP39]], i32 0
585; CHECK-NEXT:    br i1 [[TMP64]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
586; CHECK:       pred.load.if:
587; CHECK-NEXT:    [[TMP65:%.*]] = bitcast i32* [[BASE]] to i16*
588; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr inbounds i16, i16* [[TMP65]], i64 [[TMP0]]
589; CHECK-NEXT:    [[TMP67:%.*]] = bitcast i16* [[TMP66]] to i32*
590; CHECK-NEXT:    [[TMP68:%.*]] = load i32, i32* [[TMP67]], align 4
591; CHECK-NEXT:    [[TMP69:%.*]] = insertelement <4 x i32> poison, i32 [[TMP68]], i32 0
592; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
593; CHECK:       pred.load.continue:
594; CHECK-NEXT:    [[TMP70:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP69]], [[PRED_LOAD_IF]] ]
595; CHECK-NEXT:    [[TMP71:%.*]] = extractelement <4 x i1> [[TMP39]], i32 1
596; CHECK-NEXT:    br i1 [[TMP71]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5:%.*]]
597; CHECK:       pred.load.if4:
598; CHECK-NEXT:    [[TMP72:%.*]] = bitcast i32* [[BASE]] to i16*
599; CHECK-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i16, i16* [[TMP72]], i64 [[TMP1]]
600; CHECK-NEXT:    [[TMP74:%.*]] = bitcast i16* [[TMP73]] to i32*
601; CHECK-NEXT:    [[TMP75:%.*]] = load i32, i32* [[TMP74]], align 4
602; CHECK-NEXT:    [[TMP76:%.*]] = insertelement <4 x i32> [[TMP70]], i32 [[TMP75]], i32 1
603; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE5]]
604; CHECK:       pred.load.continue5:
605; CHECK-NEXT:    [[TMP77:%.*]] = phi <4 x i32> [ [[TMP70]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP76]], [[PRED_LOAD_IF4]] ]
606; CHECK-NEXT:    [[TMP78:%.*]] = extractelement <4 x i1> [[TMP39]], i32 2
607; CHECK-NEXT:    br i1 [[TMP78]], label [[PRED_LOAD_IF6:%.*]], label [[PRED_LOAD_CONTINUE7:%.*]]
608; CHECK:       pred.load.if6:
609; CHECK-NEXT:    [[TMP79:%.*]] = bitcast i32* [[BASE]] to i16*
610; CHECK-NEXT:    [[TMP80:%.*]] = getelementptr inbounds i16, i16* [[TMP79]], i64 [[TMP2]]
611; CHECK-NEXT:    [[TMP81:%.*]] = bitcast i16* [[TMP80]] to i32*
612; CHECK-NEXT:    [[TMP82:%.*]] = load i32, i32* [[TMP81]], align 4
613; CHECK-NEXT:    [[TMP83:%.*]] = insertelement <4 x i32> [[TMP77]], i32 [[TMP82]], i32 2
614; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE7]]
615; CHECK:       pred.load.continue7:
616; CHECK-NEXT:    [[TMP84:%.*]] = phi <4 x i32> [ [[TMP77]], [[PRED_LOAD_CONTINUE5]] ], [ [[TMP83]], [[PRED_LOAD_IF6]] ]
617; CHECK-NEXT:    [[TMP85:%.*]] = extractelement <4 x i1> [[TMP39]], i32 3
618; CHECK-NEXT:    br i1 [[TMP85]], label [[PRED_LOAD_IF8:%.*]], label [[PRED_LOAD_CONTINUE9:%.*]]
619; CHECK:       pred.load.if8:
620; CHECK-NEXT:    [[TMP86:%.*]] = bitcast i32* [[BASE]] to i16*
621; CHECK-NEXT:    [[TMP87:%.*]] = getelementptr inbounds i16, i16* [[TMP86]], i64 [[TMP3]]
622; CHECK-NEXT:    [[TMP88:%.*]] = bitcast i16* [[TMP87]] to i32*
623; CHECK-NEXT:    [[TMP89:%.*]] = load i32, i32* [[TMP88]], align 4
624; CHECK-NEXT:    [[TMP90:%.*]] = insertelement <4 x i32> [[TMP84]], i32 [[TMP89]], i32 3
625; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE9]]
626; CHECK:       pred.load.continue9:
627; CHECK-NEXT:    [[TMP91:%.*]] = phi <4 x i32> [ [[TMP84]], [[PRED_LOAD_CONTINUE7]] ], [ [[TMP90]], [[PRED_LOAD_IF8]] ]
628; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <4 x i1> [[TMP47]], i32 0
629; CHECK-NEXT:    br i1 [[TMP92]], label [[PRED_LOAD_IF10:%.*]], label [[PRED_LOAD_CONTINUE11:%.*]]
630; CHECK:       pred.load.if10:
631; CHECK-NEXT:    [[TMP93:%.*]] = bitcast i32* [[BASE]] to i16*
632; CHECK-NEXT:    [[TMP94:%.*]] = getelementptr inbounds i16, i16* [[TMP93]], i64 [[TMP4]]
633; CHECK-NEXT:    [[TMP95:%.*]] = bitcast i16* [[TMP94]] to i32*
634; CHECK-NEXT:    [[TMP96:%.*]] = load i32, i32* [[TMP95]], align 4
635; CHECK-NEXT:    [[TMP97:%.*]] = insertelement <4 x i32> poison, i32 [[TMP96]], i32 0
636; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE11]]
637; CHECK:       pred.load.continue11:
638; CHECK-NEXT:    [[TMP98:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE9]] ], [ [[TMP97]], [[PRED_LOAD_IF10]] ]
639; CHECK-NEXT:    [[TMP99:%.*]] = extractelement <4 x i1> [[TMP47]], i32 1
640; CHECK-NEXT:    br i1 [[TMP99]], label [[PRED_LOAD_IF12:%.*]], label [[PRED_LOAD_CONTINUE13:%.*]]
641; CHECK:       pred.load.if12:
642; CHECK-NEXT:    [[TMP100:%.*]] = bitcast i32* [[BASE]] to i16*
643; CHECK-NEXT:    [[TMP101:%.*]] = getelementptr inbounds i16, i16* [[TMP100]], i64 [[TMP5]]
644; CHECK-NEXT:    [[TMP102:%.*]] = bitcast i16* [[TMP101]] to i32*
645; CHECK-NEXT:    [[TMP103:%.*]] = load i32, i32* [[TMP102]], align 4
646; CHECK-NEXT:    [[TMP104:%.*]] = insertelement <4 x i32> [[TMP98]], i32 [[TMP103]], i32 1
647; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE13]]
648; CHECK:       pred.load.continue13:
649; CHECK-NEXT:    [[TMP105:%.*]] = phi <4 x i32> [ [[TMP98]], [[PRED_LOAD_CONTINUE11]] ], [ [[TMP104]], [[PRED_LOAD_IF12]] ]
650; CHECK-NEXT:    [[TMP106:%.*]] = extractelement <4 x i1> [[TMP47]], i32 2
651; CHECK-NEXT:    br i1 [[TMP106]], label [[PRED_LOAD_IF14:%.*]], label [[PRED_LOAD_CONTINUE15:%.*]]
652; CHECK:       pred.load.if14:
653; CHECK-NEXT:    [[TMP107:%.*]] = bitcast i32* [[BASE]] to i16*
654; CHECK-NEXT:    [[TMP108:%.*]] = getelementptr inbounds i16, i16* [[TMP107]], i64 [[TMP6]]
655; CHECK-NEXT:    [[TMP109:%.*]] = bitcast i16* [[TMP108]] to i32*
656; CHECK-NEXT:    [[TMP110:%.*]] = load i32, i32* [[TMP109]], align 4
657; CHECK-NEXT:    [[TMP111:%.*]] = insertelement <4 x i32> [[TMP105]], i32 [[TMP110]], i32 2
658; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE15]]
659; CHECK:       pred.load.continue15:
660; CHECK-NEXT:    [[TMP112:%.*]] = phi <4 x i32> [ [[TMP105]], [[PRED_LOAD_CONTINUE13]] ], [ [[TMP111]], [[PRED_LOAD_IF14]] ]
661; CHECK-NEXT:    [[TMP113:%.*]] = extractelement <4 x i1> [[TMP47]], i32 3
662; CHECK-NEXT:    br i1 [[TMP113]], label [[PRED_LOAD_IF16:%.*]], label [[PRED_LOAD_CONTINUE17:%.*]]
663; CHECK:       pred.load.if16:
664; CHECK-NEXT:    [[TMP114:%.*]] = bitcast i32* [[BASE]] to i16*
665; CHECK-NEXT:    [[TMP115:%.*]] = getelementptr inbounds i16, i16* [[TMP114]], i64 [[TMP7]]
666; CHECK-NEXT:    [[TMP116:%.*]] = bitcast i16* [[TMP115]] to i32*
667; CHECK-NEXT:    [[TMP117:%.*]] = load i32, i32* [[TMP116]], align 4
668; CHECK-NEXT:    [[TMP118:%.*]] = insertelement <4 x i32> [[TMP112]], i32 [[TMP117]], i32 3
669; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE17]]
670; CHECK:       pred.load.continue17:
671; CHECK-NEXT:    [[TMP119:%.*]] = phi <4 x i32> [ [[TMP112]], [[PRED_LOAD_CONTINUE15]] ], [ [[TMP118]], [[PRED_LOAD_IF16]] ]
672; CHECK-NEXT:    [[TMP120:%.*]] = extractelement <4 x i1> [[TMP55]], i32 0
673; CHECK-NEXT:    br i1 [[TMP120]], label [[PRED_LOAD_IF18:%.*]], label [[PRED_LOAD_CONTINUE19:%.*]]
674; CHECK:       pred.load.if18:
675; CHECK-NEXT:    [[TMP121:%.*]] = bitcast i32* [[BASE]] to i16*
676; CHECK-NEXT:    [[TMP122:%.*]] = getelementptr inbounds i16, i16* [[TMP121]], i64 [[TMP8]]
677; CHECK-NEXT:    [[TMP123:%.*]] = bitcast i16* [[TMP122]] to i32*
678; CHECK-NEXT:    [[TMP124:%.*]] = load i32, i32* [[TMP123]], align 4
679; CHECK-NEXT:    [[TMP125:%.*]] = insertelement <4 x i32> poison, i32 [[TMP124]], i32 0
680; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE19]]
681; CHECK:       pred.load.continue19:
682; CHECK-NEXT:    [[TMP126:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP125]], [[PRED_LOAD_IF18]] ]
683; CHECK-NEXT:    [[TMP127:%.*]] = extractelement <4 x i1> [[TMP55]], i32 1
684; CHECK-NEXT:    br i1 [[TMP127]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]]
685; CHECK:       pred.load.if20:
686; CHECK-NEXT:    [[TMP128:%.*]] = bitcast i32* [[BASE]] to i16*
687; CHECK-NEXT:    [[TMP129:%.*]] = getelementptr inbounds i16, i16* [[TMP128]], i64 [[TMP9]]
688; CHECK-NEXT:    [[TMP130:%.*]] = bitcast i16* [[TMP129]] to i32*
689; CHECK-NEXT:    [[TMP131:%.*]] = load i32, i32* [[TMP130]], align 4
690; CHECK-NEXT:    [[TMP132:%.*]] = insertelement <4 x i32> [[TMP126]], i32 [[TMP131]], i32 1
691; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE21]]
692; CHECK:       pred.load.continue21:
693; CHECK-NEXT:    [[TMP133:%.*]] = phi <4 x i32> [ [[TMP126]], [[PRED_LOAD_CONTINUE19]] ], [ [[TMP132]], [[PRED_LOAD_IF20]] ]
694; CHECK-NEXT:    [[TMP134:%.*]] = extractelement <4 x i1> [[TMP55]], i32 2
695; CHECK-NEXT:    br i1 [[TMP134]], label [[PRED_LOAD_IF22:%.*]], label [[PRED_LOAD_CONTINUE23:%.*]]
696; CHECK:       pred.load.if22:
697; CHECK-NEXT:    [[TMP135:%.*]] = bitcast i32* [[BASE]] to i16*
698; CHECK-NEXT:    [[TMP136:%.*]] = getelementptr inbounds i16, i16* [[TMP135]], i64 [[TMP10]]
699; CHECK-NEXT:    [[TMP137:%.*]] = bitcast i16* [[TMP136]] to i32*
700; CHECK-NEXT:    [[TMP138:%.*]] = load i32, i32* [[TMP137]], align 4
701; CHECK-NEXT:    [[TMP139:%.*]] = insertelement <4 x i32> [[TMP133]], i32 [[TMP138]], i32 2
702; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE23]]
703; CHECK:       pred.load.continue23:
704; CHECK-NEXT:    [[TMP140:%.*]] = phi <4 x i32> [ [[TMP133]], [[PRED_LOAD_CONTINUE21]] ], [ [[TMP139]], [[PRED_LOAD_IF22]] ]
705; CHECK-NEXT:    [[TMP141:%.*]] = extractelement <4 x i1> [[TMP55]], i32 3
706; CHECK-NEXT:    br i1 [[TMP141]], label [[PRED_LOAD_IF24:%.*]], label [[PRED_LOAD_CONTINUE25:%.*]]
707; CHECK:       pred.load.if24:
708; CHECK-NEXT:    [[TMP142:%.*]] = bitcast i32* [[BASE]] to i16*
709; CHECK-NEXT:    [[TMP143:%.*]] = getelementptr inbounds i16, i16* [[TMP142]], i64 [[TMP11]]
710; CHECK-NEXT:    [[TMP144:%.*]] = bitcast i16* [[TMP143]] to i32*
711; CHECK-NEXT:    [[TMP145:%.*]] = load i32, i32* [[TMP144]], align 4
712; CHECK-NEXT:    [[TMP146:%.*]] = insertelement <4 x i32> [[TMP140]], i32 [[TMP145]], i32 3
713; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE25]]
714; CHECK:       pred.load.continue25:
715; CHECK-NEXT:    [[TMP147:%.*]] = phi <4 x i32> [ [[TMP140]], [[PRED_LOAD_CONTINUE23]] ], [ [[TMP146]], [[PRED_LOAD_IF24]] ]
716; CHECK-NEXT:    [[TMP148:%.*]] = extractelement <4 x i1> [[TMP63]], i32 0
717; CHECK-NEXT:    br i1 [[TMP148]], label [[PRED_LOAD_IF26:%.*]], label [[PRED_LOAD_CONTINUE27:%.*]]
718; CHECK:       pred.load.if26:
719; CHECK-NEXT:    [[TMP149:%.*]] = bitcast i32* [[BASE]] to i16*
720; CHECK-NEXT:    [[TMP150:%.*]] = getelementptr inbounds i16, i16* [[TMP149]], i64 [[TMP12]]
721; CHECK-NEXT:    [[TMP151:%.*]] = bitcast i16* [[TMP150]] to i32*
722; CHECK-NEXT:    [[TMP152:%.*]] = load i32, i32* [[TMP151]], align 4
723; CHECK-NEXT:    [[TMP153:%.*]] = insertelement <4 x i32> poison, i32 [[TMP152]], i32 0
724; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE27]]
725; CHECK:       pred.load.continue27:
726; CHECK-NEXT:    [[TMP154:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE25]] ], [ [[TMP153]], [[PRED_LOAD_IF26]] ]
727; CHECK-NEXT:    [[TMP155:%.*]] = extractelement <4 x i1> [[TMP63]], i32 1
728; CHECK-NEXT:    br i1 [[TMP155]], label [[PRED_LOAD_IF28:%.*]], label [[PRED_LOAD_CONTINUE29:%.*]]
729; CHECK:       pred.load.if28:
730; CHECK-NEXT:    [[TMP156:%.*]] = bitcast i32* [[BASE]] to i16*
731; CHECK-NEXT:    [[TMP157:%.*]] = getelementptr inbounds i16, i16* [[TMP156]], i64 [[TMP13]]
732; CHECK-NEXT:    [[TMP158:%.*]] = bitcast i16* [[TMP157]] to i32*
733; CHECK-NEXT:    [[TMP159:%.*]] = load i32, i32* [[TMP158]], align 4
734; CHECK-NEXT:    [[TMP160:%.*]] = insertelement <4 x i32> [[TMP154]], i32 [[TMP159]], i32 1
735; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE29]]
736; CHECK:       pred.load.continue29:
737; CHECK-NEXT:    [[TMP161:%.*]] = phi <4 x i32> [ [[TMP154]], [[PRED_LOAD_CONTINUE27]] ], [ [[TMP160]], [[PRED_LOAD_IF28]] ]
738; CHECK-NEXT:    [[TMP162:%.*]] = extractelement <4 x i1> [[TMP63]], i32 2
739; CHECK-NEXT:    br i1 [[TMP162]], label [[PRED_LOAD_IF30:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]]
740; CHECK:       pred.load.if30:
741; CHECK-NEXT:    [[TMP163:%.*]] = bitcast i32* [[BASE]] to i16*
742; CHECK-NEXT:    [[TMP164:%.*]] = getelementptr inbounds i16, i16* [[TMP163]], i64 [[TMP14]]
743; CHECK-NEXT:    [[TMP165:%.*]] = bitcast i16* [[TMP164]] to i32*
744; CHECK-NEXT:    [[TMP166:%.*]] = load i32, i32* [[TMP165]], align 4
745; CHECK-NEXT:    [[TMP167:%.*]] = insertelement <4 x i32> [[TMP161]], i32 [[TMP166]], i32 2
746; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE31]]
747; CHECK:       pred.load.continue31:
748; CHECK-NEXT:    [[TMP168:%.*]] = phi <4 x i32> [ [[TMP161]], [[PRED_LOAD_CONTINUE29]] ], [ [[TMP167]], [[PRED_LOAD_IF30]] ]
749; CHECK-NEXT:    [[TMP169:%.*]] = extractelement <4 x i1> [[TMP63]], i32 3
750; CHECK-NEXT:    br i1 [[TMP169]], label [[PRED_LOAD_IF32:%.*]], label [[PRED_LOAD_CONTINUE33]]
751; CHECK:       pred.load.if32:
752; CHECK-NEXT:    [[TMP170:%.*]] = bitcast i32* [[BASE]] to i16*
753; CHECK-NEXT:    [[TMP171:%.*]] = getelementptr inbounds i16, i16* [[TMP170]], i64 [[TMP15]]
754; CHECK-NEXT:    [[TMP172:%.*]] = bitcast i16* [[TMP171]] to i32*
755; CHECK-NEXT:    [[TMP173:%.*]] = load i32, i32* [[TMP172]], align 4
756; CHECK-NEXT:    [[TMP174:%.*]] = insertelement <4 x i32> [[TMP168]], i32 [[TMP173]], i32 3
757; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE33]]
758; CHECK:       pred.load.continue33:
759; CHECK-NEXT:    [[TMP175:%.*]] = phi <4 x i32> [ [[TMP168]], [[PRED_LOAD_CONTINUE31]] ], [ [[TMP174]], [[PRED_LOAD_IF32]] ]
760; CHECK-NEXT:    [[TMP176:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
761; CHECK-NEXT:    [[TMP177:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
762; CHECK-NEXT:    [[TMP178:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
763; CHECK-NEXT:    [[TMP179:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
764; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[TMP91]], <4 x i32> zeroinitializer
765; CHECK-NEXT:    [[PREDPHI34:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP119]], <4 x i32> zeroinitializer
766; CHECK-NEXT:    [[PREDPHI35:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP147]], <4 x i32> zeroinitializer
767; CHECK-NEXT:    [[PREDPHI36:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP175]], <4 x i32> zeroinitializer
768; CHECK-NEXT:    [[TMP180]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
769; CHECK-NEXT:    [[TMP181]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI34]]
770; CHECK-NEXT:    [[TMP182]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI35]]
771; CHECK-NEXT:    [[TMP183]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI36]]
772; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
773; CHECK-NEXT:    [[TMP184:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
774; CHECK-NEXT:    br i1 [[TMP184]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
775; CHECK:       middle.block:
776; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP181]], [[TMP180]]
777; CHECK-NEXT:    [[BIN_RDX37:%.*]] = add <4 x i32> [[TMP182]], [[BIN_RDX]]
778; CHECK-NEXT:    [[BIN_RDX38:%.*]] = add <4 x i32> [[TMP183]], [[BIN_RDX37]]
779; CHECK-NEXT:    [[TMP185:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX38]])
780; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
781; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
782; CHECK:       scalar.ph:
783; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
784; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP185]], [[MIDDLE_BLOCK]] ]
785; CHECK-NEXT:    br label [[LOOP:%.*]]
786; CHECK:       loop:
787; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
788; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
789; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
790; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
791; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
792; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
793; CHECK:       pred:
794; CHECK-NEXT:    [[BASE_I16P:%.*]] = bitcast i32* [[BASE]] to i16*
795; CHECK-NEXT:    [[ADDR_I16P:%.*]] = getelementptr inbounds i16, i16* [[BASE_I16P]], i64 [[IV]]
796; CHECK-NEXT:    [[ADDR:%.*]] = bitcast i16* [[ADDR_I16P]] to i32*
797; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
798; CHECK-NEXT:    br label [[LATCH]]
799; CHECK:       latch:
800; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
801; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
802; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
803; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
804; CHECK:       loop_exit:
805; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP185]], [[MIDDLE_BLOCK]] ]
806; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
807;
808entry:
  ; Stack buffer of 4096 i32 (16384 bytes), handed to the external @init
  ; before the loop reads from it.
809  %alloca = alloca [4096 x i32]
810  %base = bitcast [4096 x i32]* %alloca to i32*
811  call void @init(i32* %base)
812  br label %loop
813loop:
814  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
815  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
816  %iv.next = add i64 %iv, 1
  ; Per-iteration guard: the i1 at %test_base[%iv] decides whether the load
  ; in %pred executes.
817  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
818  %earlycnd = load i1, i1* %test_addr
819  br i1 %earlycnd, label %pred, label %latch
820pred:
  ; Index %base in 2-byte (i16) steps but load a full 4-byte i32, so
  ; successive iterations read overlapping windows. The load has no explicit
  ; alignment; the autogenerated CHECK lines show it as `align 4`.
821  %base.i16p = bitcast i32* %base to i16*
822  %addr.i16p = getelementptr inbounds i16, i16* %base.i16p, i64 %iv
823  %addr = bitcast i16* %addr.i16p to i32*
824  %val = load i32, i32* %addr
825  br label %latch
826latch:
  ; %val.phi is 0 when the guard was false, otherwise the loaded value.
827  %val.phi = phi i32 [0, %loop], [%val, %pred]
828  %accum.next = add i32 %accum, %val.phi
  ; The exit test uses %iv (not %iv.next): the loop leaves after the
  ; iteration with %iv = 4095, i.e. 4096 iterations in total.
829  %exit = icmp ugt i64 %iv, 4094
830  br i1 %exit, label %loop_exit, label %loop
831
832loop_exit:
833  ret i32 %accum.next
834}
835
836define i32 @test_max_trip_count(i64 %len, i1* %test_base, i64 %n) {
837; CHECK-LABEL: @test_max_trip_count(
838; CHECK-NEXT:  entry:
839; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
840; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
841; CHECK-NEXT:    call void @init(i32* [[BASE]])
842; CHECK-NEXT:    [[MIN_CMP:%.*]] = icmp ult i64 4096, [[N:%.*]]
843; CHECK-NEXT:    [[MIN_N:%.*]] = select i1 [[MIN_CMP]], i64 4096, i64 [[N]]
844; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i64 [[MIN_N]], 2
845; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
846; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
847; CHECK:       vector.ph:
848; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
849; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
850; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
851; CHECK:       vector.body:
852; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
853; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
854; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
855; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
856; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP84:%.*]], [[VECTOR_BODY]] ]
857; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
858; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
859; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 2
860; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 3
861; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 4
862; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 5
863; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 6
864; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 7
865; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 8
866; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 9
867; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 10
868; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 11
869; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 12
870; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 13
871; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 14
872; CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX]], 15
873; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP1]]
874; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
875; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
876; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
877; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
878; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
879; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
880; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
881; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
882; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
883; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
884; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
885; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
886; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
887; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
888; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP16]]
889; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
890; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
891; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
892; CHECK-NEXT:    [[TMP36:%.*]] = load i1, i1* [[TMP20]], align 1
893; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> poison, i1 [[TMP33]], i32 0
894; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 1
895; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 2
896; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <4 x i1> [[TMP39]], i1 [[TMP36]], i32 3
897; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
898; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
899; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
900; CHECK-NEXT:    [[TMP44:%.*]] = load i1, i1* [[TMP24]], align 1
901; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> poison, i1 [[TMP41]], i32 0
902; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 1
903; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 2
904; CHECK-NEXT:    [[TMP48:%.*]] = insertelement <4 x i1> [[TMP47]], i1 [[TMP44]], i32 3
905; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
906; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
907; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
908; CHECK-NEXT:    [[TMP52:%.*]] = load i1, i1* [[TMP28]], align 1
909; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> poison, i1 [[TMP49]], i32 0
910; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 1
911; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 2
912; CHECK-NEXT:    [[TMP56:%.*]] = insertelement <4 x i1> [[TMP55]], i1 [[TMP52]], i32 3
913; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
914; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
915; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
916; CHECK-NEXT:    [[TMP60:%.*]] = load i1, i1* [[TMP32]], align 1
917; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> poison, i1 [[TMP57]], i32 0
918; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1
919; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2
920; CHECK-NEXT:    [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3
921; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP1]]
922; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP5]]
923; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP9]]
924; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP13]]
925; CHECK-NEXT:    [[TMP69:%.*]] = getelementptr i32, i32* [[TMP65]], i32 0
926; CHECK-NEXT:    [[TMP70:%.*]] = bitcast i32* [[TMP69]] to <4 x i32>*
927; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP70]], i32 4, <4 x i1> [[TMP40]], <4 x i32> poison)
928; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr i32, i32* [[TMP65]], i32 4
929; CHECK-NEXT:    [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
930; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP72]], i32 4, <4 x i1> [[TMP48]], <4 x i32> poison)
931; CHECK-NEXT:    [[TMP73:%.*]] = getelementptr i32, i32* [[TMP65]], i32 8
932; CHECK-NEXT:    [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
933; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP74]], i32 4, <4 x i1> [[TMP56]], <4 x i32> poison)
934; CHECK-NEXT:    [[TMP75:%.*]] = getelementptr i32, i32* [[TMP65]], i32 12
935; CHECK-NEXT:    [[TMP76:%.*]] = bitcast i32* [[TMP75]] to <4 x i32>*
936; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP76]], i32 4, <4 x i1> [[TMP64]], <4 x i32> poison)
937; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
938; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP48]], <i1 true, i1 true, i1 true, i1 true>
939; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP56]], <i1 true, i1 true, i1 true, i1 true>
940; CHECK-NEXT:    [[TMP80:%.*]] = xor <4 x i1> [[TMP64]], <i1 true, i1 true, i1 true, i1 true>
941; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP40]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
942; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
943; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP56]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
944; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP64]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
945; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
946; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
947; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
948; CHECK-NEXT:    [[TMP84]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
949; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
950; CHECK-NEXT:    [[TMP85:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
951; CHECK-NEXT:    br i1 [[TMP85]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
952; CHECK:       middle.block:
953; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP82]], [[TMP81]]
954; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX]]
955; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP84]], [[BIN_RDX10]]
956; CHECK-NEXT:    [[TMP86:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
957; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
958; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
959; CHECK:       scalar.ph:
960; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
961; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ]
962; CHECK-NEXT:    br label [[LOOP:%.*]]
963; CHECK:       loop:
964; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
965; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
966; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
967; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
968; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
969; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
970; CHECK:       pred:
971; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
972; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
973; CHECK-NEXT:    br label [[LATCH]]
974; CHECK:       latch:
975; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
976; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
977; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], [[MIN_N]]
978; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
979; CHECK:       loop_exit:
980; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ]
981; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
982;
983entry:
984  %alloca = alloca [4096 x i32]
985  %base = bitcast [4096 x i32]* %alloca to i32*
986  call void @init(i32* %base)
987  %min.cmp = icmp ult i64 4096, %n
988  %min.n = select i1 %min.cmp, i64 4096, i64 %n
989  br label %loop
990loop:
991  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
992  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
993  %iv.next = add i64 %iv, 1
994  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
995  %earlycnd = load i1, i1* %test_addr
996  br i1 %earlycnd, label %pred, label %latch
997pred:
998  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
999  %val = load i32, i32* %addr
1000  br label %latch
1001latch:
1002  %val.phi = phi i32 [0, %loop], [%val, %pred]
1003  %accum.next = add i32 %accum, %val.phi
1004  %exit = icmp ugt i64 %iv, %min.n
1005  br i1 %exit, label %loop_exit, label %loop
1006
1007loop_exit:
1008  ret i32 %accum.next
1009}
1010
1011
1012
1013
;; The induction variable starts at 1024 instead of 0 and the loop exits once
;; %iv exceeds 4094, so the guarded load of %base[%iv] touches indices
;; 1024..4095 (3072 iterations), all inside the 4096-element alloca. The
;; expected output below still contains llvm.masked.load calls for those
;; accesses. The %len argument is not used by the body.
1014define i32 @test_non_zero_start(i64 %len, i1* %test_base) {
1015; CHECK-LABEL: @test_non_zero_start(
1016; CHECK-NEXT:  entry:
1017; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
1018; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
1019; CHECK-NEXT:    call void @init(i32* [[BASE]])
1020; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1021; CHECK:       vector.ph:
1022; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1023; CHECK:       vector.body:
1024; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1025; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
1026; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
1027; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
1028; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
1029; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1024, [[INDEX]]
1030; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1031; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
1032; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
1033; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 3
1034; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 4
1035; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 5
1036; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6
1037; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 7
1038; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 8
1039; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 9
1040; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 10
1041; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 11
1042; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 12
1043; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 13
1044; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 14
1045; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 15
1046; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
1047; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
1048; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
1049; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
1050; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
1051; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
1052; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
1053; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
1054; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
1055; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
1056; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
1057; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
1058; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
1059; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
1060; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
1061; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
1062; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
1063; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
1064; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
1065; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
1066; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
1067; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
1068; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
1069; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
1070; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
1071; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
1072; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
1073; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
1074; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
1075; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
1076; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
1077; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
1078; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
1079; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
1080; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
1081; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
1082; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
1083; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
1084; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
1085; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
1086; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
1087; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
1088; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
1089; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
1090; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
1091; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
1092; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
1093; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
1094; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
1095; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
1096; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
1097; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
1098; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
1099; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
1100; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
1101; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
1102; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
1103; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
1104; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
1105; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
1106; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
1107; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
1108; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
1109; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
1110; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
1111; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
1112; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
1113; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
1114; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
1115; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
1116; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
1117; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
1118; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
1119; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
1120; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
1121; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
1122; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1123; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 3072
1124; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
1125; CHECK:       middle.block:
1126; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
1127; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
1128; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
1129; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
1130; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 3072, 3072
1131; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
1132; CHECK:       scalar.ph:
1133; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 1024, [[ENTRY:%.*]] ]
1134; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
1135; CHECK-NEXT:    br label [[LOOP:%.*]]
1136; CHECK:       loop:
1137; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
1138; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
1139; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
1140; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
1141; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
1142; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
1143; CHECK:       pred:
1144; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
1145; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
1146; CHECK-NEXT:    br label [[LATCH]]
1147; CHECK:       latch:
1148; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
1149; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
1150; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
1151; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
1152; CHECK:       loop_exit:
1153; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
1154; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
1155;
1156entry:
1157  %alloca = alloca [4096 x i32]
1158  %base = bitcast [4096 x i32]* %alloca to i32*
1159  call void @init(i32* %base)
1160  br label %loop
1161loop:
  ; The index begins partway into the array rather than at element 0.
1162  %iv = phi i64 [ 1024, %entry ], [ %iv.next, %latch ]
1163  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
1164  %iv.next = add i64 %iv, 1
  ; The guard for the %base load is itself read from memory on each iteration.
1165  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
1166  %earlycnd = load i1, i1* %test_addr
1167  br i1 %earlycnd, label %pred, label %latch
1168pred:
1169  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
1170  %val = load i32, i32* %addr
1171  br label %latch
1172latch:
  ; Iterations that skip %pred contribute 0 to the running sum.
1173  %val.phi = phi i32 [0, %loop], [%val, %pred]
1174  %accum.next = add i32 %accum, %val.phi
  ; The pre-increment %iv is compared, so the last iteration runs with
  ; %iv == 4095.
1175  %exit = icmp ugt i64 %iv, 4094
1176  br i1 %exit, label %loop_exit, label %loop
1177
1178loop_exit:
1179  ret i32 %accum.next
1180}
1181
;; Presumably a negative test (going by the name): the induction variable
;; starts at -10, so the guarded load would address %base[-10], which lies
;; before the start of the 4096-element alloca. The expected output below keeps
;; the original scalar loop and contains no vector body. The %len argument is
;; not used by the body.
;; NOTE(review): the exit test is an unsigned compare, and -10 is greater than
;; 4094 when treated as unsigned, so the exit condition already holds on the
;; first iteration -- confirm that a single-iteration loop is what was intended.
1182define i32 @neg_out_of_bounds_start(i64 %len, i1* %test_base) {
1183; CHECK-LABEL: @neg_out_of_bounds_start(
1184; CHECK-NEXT:  entry:
1185; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
1186; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
1187; CHECK-NEXT:    call void @init(i32* [[BASE]])
1188; CHECK-NEXT:    br label [[LOOP:%.*]]
1189; CHECK:       loop:
1190; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ -10, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
1191; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
1192; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
1193; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[IV]]
1194; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
1195; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
1196; CHECK:       pred:
1197; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
1198; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
1199; CHECK-NEXT:    br label [[LATCH]]
1200; CHECK:       latch:
1201; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
1202; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
1203; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
1204; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
1205; CHECK:       loop_exit:
1206; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ]
1207; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
1208;
1209entry:
1210  %alloca = alloca [4096 x i32]
1211  %base = bitcast [4096 x i32]* %alloca to i32*
1212  call void @init(i32* %base)
1213  br label %loop
1214loop:
  ; A negative start index places the first %base access before the alloca.
1215  %iv = phi i64 [ -10, %entry ], [ %iv.next, %latch ]
1216  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
1217  %iv.next = add i64 %iv, 1
1218  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
1219  %earlycnd = load i1, i1* %test_addr
1220  br i1 %earlycnd, label %pred, label %latch
1221pred:
1222  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
1223  %val = load i32, i32* %addr
1224  br label %latch
1225latch:
1226  %val.phi = phi i32 [0, %loop], [%val, %pred]
1227  %accum.next = add i32 %accum, %val.phi
  ; Unsigned comparison against the pre-increment %iv.
1228  %exit = icmp ugt i64 %iv, 4094
1229  br i1 %exit, label %loop_exit, label %loop
1230
1231loop_exit:
1232  ret i32 %accum.next
1233}
1234
1235
1236;; TODO: handle non-unit strides
1237define i32 @test_non_unit_stride(i64 %len, i1* %test_base) {
1238; CHECK-LABEL: @test_non_unit_stride(
1239; CHECK-NEXT:  entry:
1240; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
1241; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
1242; CHECK-NEXT:    call void @init(i32* [[BASE]])
1243; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1244; CHECK:       vector.ph:
1245; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1246; CHECK:       vector.body:
1247; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE33:%.*]] ]
1248; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP148:%.*]], [[PRED_LOAD_CONTINUE33]] ]
1249; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP149:%.*]], [[PRED_LOAD_CONTINUE33]] ]
1250; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP150:%.*]], [[PRED_LOAD_CONTINUE33]] ]
1251; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP151:%.*]], [[PRED_LOAD_CONTINUE33]] ]
1252; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
1253; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1254; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
1255; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
1256; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
1257; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
1258; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10
1259; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12
1260; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14
1261; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16
1262; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18
1263; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20
1264; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22
1265; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24
1266; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
1267; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
1268; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
1269; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
1270; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
1271; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
1272; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
1273; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
1274; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
1275; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
1276; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
1277; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
1278; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
1279; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
1280; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
1281; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
1282; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
1283; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
1284; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
1285; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
1286; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
1287; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
1288; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
1289; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
1290; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
1291; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
1292; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
1293; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
1294; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
1295; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
1296; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
1297; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
1298; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
1299; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
1300; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
1301; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
1302; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
1303; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
1304; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
1305; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
1306; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
1307; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
1308; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
1309; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
1310; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
1311; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
1312; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
1313; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
1314; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
1315; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
1316; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
1317; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <4 x i1> [[TMP39]], i32 0
1318; CHECK-NEXT:    br i1 [[TMP64]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1319; CHECK:       pred.load.if:
1320; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP0]]
1321; CHECK-NEXT:    [[TMP66:%.*]] = load i32, i32* [[TMP65]], align 4
1322; CHECK-NEXT:    [[TMP67:%.*]] = insertelement <4 x i32> poison, i32 [[TMP66]], i32 0
1323; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1324; CHECK:       pred.load.continue:
1325; CHECK-NEXT:    [[TMP68:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP67]], [[PRED_LOAD_IF]] ]
1326; CHECK-NEXT:    [[TMP69:%.*]] = extractelement <4 x i1> [[TMP39]], i32 1
1327; CHECK-NEXT:    br i1 [[TMP69]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5:%.*]]
1328; CHECK:       pred.load.if4:
1329; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP1]]
1330; CHECK-NEXT:    [[TMP71:%.*]] = load i32, i32* [[TMP70]], align 4
1331; CHECK-NEXT:    [[TMP72:%.*]] = insertelement <4 x i32> [[TMP68]], i32 [[TMP71]], i32 1
1332; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE5]]
1333; CHECK:       pred.load.continue5:
1334; CHECK-NEXT:    [[TMP73:%.*]] = phi <4 x i32> [ [[TMP68]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP72]], [[PRED_LOAD_IF4]] ]
1335; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <4 x i1> [[TMP39]], i32 2
1336; CHECK-NEXT:    br i1 [[TMP74]], label [[PRED_LOAD_IF6:%.*]], label [[PRED_LOAD_CONTINUE7:%.*]]
1337; CHECK:       pred.load.if6:
1338; CHECK-NEXT:    [[TMP75:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP2]]
1339; CHECK-NEXT:    [[TMP76:%.*]] = load i32, i32* [[TMP75]], align 4
1340; CHECK-NEXT:    [[TMP77:%.*]] = insertelement <4 x i32> [[TMP73]], i32 [[TMP76]], i32 2
1341; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE7]]
1342; CHECK:       pred.load.continue7:
1343; CHECK-NEXT:    [[TMP78:%.*]] = phi <4 x i32> [ [[TMP73]], [[PRED_LOAD_CONTINUE5]] ], [ [[TMP77]], [[PRED_LOAD_IF6]] ]
1344; CHECK-NEXT:    [[TMP79:%.*]] = extractelement <4 x i1> [[TMP39]], i32 3
1345; CHECK-NEXT:    br i1 [[TMP79]], label [[PRED_LOAD_IF8:%.*]], label [[PRED_LOAD_CONTINUE9:%.*]]
1346; CHECK:       pred.load.if8:
1347; CHECK-NEXT:    [[TMP80:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP3]]
1348; CHECK-NEXT:    [[TMP81:%.*]] = load i32, i32* [[TMP80]], align 4
1349; CHECK-NEXT:    [[TMP82:%.*]] = insertelement <4 x i32> [[TMP78]], i32 [[TMP81]], i32 3
1350; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE9]]
1351; CHECK:       pred.load.continue9:
1352; CHECK-NEXT:    [[TMP83:%.*]] = phi <4 x i32> [ [[TMP78]], [[PRED_LOAD_CONTINUE7]] ], [ [[TMP82]], [[PRED_LOAD_IF8]] ]
1353; CHECK-NEXT:    [[TMP84:%.*]] = extractelement <4 x i1> [[TMP47]], i32 0
1354; CHECK-NEXT:    br i1 [[TMP84]], label [[PRED_LOAD_IF10:%.*]], label [[PRED_LOAD_CONTINUE11:%.*]]
1355; CHECK:       pred.load.if10:
1356; CHECK-NEXT:    [[TMP85:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP4]]
1357; CHECK-NEXT:    [[TMP86:%.*]] = load i32, i32* [[TMP85]], align 4
1358; CHECK-NEXT:    [[TMP87:%.*]] = insertelement <4 x i32> poison, i32 [[TMP86]], i32 0
1359; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE11]]
1360; CHECK:       pred.load.continue11:
1361; CHECK-NEXT:    [[TMP88:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE9]] ], [ [[TMP87]], [[PRED_LOAD_IF10]] ]
1362; CHECK-NEXT:    [[TMP89:%.*]] = extractelement <4 x i1> [[TMP47]], i32 1
1363; CHECK-NEXT:    br i1 [[TMP89]], label [[PRED_LOAD_IF12:%.*]], label [[PRED_LOAD_CONTINUE13:%.*]]
1364; CHECK:       pred.load.if12:
1365; CHECK-NEXT:    [[TMP90:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP5]]
1366; CHECK-NEXT:    [[TMP91:%.*]] = load i32, i32* [[TMP90]], align 4
1367; CHECK-NEXT:    [[TMP92:%.*]] = insertelement <4 x i32> [[TMP88]], i32 [[TMP91]], i32 1
1368; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE13]]
1369; CHECK:       pred.load.continue13:
1370; CHECK-NEXT:    [[TMP93:%.*]] = phi <4 x i32> [ [[TMP88]], [[PRED_LOAD_CONTINUE11]] ], [ [[TMP92]], [[PRED_LOAD_IF12]] ]
1371; CHECK-NEXT:    [[TMP94:%.*]] = extractelement <4 x i1> [[TMP47]], i32 2
1372; CHECK-NEXT:    br i1 [[TMP94]], label [[PRED_LOAD_IF14:%.*]], label [[PRED_LOAD_CONTINUE15:%.*]]
1373; CHECK:       pred.load.if14:
1374; CHECK-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP6]]
1375; CHECK-NEXT:    [[TMP96:%.*]] = load i32, i32* [[TMP95]], align 4
1376; CHECK-NEXT:    [[TMP97:%.*]] = insertelement <4 x i32> [[TMP93]], i32 [[TMP96]], i32 2
1377; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE15]]
1378; CHECK:       pred.load.continue15:
1379; CHECK-NEXT:    [[TMP98:%.*]] = phi <4 x i32> [ [[TMP93]], [[PRED_LOAD_CONTINUE13]] ], [ [[TMP97]], [[PRED_LOAD_IF14]] ]
1380; CHECK-NEXT:    [[TMP99:%.*]] = extractelement <4 x i1> [[TMP47]], i32 3
1381; CHECK-NEXT:    br i1 [[TMP99]], label [[PRED_LOAD_IF16:%.*]], label [[PRED_LOAD_CONTINUE17:%.*]]
1382; CHECK:       pred.load.if16:
1383; CHECK-NEXT:    [[TMP100:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP7]]
1384; CHECK-NEXT:    [[TMP101:%.*]] = load i32, i32* [[TMP100]], align 4
1385; CHECK-NEXT:    [[TMP102:%.*]] = insertelement <4 x i32> [[TMP98]], i32 [[TMP101]], i32 3
1386; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE17]]
1387; CHECK:       pred.load.continue17:
1388; CHECK-NEXT:    [[TMP103:%.*]] = phi <4 x i32> [ [[TMP98]], [[PRED_LOAD_CONTINUE15]] ], [ [[TMP102]], [[PRED_LOAD_IF16]] ]
1389; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <4 x i1> [[TMP55]], i32 0
1390; CHECK-NEXT:    br i1 [[TMP104]], label [[PRED_LOAD_IF18:%.*]], label [[PRED_LOAD_CONTINUE19:%.*]]
1391; CHECK:       pred.load.if18:
1392; CHECK-NEXT:    [[TMP105:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP8]]
1393; CHECK-NEXT:    [[TMP106:%.*]] = load i32, i32* [[TMP105]], align 4
1394; CHECK-NEXT:    [[TMP107:%.*]] = insertelement <4 x i32> poison, i32 [[TMP106]], i32 0
1395; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE19]]
1396; CHECK:       pred.load.continue19:
1397; CHECK-NEXT:    [[TMP108:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP107]], [[PRED_LOAD_IF18]] ]
1398; CHECK-NEXT:    [[TMP109:%.*]] = extractelement <4 x i1> [[TMP55]], i32 1
1399; CHECK-NEXT:    br i1 [[TMP109]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]]
1400; CHECK:       pred.load.if20:
1401; CHECK-NEXT:    [[TMP110:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP9]]
1402; CHECK-NEXT:    [[TMP111:%.*]] = load i32, i32* [[TMP110]], align 4
1403; CHECK-NEXT:    [[TMP112:%.*]] = insertelement <4 x i32> [[TMP108]], i32 [[TMP111]], i32 1
1404; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE21]]
1405; CHECK:       pred.load.continue21:
1406; CHECK-NEXT:    [[TMP113:%.*]] = phi <4 x i32> [ [[TMP108]], [[PRED_LOAD_CONTINUE19]] ], [ [[TMP112]], [[PRED_LOAD_IF20]] ]
1407; CHECK-NEXT:    [[TMP114:%.*]] = extractelement <4 x i1> [[TMP55]], i32 2
1408; CHECK-NEXT:    br i1 [[TMP114]], label [[PRED_LOAD_IF22:%.*]], label [[PRED_LOAD_CONTINUE23:%.*]]
1409; CHECK:       pred.load.if22:
1410; CHECK-NEXT:    [[TMP115:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP10]]
1411; CHECK-NEXT:    [[TMP116:%.*]] = load i32, i32* [[TMP115]], align 4
1412; CHECK-NEXT:    [[TMP117:%.*]] = insertelement <4 x i32> [[TMP113]], i32 [[TMP116]], i32 2
1413; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE23]]
1414; CHECK:       pred.load.continue23:
1415; CHECK-NEXT:    [[TMP118:%.*]] = phi <4 x i32> [ [[TMP113]], [[PRED_LOAD_CONTINUE21]] ], [ [[TMP117]], [[PRED_LOAD_IF22]] ]
1416; CHECK-NEXT:    [[TMP119:%.*]] = extractelement <4 x i1> [[TMP55]], i32 3
1417; CHECK-NEXT:    br i1 [[TMP119]], label [[PRED_LOAD_IF24:%.*]], label [[PRED_LOAD_CONTINUE25:%.*]]
1418; CHECK:       pred.load.if24:
1419; CHECK-NEXT:    [[TMP120:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP11]]
1420; CHECK-NEXT:    [[TMP121:%.*]] = load i32, i32* [[TMP120]], align 4
1421; CHECK-NEXT:    [[TMP122:%.*]] = insertelement <4 x i32> [[TMP118]], i32 [[TMP121]], i32 3
1422; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE25]]
1423; CHECK:       pred.load.continue25:
1424; CHECK-NEXT:    [[TMP123:%.*]] = phi <4 x i32> [ [[TMP118]], [[PRED_LOAD_CONTINUE23]] ], [ [[TMP122]], [[PRED_LOAD_IF24]] ]
1425; CHECK-NEXT:    [[TMP124:%.*]] = extractelement <4 x i1> [[TMP63]], i32 0
1426; CHECK-NEXT:    br i1 [[TMP124]], label [[PRED_LOAD_IF26:%.*]], label [[PRED_LOAD_CONTINUE27:%.*]]
1427; CHECK:       pred.load.if26:
1428; CHECK-NEXT:    [[TMP125:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]]
1429; CHECK-NEXT:    [[TMP126:%.*]] = load i32, i32* [[TMP125]], align 4
1430; CHECK-NEXT:    [[TMP127:%.*]] = insertelement <4 x i32> poison, i32 [[TMP126]], i32 0
1431; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE27]]
1432; CHECK:       pred.load.continue27:
1433; CHECK-NEXT:    [[TMP128:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE25]] ], [ [[TMP127]], [[PRED_LOAD_IF26]] ]
1434; CHECK-NEXT:    [[TMP129:%.*]] = extractelement <4 x i1> [[TMP63]], i32 1
1435; CHECK-NEXT:    br i1 [[TMP129]], label [[PRED_LOAD_IF28:%.*]], label [[PRED_LOAD_CONTINUE29:%.*]]
1436; CHECK:       pred.load.if28:
1437; CHECK-NEXT:    [[TMP130:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP13]]
1438; CHECK-NEXT:    [[TMP131:%.*]] = load i32, i32* [[TMP130]], align 4
1439; CHECK-NEXT:    [[TMP132:%.*]] = insertelement <4 x i32> [[TMP128]], i32 [[TMP131]], i32 1
1440; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE29]]
1441; CHECK:       pred.load.continue29:
1442; CHECK-NEXT:    [[TMP133:%.*]] = phi <4 x i32> [ [[TMP128]], [[PRED_LOAD_CONTINUE27]] ], [ [[TMP132]], [[PRED_LOAD_IF28]] ]
1443; CHECK-NEXT:    [[TMP134:%.*]] = extractelement <4 x i1> [[TMP63]], i32 2
1444; CHECK-NEXT:    br i1 [[TMP134]], label [[PRED_LOAD_IF30:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]]
1445; CHECK:       pred.load.if30:
1446; CHECK-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP14]]
1447; CHECK-NEXT:    [[TMP136:%.*]] = load i32, i32* [[TMP135]], align 4
1448; CHECK-NEXT:    [[TMP137:%.*]] = insertelement <4 x i32> [[TMP133]], i32 [[TMP136]], i32 2
1449; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE31]]
1450; CHECK:       pred.load.continue31:
1451; CHECK-NEXT:    [[TMP138:%.*]] = phi <4 x i32> [ [[TMP133]], [[PRED_LOAD_CONTINUE29]] ], [ [[TMP137]], [[PRED_LOAD_IF30]] ]
1452; CHECK-NEXT:    [[TMP139:%.*]] = extractelement <4 x i1> [[TMP63]], i32 3
1453; CHECK-NEXT:    br i1 [[TMP139]], label [[PRED_LOAD_IF32:%.*]], label [[PRED_LOAD_CONTINUE33]]
1454; CHECK:       pred.load.if32:
1455; CHECK-NEXT:    [[TMP140:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP15]]
1456; CHECK-NEXT:    [[TMP141:%.*]] = load i32, i32* [[TMP140]], align 4
1457; CHECK-NEXT:    [[TMP142:%.*]] = insertelement <4 x i32> [[TMP138]], i32 [[TMP141]], i32 3
1458; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE33]]
1459; CHECK:       pred.load.continue33:
1460; CHECK-NEXT:    [[TMP143:%.*]] = phi <4 x i32> [ [[TMP138]], [[PRED_LOAD_CONTINUE31]] ], [ [[TMP142]], [[PRED_LOAD_IF32]] ]
1461; CHECK-NEXT:    [[TMP144:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
1462; CHECK-NEXT:    [[TMP145:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
1463; CHECK-NEXT:    [[TMP146:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
1464; CHECK-NEXT:    [[TMP147:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
1465; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[TMP83]], <4 x i32> zeroinitializer
1466; CHECK-NEXT:    [[PREDPHI34:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP103]], <4 x i32> zeroinitializer
1467; CHECK-NEXT:    [[PREDPHI35:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP123]], <4 x i32> zeroinitializer
1468; CHECK-NEXT:    [[PREDPHI36:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP143]], <4 x i32> zeroinitializer
1469; CHECK-NEXT:    [[TMP148]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
1470; CHECK-NEXT:    [[TMP149]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI34]]
1471; CHECK-NEXT:    [[TMP150]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI35]]
1472; CHECK-NEXT:    [[TMP151]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI36]]
1473; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1474; CHECK-NEXT:    [[TMP152:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2048
1475; CHECK-NEXT:    br i1 [[TMP152]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
1476; CHECK:       middle.block:
1477; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP149]], [[TMP148]]
1478; CHECK-NEXT:    [[BIN_RDX37:%.*]] = add <4 x i32> [[TMP150]], [[BIN_RDX]]
1479; CHECK-NEXT:    [[BIN_RDX38:%.*]] = add <4 x i32> [[TMP151]], [[BIN_RDX37]]
1480; CHECK-NEXT:    [[TMP153:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX38]])
1481; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 2048, 2048
1482; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
1483; CHECK:       scalar.ph:
1484; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1485; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP153]], [[MIDDLE_BLOCK]] ]
1486; CHECK-NEXT:    br label [[LOOP:%.*]]
1487; CHECK:       loop:
1488; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
1489; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
1490; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 2
1491; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
1492; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
1493; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
1494; CHECK:       pred:
1495; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
1496; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
1497; CHECK-NEXT:    br label [[LATCH]]
1498; CHECK:       latch:
1499; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
1500; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
1501; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4093
1502; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
1503; CHECK:       loop_exit:
1504; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP153]], [[MIDDLE_BLOCK]] ]
1505; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
1506;
1507entry:
1508  %alloca = alloca [4096 x i32]
1509  %base = bitcast [4096 x i32]* %alloca to i32*
1510  call void @init(i32* %base)
1511  br label %loop
1512loop:
1513  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
1514  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
1515  %iv.next = add i64 %iv, 2
1516  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
1517  %earlycnd = load i1, i1* %test_addr
1518  br i1 %earlycnd, label %pred, label %latch
1519pred:
1520  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
1521  %val = load i32, i32* %addr
1522  br label %latch
1523latch:
1524  %val.phi = phi i32 [0, %loop], [%val, %pred]
1525  %accum.next = add i32 %accum, %val.phi
1526  %exit = icmp ugt i64 %iv, 4093
1527  br i1 %exit, label %loop_exit, label %loop
1528
1529loop_exit:
1530  ret i32 %accum.next
1531}
1532
;; The neg_ prefix presumably marks this as a negative case for the
;; optimization this file exercises (replacing a masked.load with a plain
;; vector load) -- confirm against the rest of the file.
;; The loop below visits indices 0..4095 (step 1, leaving once %iv > 4094),
;; while the backing alloca holds only 1024 i32 elements, so most iterations
;; index past the allocation. The load of %base[%iv] is guarded by an i1 read
;; from %test_base. The autogenerated checks expect the vector body to keep
;; four @llvm.masked.load calls instead of unconditional vector loads.
;; Note: %len is not referenced anywhere in the function body.
1533define i32 @neg_off_by_many(i64 %len, i1* %test_base) {
1534; CHECK-LABEL: @neg_off_by_many(
1535; CHECK-NEXT:  entry:
1536; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [1024 x i32], align 4
1537; CHECK-NEXT:    [[BASE:%.*]] = bitcast [1024 x i32]* [[ALLOCA]] to i32*
1538; CHECK-NEXT:    call void @init(i32* [[BASE]])
1539; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1540; CHECK:       vector.ph:
1541; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1542; CHECK:       vector.body:
1543; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1544; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
1545; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
1546; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
1547; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
1548; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1549; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
1550; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
1551; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
1552; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
1553; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
1554; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
1555; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
1556; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
1557; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
1558; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
1559; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
1560; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
1561; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
1562; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
1563; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
1564; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
1565; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
1566; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
1567; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
1568; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
1569; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
1570; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
1571; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
1572; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
1573; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
1574; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
1575; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
1576; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
1577; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
1578; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
1579; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
1580; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
1581; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
1582; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
1583; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
1584; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
1585; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
1586; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
1587; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
1588; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
1589; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
1590; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
1591; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
1592; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
1593; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
1594; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
1595; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
1596; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
1597; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
1598; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
1599; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
1600; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
1601; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
1602; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
1603; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
1604; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
1605; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
1606; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
1607; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
1608; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
1609; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
1610; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
1611; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
1612; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
1613; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
1614; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
1615; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
1616; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
1617; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
1618; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
1619; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
1620; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
1621; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
1622; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
1623; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
1624; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
1625; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
1626; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
1627; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
1628; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
1629; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
1630; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
1631; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
1632; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
1633; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
1634; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
1635; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
1636; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
1637; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
1638; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
1639; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
1640; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1641; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
1642; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
1643; CHECK:       middle.block:
1644; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
1645; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
1646; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
1647; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
1648; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
1649; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
1650; CHECK:       scalar.ph:
1651; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1652; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
1653; CHECK-NEXT:    br label [[LOOP:%.*]]
1654; CHECK:       loop:
1655; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
1656; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
1657; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
1658; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
1659; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
1660; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
1661; CHECK:       pred:
1662; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
1663; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
1664; CHECK-NEXT:    br label [[LATCH]]
1665; CHECK:       latch:
1666; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
1667; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
1668; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
1669; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
1670; CHECK:       loop_exit:
1671; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
1672; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
1673;
1674entry:
  ; Only 1024 i32 elements are allocated here; the loop below indexes up to 4095.
1675  %alloca = alloca [1024 x i32]
1676  %base = bitcast [1024 x i32]* %alloca to i32*
1677  call void @init(i32* %base)
1678  br label %loop
1679loop:
1680  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
1681  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
1682  %iv.next = add i64 %iv, 1
  ; Per-iteration guard read from %test_base; it selects between %pred and %latch.
1683  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
1684  %earlycnd = load i1, i1* %test_addr
1685  br i1 %earlycnd, label %pred, label %latch
1686pred:
  ; Guarded load of %base[%iv]; reached only when %earlycnd is true.
1687  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
1688  %val = load i32, i32* %addr
1689  br label %latch
1690latch:
  ; Iterations that bypass %pred contribute 0 to the running sum.
1691  %val.phi = phi i32 [0, %loop], [%val, %pred]
1692  %accum.next = add i32 %accum, %val.phi
  ; The exit is taken on the iteration where %iv == 4095 (first value > 4094).
1693  %exit = icmp ugt i64 %iv, 4094
1694  br i1 %exit, label %loop_exit, label %loop
1695
1696loop_exit:
1697  ret i32 %accum.next
1698}
1699
1700define i32 @neg_off_by_one_iteration(i64 %len, i1* %test_base) {
1701; CHECK-LABEL: @neg_off_by_one_iteration(
1702; CHECK-NEXT:  entry:
1703; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4095 x i32], align 4
1704; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4095 x i32]* [[ALLOCA]] to i32*
1705; CHECK-NEXT:    call void @init(i32* [[BASE]])
1706; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1707; CHECK:       vector.ph:
1708; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1709; CHECK:       vector.body:
1710; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1711; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
1712; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
1713; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
1714; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
1715; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1716; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
1717; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
1718; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
1719; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
1720; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
1721; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
1722; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
1723; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
1724; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
1725; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
1726; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
1727; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
1728; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
1729; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
1730; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
1731; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
1732; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
1733; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
1734; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
1735; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
1736; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
1737; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
1738; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
1739; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
1740; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
1741; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
1742; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
1743; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
1744; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
1745; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
1746; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
1747; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
1748; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
1749; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
1750; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
1751; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
1752; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
1753; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
1754; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
1755; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
1756; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
1757; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
1758; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
1759; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
1760; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
1761; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
1762; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
1763; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
1764; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
1765; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
1766; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
1767; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
1768; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
1769; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
1770; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
1771; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
1772; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
1773; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
1774; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
1775; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
1776; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
1777; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
1778; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
1779; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
1780; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
1781; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
1782; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
1783; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
1784; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
1785; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
1786; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
1787; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
1788; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
1789; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
1790; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
1791; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
1792; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
1793; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
1794; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
1795; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
1796; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
1797; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
1798; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
1799; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
1800; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
1801; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
1802; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
1803; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
1804; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
1805; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
1806; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
1807; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1808; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
1809; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
1810; CHECK:       middle.block:
1811; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
1812; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
1813; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
1814; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
1815; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
1816; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
1817; CHECK:       scalar.ph:
1818; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1819; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
1820; CHECK-NEXT:    br label [[LOOP:%.*]]
1821; CHECK:       loop:
1822; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
1823; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
1824; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
1825; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
1826; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
1827; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
1828; CHECK:       pred:
1829; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
1830; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
1831; CHECK-NEXT:    br label [[LATCH]]
1832; CHECK:       latch:
1833; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
1834; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
1835; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
1836; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
1837; CHECK:       loop_exit:
1838; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
1839; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
1840;
1841entry:
1842  %alloca = alloca [4095 x i32]
1843  %base = bitcast [4095 x i32]* %alloca to i32*
1844  call void @init(i32* %base)
1845  br label %loop
1846loop:
1847  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
1848  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
1849  %iv.next = add i64 %iv, 1
1850  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
1851  %earlycnd = load i1, i1* %test_addr
1852  br i1 %earlycnd, label %pred, label %latch
1853pred:
1854  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
1855  %val = load i32, i32* %addr
1856  br label %latch
1857latch:
1858  %val.phi = phi i32 [0, %loop], [%val, %pred]
1859  %accum.next = add i32 %accum, %val.phi
1860  %exit = icmp ugt i64 %iv, 4094
1861  br i1 %exit, label %loop_exit, label %loop
1862
1863loop_exit:
1864  ret i32 %accum.next
1865}
1866
;; Negative test: the alloca below is 16383 bytes, one byte short of the
;; 16384 bytes (4096 iterations x 4-byte i32) that the predicated load can
;; touch, so the expected output keeps the masked-load intrinsic calls
;; instead of plain vector loads.
1867define i32 @neg_off_by_one_byte(i64 %len, i1* %test_base) {
1868; CHECK-LABEL: @neg_off_by_one_byte(
1869; CHECK-NEXT:  entry:
1870; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [16383 x i8], align 1
1871; CHECK-NEXT:    [[BASE:%.*]] = bitcast [16383 x i8]* [[ALLOCA]] to i32*
1872; CHECK-NEXT:    call void @init(i32* [[BASE]])
1873; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1874; CHECK:       vector.ph:
1875; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1876; CHECK:       vector.body:
1877; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1878; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
1879; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
1880; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
1881; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
1882; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1883; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
1884; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
1885; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
1886; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
1887; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
1888; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
1889; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
1890; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
1891; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
1892; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
1893; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
1894; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
1895; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
1896; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
1897; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
1898; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
1899; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
1900; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
1901; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
1902; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
1903; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
1904; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
1905; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
1906; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
1907; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
1908; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
1909; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
1910; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
1911; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
1912; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
1913; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
1914; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
1915; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
1916; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
1917; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
1918; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
1919; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
1920; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
1921; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
1922; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
1923; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
1924; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
1925; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
1926; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
1927; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
1928; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
1929; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
1930; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
1931; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
1932; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
1933; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
1934; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
1935; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
1936; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
1937; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
1938; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
1939; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
1940; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
1941; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
1942; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
1943; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
1944; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
1945; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
1946; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
1947; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
1948; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
1949; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
1950; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
1951; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
1952; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
1953; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
1954; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
1955; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
1956; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
1957; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
1958; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
1959; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
1960; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
1961; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
1962; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
1963; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
1964; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
1965; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
1966; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
1967; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
1968; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
1969; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
1970; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
1971; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
1972; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
1973; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
1974; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1975; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
1976; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1977; CHECK:       middle.block:
1978; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
1979; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
1980; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
1981; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
1982; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
1983; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
1984; CHECK:       scalar.ph:
1985; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1986; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
1987; CHECK-NEXT:    br label [[LOOP:%.*]]
1988; CHECK:       loop:
1989; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
1990; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
1991; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
1992; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
1993; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
1994; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
1995; CHECK:       pred:
1996; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
1997; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
1998; CHECK-NEXT:    br label [[LATCH]]
1999; CHECK:       latch:
2000; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
2001; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
2002; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
2003; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
2004; CHECK:       loop_exit:
2005; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
2006; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
2007;
2008entry:
2009  %alloca = alloca [16383 x i8] ; 16383 bytes: one fewer than 4096 * 4
2010  %base = bitcast [16383 x i8]* %alloca to i32*
2011  call void @init(i32* %base)
2012  br label %loop
2013loop:
2014  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
2015  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
2016  %iv.next = add i64 %iv, 1
2017  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
2018  %earlycnd = load i1, i1* %test_addr
2019  br i1 %earlycnd, label %pred, label %latch
2020pred:
2021  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
2022  %val = load i32, i32* %addr ; executed only when %earlycnd is true
2023  br label %latch
2024latch:
2025  %val.phi = phi i32 [0, %loop], [%val, %pred]
2026  %accum.next = add i32 %accum, %val.phi
2027  %exit = icmp ugt i64 %iv, 4094 ; true once %iv reaches 4095, i.e. on the 4096th iteration
2028  br i1 %exit, label %loop_exit, label %loop
2029
2030loop_exit:
2031  ret i32 %accum.next
2032}
2033
2034
2035; Show that we handle the case where the exit count is non-constant, but we
2036; have a constant bound on it which is sufficient to show dereferenceability.
; Here the trip count is umin(%len, 4094) + 2, i.e. at most 4096, which matches
; the 4096-element alloca, so the expected output uses plain wide loads.
2037define i32 @test_constant_max(i64 %len, i1* %test_base) {
2038; CHECK-LABEL: @test_constant_max(
2039; CHECK-NEXT:  entry:
2040; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
2041; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
2042; CHECK-NEXT:    call void @init(i32* [[BASE]])
2043; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[LEN:%.*]], 4094
2044; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP]], i64 4094, i64 [[LEN]]
2045; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i64 [[MIN]], 2
2046; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
2047; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2048; CHECK:       vector.ph:
2049; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
2050; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
2051; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2052; CHECK:       vector.body:
2053; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2054; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
2055; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
2056; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
2057; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP84:%.*]], [[VECTOR_BODY]] ]
2058; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
2059; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
2060; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 2
2061; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 3
2062; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 4
2063; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 5
2064; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 6
2065; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 7
2066; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 8
2067; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 9
2068; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 10
2069; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 11
2070; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 12
2071; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 13
2072; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 14
2073; CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX]], 15
2074; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP1]]
2075; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
2076; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
2077; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
2078; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
2079; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
2080; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
2081; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
2082; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
2083; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
2084; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
2085; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
2086; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
2087; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
2088; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
2089; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP16]]
2090; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
2091; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
2092; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
2093; CHECK-NEXT:    [[TMP36:%.*]] = load i1, i1* [[TMP20]], align 1
2094; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> poison, i1 [[TMP33]], i32 0
2095; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 1
2096; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 2
2097; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <4 x i1> [[TMP39]], i1 [[TMP36]], i32 3
2098; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
2099; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
2100; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
2101; CHECK-NEXT:    [[TMP44:%.*]] = load i1, i1* [[TMP24]], align 1
2102; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> poison, i1 [[TMP41]], i32 0
2103; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 1
2104; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 2
2105; CHECK-NEXT:    [[TMP48:%.*]] = insertelement <4 x i1> [[TMP47]], i1 [[TMP44]], i32 3
2106; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
2107; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
2108; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
2109; CHECK-NEXT:    [[TMP52:%.*]] = load i1, i1* [[TMP28]], align 1
2110; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> poison, i1 [[TMP49]], i32 0
2111; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 1
2112; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 2
2113; CHECK-NEXT:    [[TMP56:%.*]] = insertelement <4 x i1> [[TMP55]], i1 [[TMP52]], i32 3
2114; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
2115; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
2116; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
2117; CHECK-NEXT:    [[TMP60:%.*]] = load i1, i1* [[TMP32]], align 1
2118; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> poison, i1 [[TMP57]], i32 0
2119; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1
2120; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2
2121; CHECK-NEXT:    [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3
2122; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP1]]
2123; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP5]]
2124; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP9]]
2125; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP13]]
2126; CHECK-NEXT:    [[TMP69:%.*]] = getelementptr i32, i32* [[TMP65]], i32 0
2127; CHECK-NEXT:    [[TMP70:%.*]] = bitcast i32* [[TMP69]] to <4 x i32>*
2128; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP70]], align 4
2129; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr i32, i32* [[TMP65]], i32 4
2130; CHECK-NEXT:    [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
2131; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP72]], align 4
2132; CHECK-NEXT:    [[TMP73:%.*]] = getelementptr i32, i32* [[TMP65]], i32 8
2133; CHECK-NEXT:    [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
2134; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
2135; CHECK-NEXT:    [[TMP75:%.*]] = getelementptr i32, i32* [[TMP65]], i32 12
2136; CHECK-NEXT:    [[TMP76:%.*]] = bitcast i32* [[TMP75]] to <4 x i32>*
2137; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP76]], align 4
2138; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
2139; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP48]], <i1 true, i1 true, i1 true, i1 true>
2140; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP56]], <i1 true, i1 true, i1 true, i1 true>
2141; CHECK-NEXT:    [[TMP80:%.*]] = xor <4 x i1> [[TMP64]], <i1 true, i1 true, i1 true, i1 true>
2142; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP40]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer
2143; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[WIDE_LOAD4]], <4 x i32> zeroinitializer
2144; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP56]], <4 x i32> [[WIDE_LOAD5]], <4 x i32> zeroinitializer
2145; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP64]], <4 x i32> [[WIDE_LOAD6]], <4 x i32> zeroinitializer
2146; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
2147; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
2148; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
2149; CHECK-NEXT:    [[TMP84]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
2150; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2151; CHECK-NEXT:    [[TMP85:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2152; CHECK-NEXT:    br i1 [[TMP85]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
2153; CHECK:       middle.block:
2154; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP82]], [[TMP81]]
2155; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX]]
2156; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP84]], [[BIN_RDX10]]
2157; CHECK-NEXT:    [[TMP86:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
2158; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
2159; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
2160; CHECK:       scalar.ph:
2161; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2162; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ]
2163; CHECK-NEXT:    br label [[LOOP:%.*]]
2164; CHECK:       loop:
2165; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
2166; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
2167; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
2168; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
2169; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
2170; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
2171; CHECK:       pred:
2172; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
2173; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
2174; CHECK-NEXT:    br label [[LATCH]]
2175; CHECK:       latch:
2176; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
2177; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
2178; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], [[MIN]]
2179; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
2180; CHECK:       loop_exit:
2181; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ]
2182; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
2183;
2184entry:
2185  %alloca = alloca [4096 x i32] ; 4096 i32 elements, valid indices 0..4095
2186  %base = bitcast [4096 x i32]* %alloca to i32*
2187  call void @init(i32* %base)
2188  %cmp = icmp ugt i64 %len, 4094
2189  %min = select i1 %cmp, i64 4094, i64 %len ; %min = umin(%len, 4094)
2190  br label %loop
2191loop:
2192  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
2193  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
2194  %iv.next = add i64 %iv, 1
2195  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
2196  %earlycnd = load i1, i1* %test_addr
2197  br i1 %earlycnd, label %pred, label %latch
2198pred:
2199  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
2200  %val = load i32, i32* %addr ; executed only when %earlycnd is true
2201  br label %latch
2202latch:
2203  %val.phi = phi i32 [0, %loop], [%val, %pred]
2204  %accum.next = add i32 %accum, %val.phi
2205  %exit = icmp ugt i64 %iv, %min ; last iteration has %iv == %min + 1, which is at most 4095
2206  br i1 %exit, label %loop_exit, label %loop
2207
2208loop_exit:
2209  ret i32 %accum.next
2210}
2211
2212
2213;; Model a custom allocator which allocates in chunks of 8 bytes.
;; Both declarations return an 8-byte-aligned pointer that is either null or
;; dereferenceable for at least 8 bytes. Per the allocsize attribute,
;; @my_alloc returns at least (argument 0) bytes and @my_array_alloc returns
;; at least (argument 0 * argument 1) bytes.
2214declare align 8 dereferenceable_or_null(8) i8* @my_alloc(i32) allocsize(0)
2215declare align 8 dereferenceable_or_null(8) i8* @my_array_alloc(i32, i32) allocsize(0, 1)
2216
2217define i32 @test_allocsize(i64 %len, i1* %test_base) nofree nosync {
2218; CHECK-LABEL: @test_allocsize(
2219; CHECK-NEXT:  entry:
2220; CHECK-NEXT:    [[ALLOCATION:%.*]] = call nonnull i8* @my_alloc(i32 16384)
2221; CHECK-NEXT:    [[BASE:%.*]] = bitcast i8* [[ALLOCATION]] to i32*
2222; CHECK-NEXT:    call void @init(i32* [[BASE]])
2223; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2224; CHECK:       vector.ph:
2225; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2226; CHECK:       vector.body:
2227; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2228; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
2229; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
2230; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
2231; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
2232; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2233; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
2234; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
2235; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
2236; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
2237; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
2238; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
2239; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
2240; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
2241; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
2242; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
2243; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
2244; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
2245; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
2246; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
2247; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
2248; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
2249; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
2250; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
2251; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
2252; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
2253; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
2254; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
2255; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
2256; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
2257; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
2258; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
2259; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
2260; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
2261; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
2262; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
2263; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
2264; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
2265; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
2266; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
2267; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
2268; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
2269; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
2270; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
2271; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
2272; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
2273; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
2274; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
2275; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
2276; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
2277; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
2278; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
2279; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
2280; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
2281; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
2282; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
2283; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
2284; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
2285; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
2286; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
2287; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
2288; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
2289; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
2290; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
2291; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
2292; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
2293; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
2294; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
2295; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
2296; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
2297; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
2298; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
2299; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
2300; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
2301; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
2302; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
2303; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
2304; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
2305; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
2306; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
2307; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
2308; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
2309; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
2310; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
2311; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
2312; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
2313; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
2314; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
2315; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
2316; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
2317; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
2318; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
2319; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
2320; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
2321; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
2322; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
2323; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
2324; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2325; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
2326; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2327; CHECK:       middle.block:
2328; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
2329; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
2330; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
2331; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
2332; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
2333; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
2334; CHECK:       scalar.ph:
2335; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2336; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
2337; CHECK-NEXT:    br label [[LOOP:%.*]]
2338; CHECK:       loop:
2339; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
2340; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
2341; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
2342; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
2343; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
2344; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
2345; CHECK:       pred:
2346; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
2347; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
2348; CHECK-NEXT:    br label [[LATCH]]
2349; CHECK:       latch:
2350; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
2351; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
2352; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
2353; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]]
2354; CHECK:       loop_exit:
2355; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
2356; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
2357;
2358entry:
2359  %allocation = call nonnull i8* @my_alloc(i32 16384)
2360  %base = bitcast i8* %allocation to i32*
2361  call void @init(i32* %base)
2362  br label %loop
2363loop:
2364  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
2365  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
2366  %iv.next = add i64 %iv, 1
2367  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
2368  %earlycnd = load i1, i1* %test_addr
2369  br i1 %earlycnd, label %pred, label %latch
2370pred:
2371  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
2372  %val = load i32, i32* %addr
2373  br label %latch
2374latch:
2375  %val.phi = phi i32 [0, %loop], [%val, %pred]
2376  %accum.next = add i32 %accum, %val.phi
2377  %exit = icmp ugt i64 %iv, 4094
2378  br i1 %exit, label %loop_exit, label %loop
2379
2380loop_exit:
2381  ret i32 %accum.next
2382}
2383
2384
;; Sums the i32 elements of a freshly allocated buffer, reading element %iv
;; only when the i1 flag loaded from %test_base[%iv] is true.  The buffer is
;; obtained from @my_array_alloc(i32 4096, i32 4), i.e. a count and an element
;; size rather than a single byte size.
;; NOTE(review): @my_array_alloc is declared outside this excerpt; presumably
;; it carries an allocsize attribute covering both arguments - confirm.
;; %len is not referenced anywhere in the body.
;; The generated expectations below still contain llvm.masked.load calls in
;; vector.body, so the conditional load from the buffer is kept masked here.
2385define i32 @test_allocsize_array(i64 %len, i1* %test_base) nofree nosync {
2386; CHECK-LABEL: @test_allocsize_array(
2387; CHECK-NEXT:  entry:
2388; CHECK-NEXT:    [[ALLOCATION:%.*]] = call nonnull i8* @my_array_alloc(i32 4096, i32 4)
2389; CHECK-NEXT:    [[BASE:%.*]] = bitcast i8* [[ALLOCATION]] to i32*
2390; CHECK-NEXT:    call void @init(i32* [[BASE]])
2391; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2392; CHECK:       vector.ph:
2393; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2394; CHECK:       vector.body:
2395; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2396; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
2397; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
2398; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
2399; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
2400; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2401; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
2402; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
2403; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
2404; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
2405; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
2406; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
2407; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
2408; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
2409; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
2410; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
2411; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
2412; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
2413; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
2414; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
2415; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
2416; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
2417; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
2418; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
2419; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
2420; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
2421; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
2422; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
2423; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
2424; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
2425; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
2426; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
2427; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
2428; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
2429; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
2430; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
2431; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
2432; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
2433; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
2434; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
2435; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
2436; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
2437; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
2438; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
2439; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
2440; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
2441; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
2442; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
2443; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
2444; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
2445; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
2446; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
2447; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
2448; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
2449; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
2450; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
2451; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
2452; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
2453; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
2454; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
2455; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
2456; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
2457; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
2458; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
2459; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
2460; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
2461; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
2462; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
2463; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
2464; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
2465; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
2466; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
2467; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
2468; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
2469; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
2470; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
2471; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
2472; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
2473; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
2474; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
2475; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
2476; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
2477; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
2478; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
2479; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
2480; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
2481; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
2482; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
2483; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
2484; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
2485; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
2486; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
2487; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
2488; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
2489; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
2490; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
2491; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
2492; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2493; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
2494; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
2495; CHECK:       middle.block:
2496; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
2497; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
2498; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
2499; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
2500; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
2501; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
2502; CHECK:       scalar.ph:
2503; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2504; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
2505; CHECK-NEXT:    br label [[LOOP:%.*]]
2506; CHECK:       loop:
2507; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
2508; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
2509; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
2510; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
2511; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
2512; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
2513; CHECK:       pred:
2514; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
2515; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
2516; CHECK-NEXT:    br label [[LATCH]]
2517; CHECK:       latch:
2518; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
2519; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
2520; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
2521; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
2522; CHECK:       loop_exit:
2523; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
2524; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
2525;
2526entry:
  ; Request 4096 elements of 4 bytes each, view the result as i32*, and pass
  ; it to @init before the loop runs.
2527  %allocation = call nonnull i8* @my_array_alloc(i32 4096, i32 4)
2528  %base = bitcast i8* %allocation to i32*
2529  call void @init(i32* %base)
2530  br label %loop
2531loop:
  ; %iv is the element index, %accum the running sum.
2532  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
2533  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
2534  %iv.next = add i64 %iv, 1
  ; Per-iteration guard: the i1 flag at %test_base[%iv] decides whether
  ; %base[%iv] is read at all.
2535  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
2536  %earlycnd = load i1, i1* %test_addr
2537  br i1 %earlycnd, label %pred, label %latch
2538pred:
  ; Only reached when the flag is true: the conditional load from the buffer.
2539  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
2540  %val = load i32, i32* %addr
2541  br label %latch
2542latch:
  ; Iterations that bypassed %pred contribute 0 to the sum.
2543  %val.phi = phi i32 [0, %loop], [%val, %pred]
2544  %accum.next = add i32 %accum, %val.phi
  ; Exit once %iv is unsigned-greater than 4094, i.e. after the iteration with
  ; %iv == 4095, for 4096 iterations in total.
2545  %exit = icmp ugt i64 %iv, 4094
2546  br i1 %exit, label %loop_exit, label %loop
2547
2548loop_exit:
2549  ret i32 %accum.next
2550}
2551
; External deallocation routine; @test_allocsize_cond_deref calls it on its
; allocation along one path before entering the loop.
2552declare void @my_free(i8*)
2553
2554; For the point-in-time variant of deref(N) semantics, show a negative
2555; example where hoisting without explicit predication might introduce a
2556; dynamic use-after-free.  (E.g. allzero is true when all elements of the
2557; test vector are false, and thus base is never accessed.)
2558define i32 @test_allocsize_cond_deref(i1 %allzero, i1* %test_base) {
2559; CHECK-LABEL: @test_allocsize_cond_deref(
2560; CHECK-NEXT:  entry:
2561; CHECK-NEXT:    [[ALLOCATION:%.*]] = call nonnull i8* @my_alloc(i32 16384)
2562; CHECK-NEXT:    [[BASE:%.*]] = bitcast i8* [[ALLOCATION]] to i32*
2563; CHECK-NEXT:    call void @init(i32* [[BASE]])
2564; CHECK-NEXT:    br i1 [[ALLZERO:%.*]], label [[FREEIT:%.*]], label [[PREHEADER:%.*]]
2565; CHECK:       freeit:
2566; CHECK-NEXT:    call void @my_free(i8* [[ALLOCATION]])
2567; CHECK-NEXT:    br label [[PREHEADER]]
2568; CHECK:       preheader:
2569; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2570; CHECK:       vector.ph:
2571; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2572; CHECK:       vector.body:
2573; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2574; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
2575; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
2576; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
2577; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
2578; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2579; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
2580; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
2581; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
2582; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
2583; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
2584; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
2585; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
2586; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
2587; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
2588; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
2589; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
2590; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
2591; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
2592; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
2593; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
2594; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
2595; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
2596; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
2597; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
2598; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
2599; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
2600; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
2601; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
2602; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
2603; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
2604; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
2605; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
2606; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
2607; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
2608; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
2609; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
2610; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
2611; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
2612; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
2613; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
2614; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
2615; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
2616; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
2617; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
2618; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
2619; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
2620; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
2621; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
2622; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
2623; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
2624; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
2625; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
2626; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
2627; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
2628; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
2629; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
2630; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
2631; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
2632; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
2633; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
2634; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
2635; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
2636; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
2637; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
2638; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
2639; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
2640; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
2641; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
2642; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
2643; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
2644; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
2645; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
2646; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
2647; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
2648; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
2649; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
2650; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
2651; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
2652; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
2653; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
2654; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
2655; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
2656; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
2657; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
2658; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
2659; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
2660; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
2661; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
2662; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
2663; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
2664; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
2665; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
2666; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
2667; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
2668; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
2669; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
2670; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2671; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
2672; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2673; CHECK:       middle.block:
2674; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
2675; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
2676; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
2677; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
2678; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
2679; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
2680; CHECK:       scalar.ph:
2681; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[PREHEADER]] ]
2682; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
2683; CHECK-NEXT:    br label [[LOOP:%.*]]
2684; CHECK:       loop:
2685; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
2686; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
2687; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
2688; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
2689; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
2690; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
2691; CHECK:       pred:
2692; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
2693; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
2694; CHECK-NEXT:    br label [[LATCH]]
2695; CHECK:       latch:
2696; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
2697; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
2698; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
2699; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
2700; CHECK:       loop_exit:
2701; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
2702; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
2703;
2704entry:
2705  %allocation = call nonnull i8* @my_alloc(i32 16384)
2706  %base = bitcast i8* %allocation to i32*
2707  call void @init(i32* %base)
2708  br i1 %allzero, label %freeit, label %preheader
2709freeit:
2710  call void @my_free(i8* %allocation)
2711  br label %preheader
2712preheader:
2713  br label %loop
2714loop:
2715  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %latch ]
2716  %accum = phi i32 [ 0, %preheader ], [ %accum.next, %latch ]
2717  %iv.next = add i64 %iv, 1
2718  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
2719  %earlycnd = load i1, i1* %test_addr
2720  br i1 %earlycnd, label %pred, label %latch
2721pred:
2722  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
2723  %val = load i32, i32* %addr
2724  br label %latch
2725latch:
2726  %val.phi = phi i32 [0, %loop], [%val, %pred]
2727  %accum.next = add i32 %accum, %val.phi
2728  %exit = icmp ugt i64 %iv, 4094
2729  br i1 %exit, label %loop_exit, label %loop
2730
2731loop_exit:
2732  ret i32 %accum.next
2733}
2734