1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -force-vector-width=4 -loop-vectorize -mcpu=haswell < %s | FileCheck %s
3
4target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
5target triple = "x86_64-unknown-linux-gnu"
6
7;; This file includes tests for avoiding the need for a masked.load.
8;; We don't need a masked.load here due to dereferenceability facts, and can
9;; instead use a plain vector load.
10
11declare void @init(i32* nocapture nofree)
12
13;; For ease of explanation, this one demonstrates the idea
14;; with a range check, but there are better lowering options specifically for
15;; this test (i.e. reducing the iteration space of the vector copy), so
16;; the following tests are written more generically.
;; Sums base[iv] for iv = 0..4095, reading an element only when iv < %len
;; (signed compare) and adding 0 otherwise. The assertions below expect plain
;; <4 x i32> wide loads whose results are filtered by a select on the compare
;; mask, with no masked.load call in the vector body.
17define i32 @test_explicit_pred(i64 %len) {
18; CHECK-LABEL: @test_explicit_pred(
19; CHECK-NEXT:  entry:
20; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
21; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
22; CHECK-NEXT:    call void @init(i32* [[BASE]])
23; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
24; CHECK:       vector.ph:
25; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[LEN:%.*]], i32 0
26; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
27; CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0
28; CHECK-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> poison, <4 x i32> zeroinitializer
29; CHECK-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0
30; CHECK-NEXT:    [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT9]], <4 x i64> poison, <4 x i32> zeroinitializer
31; CHECK-NEXT:    [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0
32; CHECK-NEXT:    [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT11]], <4 x i64> poison, <4 x i32> zeroinitializer
33; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
34; CHECK:       vector.body:
35; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
36; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
37; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ]
38; CHECK-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ]
39; CHECK-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
40; CHECK-NEXT:    [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
41; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
42; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], <i64 4, i64 4, i64 4, i64 4>
43; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], <i64 4, i64 4, i64 4, i64 4>
44; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
45; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
46; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 8
47; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 12
48; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
49; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT8]]
50; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT10]]
51; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt <4 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT12]]
52; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
53; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP1]]
54; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP2]]
55; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP3]]
56; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i32, i32* [[TMP8]], i32 0
57; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
58; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4
59; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i32, i32* [[TMP8]], i32 4
60; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>*
61; CHECK-NEXT:    [[WIDE_LOAD13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP15]], align 4
62; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i32, i32* [[TMP8]], i32 8
63; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
64; CHECK-NEXT:    [[WIDE_LOAD14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP17]], align 4
65; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i32, i32* [[TMP8]], i32 12
66; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
67; CHECK-NEXT:    [[WIDE_LOAD15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
68; CHECK-NEXT:    [[TMP20:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
69; CHECK-NEXT:    [[TMP21:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
70; CHECK-NEXT:    [[TMP22:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
71; CHECK-NEXT:    [[TMP23:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
72; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer
73; CHECK-NEXT:    [[PREDPHI16:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[WIDE_LOAD13]], <4 x i32> zeroinitializer
74; CHECK-NEXT:    [[PREDPHI17:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[WIDE_LOAD14]], <4 x i32> zeroinitializer
75; CHECK-NEXT:    [[PREDPHI18:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[WIDE_LOAD15]], <4 x i32> zeroinitializer
76; CHECK-NEXT:    [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
77; CHECK-NEXT:    [[TMP25]] = add <4 x i32> [[VEC_PHI4]], [[PREDPHI16]]
78; CHECK-NEXT:    [[TMP26]] = add <4 x i32> [[VEC_PHI5]], [[PREDPHI17]]
79; CHECK-NEXT:    [[TMP27]] = add <4 x i32> [[VEC_PHI6]], [[PREDPHI18]]
80; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
81; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], <i64 4, i64 4, i64 4, i64 4>
82; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
83; CHECK-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
84; CHECK:       middle.block:
85; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP25]], [[TMP24]]
86; CHECK-NEXT:    [[BIN_RDX19:%.*]] = add <4 x i32> [[TMP26]], [[BIN_RDX]]
87; CHECK-NEXT:    [[BIN_RDX20:%.*]] = add <4 x i32> [[TMP27]], [[BIN_RDX19]]
88; CHECK-NEXT:    [[TMP29:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX20]])
89; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
90; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
91; CHECK:       scalar.ph:
92; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
93; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ]
94; CHECK-NEXT:    br label [[LOOP:%.*]]
95; CHECK:       loop:
96; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
97; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
98; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
99; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp slt i64 [[IV]], [[LEN]]
100; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
101; CHECK:       pred:
102; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
103; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
104; CHECK-NEXT:    br label [[LATCH]]
105; CHECK:       latch:
106; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
107; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
108; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
109; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
110; CHECK:       loop_exit:
111; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ]
112; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
113;
114entry:
  ; 4096-element i32 stack buffer. The loop below only produces indices
  ; 0..4095, so every address it can form lies inside this allocation.
115  %alloca = alloca [4096 x i32]
116  %base = bitcast [4096 x i32]* %alloca to i32*
  ; @init is only declared in this file; its body is not visible here.
117  call void @init(i32* %base)
118  br label %loop
119loop:
  ; %iv counts up from 0 by 1; %accum carries the running sum.
120  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
121  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
122  %iv.next = add i64 %iv, 1
  ; Guard: the load in %pred runs only when iv < len (signed compare).
123  %earlycnd = icmp slt i64 %iv, %len
124  br i1 %earlycnd, label %pred, label %latch
125pred:
  ; Conditionally executed load of base[iv].
126  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
127  %val = load i32, i32* %addr
128  br label %latch
129latch:
  ; Iterations that bypassed %pred contribute 0 to the sum.
130  %val.phi = phi i32 [0, %loop], [%val, %pred]
131  %accum.next = add i32 %accum, %val.phi
  ; Leave once iv exceeds 4094 (unsigned), i.e. after the iv == 4095
  ; iteration, for 4096 iterations in total.
132  %exit = icmp ugt i64 %iv, 4094
133  br i1 %exit, label %loop_exit, label %loop
134
135loop_exit:
136  ret i32 %accum.next
137}
138
139;; Similar to the above, but without an analyzable condition.
;; Sums base[iv] for iv = 0..4095, reading an element only when the i1 loaded
;; from test_base[iv] is true and adding 0 otherwise. %len is not referenced
;; in the body. The assertions below expect the guard to be gathered with
;; scalar i1 loads and insertelement, while the i32 data is read with plain
;; <4 x i32> wide loads filtered by a select, with no masked.load call.
140define i32 @test_explicit_pred_generic(i64 %len, i1* %test_base) {
141; CHECK-LABEL: @test_explicit_pred_generic(
142; CHECK-NEXT:  entry:
143; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
144; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
145; CHECK-NEXT:    call void @init(i32* [[BASE]])
146; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
147; CHECK:       vector.ph:
148; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
149; CHECK:       vector.body:
150; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
151; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
152; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
153; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
154; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
155; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
156; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
157; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
158; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
159; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
160; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
161; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
162; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
163; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
164; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
165; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
166; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
167; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
168; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
169; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
170; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
171; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
172; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
173; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
174; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
175; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
176; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
177; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
178; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
179; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
180; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
181; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
182; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
183; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
184; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
185; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
186; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
187; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
188; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
189; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
190; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
191; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
192; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
193; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
194; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
195; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
196; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
197; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
198; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
199; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
200; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
201; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
202; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
203; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
204; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
205; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
206; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
207; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
208; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
209; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
210; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
211; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
212; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
213; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
214; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
215; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
216; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
217; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
218; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
219; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
220; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
221; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
222; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
223; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
224; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
225; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
226; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
227; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
228; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP71]], align 4
229; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
230; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
231; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP73]], align 4
232; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
233; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
234; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP75]], align 4
235; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
236; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
237; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
238; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
239; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer
240; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_LOAD4]], <4 x i32> zeroinitializer
241; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_LOAD5]], <4 x i32> zeroinitializer
242; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_LOAD6]], <4 x i32> zeroinitializer
243; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
244; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
245; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
246; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
247; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
248; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
249; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
250; CHECK:       middle.block:
251; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
252; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
253; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
254; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
255; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
256; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
257; CHECK:       scalar.ph:
258; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
259; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
260; CHECK-NEXT:    br label [[LOOP:%.*]]
261; CHECK:       loop:
262; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
263; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
264; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
265; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
266; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
267; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
268; CHECK:       pred:
269; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
270; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
271; CHECK-NEXT:    br label [[LATCH]]
272; CHECK:       latch:
273; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
274; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
275; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
276; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
277; CHECK:       loop_exit:
278; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
279; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
280;
281entry:
  ; 4096-element i32 stack buffer. The loop below only produces indices
  ; 0..4095, so every address formed from %base lies inside this allocation.
282  %alloca = alloca [4096 x i32]
283  %base = bitcast [4096 x i32]* %alloca to i32*
  ; @init is only declared in this file; its body is not visible here.
284  call void @init(i32* %base)
285  br label %loop
286loop:
  ; %iv counts up from 0 by 1; %accum carries the running sum.
287  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
288  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
289  %iv.next = add i64 %iv, 1
  ; Guard: an i1 read from test_base[iv], so the condition is a value loaded
  ; from memory rather than a comparison on %iv.
290  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
291  %earlycnd = load i1, i1* %test_addr
292  br i1 %earlycnd, label %pred, label %latch
293pred:
  ; Conditionally executed load of base[iv].
294  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
295  %val = load i32, i32* %addr
296  br label %latch
297latch:
  ; Iterations that bypassed %pred contribute 0 to the sum.
298  %val.phi = phi i32 [0, %loop], [%val, %pred]
299  %accum.next = add i32 %accum, %val.phi
  ; Leave once iv exceeds 4094 (unsigned), i.e. after the iv == 4095
  ; iteration, for 4096 iterations in total.
300  %exit = icmp ugt i64 %iv, 4094
301  br i1 %exit, label %loop_exit, label %loop
302
303loop_exit:
304  ret i32 %accum.next
305}
306
307; Trivial case where the address loaded from is loop invariant (and yes,
308; there are better lowerings; this is a test of robustness of vectorization,
309; nothing more.)
310; TODO: currently shows predication which can be removed
311define i32 @test_invariant_address(i64 %len, i1* %test_base) {
312; CHECK-LABEL: @test_invariant_address(
313; CHECK-NEXT:  entry:
314; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
315; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
316; CHECK-NEXT:    call void @init(i32* [[BASE]])
317; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
318; CHECK:       vector.ph:
319; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
320; CHECK:       vector.body:
321; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
322; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP100:%.*]], [[VECTOR_BODY]] ]
323; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP101:%.*]], [[VECTOR_BODY]] ]
324; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP102:%.*]], [[VECTOR_BODY]] ]
325; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP103:%.*]], [[VECTOR_BODY]] ]
326; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
327; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
328; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
329; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
330; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
331; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
332; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
333; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
334; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
335; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
336; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
337; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
338; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
339; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
340; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
341; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
342; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
343; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
344; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
345; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
346; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
347; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
348; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
349; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
350; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
351; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
352; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
353; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
354; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
355; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
356; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
357; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
358; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
359; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
360; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
361; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
362; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
363; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
364; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
365; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
366; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
367; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
368; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
369; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
370; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
371; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
372; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
373; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
374; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
375; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
376; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
377; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
378; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
379; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
380; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
381; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
382; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
383; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
384; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
385; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
386; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
387; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
388; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
389; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
390; CHECK-NEXT:    [[TMP64:%.*]] = load i32, i32* [[BASE]], align 4
391; CHECK-NEXT:    [[TMP65:%.*]] = load i32, i32* [[BASE]], align 4
392; CHECK-NEXT:    [[TMP66:%.*]] = load i32, i32* [[BASE]], align 4
393; CHECK-NEXT:    [[TMP67:%.*]] = load i32, i32* [[BASE]], align 4
394; CHECK-NEXT:    [[TMP68:%.*]] = insertelement <4 x i32> poison, i32 [[TMP64]], i32 0
395; CHECK-NEXT:    [[TMP69:%.*]] = insertelement <4 x i32> [[TMP68]], i32 [[TMP65]], i32 1
396; CHECK-NEXT:    [[TMP70:%.*]] = insertelement <4 x i32> [[TMP69]], i32 [[TMP66]], i32 2
397; CHECK-NEXT:    [[TMP71:%.*]] = insertelement <4 x i32> [[TMP70]], i32 [[TMP67]], i32 3
398; CHECK-NEXT:    [[TMP72:%.*]] = load i32, i32* [[BASE]], align 4
399; CHECK-NEXT:    [[TMP73:%.*]] = load i32, i32* [[BASE]], align 4
400; CHECK-NEXT:    [[TMP74:%.*]] = load i32, i32* [[BASE]], align 4
401; CHECK-NEXT:    [[TMP75:%.*]] = load i32, i32* [[BASE]], align 4
402; CHECK-NEXT:    [[TMP76:%.*]] = insertelement <4 x i32> poison, i32 [[TMP72]], i32 0
403; CHECK-NEXT:    [[TMP77:%.*]] = insertelement <4 x i32> [[TMP76]], i32 [[TMP73]], i32 1
404; CHECK-NEXT:    [[TMP78:%.*]] = insertelement <4 x i32> [[TMP77]], i32 [[TMP74]], i32 2
405; CHECK-NEXT:    [[TMP79:%.*]] = insertelement <4 x i32> [[TMP78]], i32 [[TMP75]], i32 3
406; CHECK-NEXT:    [[TMP80:%.*]] = load i32, i32* [[BASE]], align 4
407; CHECK-NEXT:    [[TMP81:%.*]] = load i32, i32* [[BASE]], align 4
408; CHECK-NEXT:    [[TMP82:%.*]] = load i32, i32* [[BASE]], align 4
409; CHECK-NEXT:    [[TMP83:%.*]] = load i32, i32* [[BASE]], align 4
410; CHECK-NEXT:    [[TMP84:%.*]] = insertelement <4 x i32> poison, i32 [[TMP80]], i32 0
411; CHECK-NEXT:    [[TMP85:%.*]] = insertelement <4 x i32> [[TMP84]], i32 [[TMP81]], i32 1
412; CHECK-NEXT:    [[TMP86:%.*]] = insertelement <4 x i32> [[TMP85]], i32 [[TMP82]], i32 2
413; CHECK-NEXT:    [[TMP87:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP83]], i32 3
414; CHECK-NEXT:    [[TMP88:%.*]] = load i32, i32* [[BASE]], align 4
415; CHECK-NEXT:    [[TMP89:%.*]] = load i32, i32* [[BASE]], align 4
416; CHECK-NEXT:    [[TMP90:%.*]] = load i32, i32* [[BASE]], align 4
417; CHECK-NEXT:    [[TMP91:%.*]] = load i32, i32* [[BASE]], align 4
418; CHECK-NEXT:    [[TMP92:%.*]] = insertelement <4 x i32> poison, i32 [[TMP88]], i32 0
419; CHECK-NEXT:    [[TMP93:%.*]] = insertelement <4 x i32> [[TMP92]], i32 [[TMP89]], i32 1
420; CHECK-NEXT:    [[TMP94:%.*]] = insertelement <4 x i32> [[TMP93]], i32 [[TMP90]], i32 2
421; CHECK-NEXT:    [[TMP95:%.*]] = insertelement <4 x i32> [[TMP94]], i32 [[TMP91]], i32 3
422; CHECK-NEXT:    [[TMP96:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
423; CHECK-NEXT:    [[TMP97:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
424; CHECK-NEXT:    [[TMP98:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
425; CHECK-NEXT:    [[TMP99:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
426; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[TMP71]], <4 x i32> zeroinitializer
427; CHECK-NEXT:    [[PREDPHI4:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP79]], <4 x i32> zeroinitializer
428; CHECK-NEXT:    [[PREDPHI5:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP87]], <4 x i32> zeroinitializer
429; CHECK-NEXT:    [[PREDPHI6:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP95]], <4 x i32> zeroinitializer
430; CHECK-NEXT:    [[TMP100]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
431; CHECK-NEXT:    [[TMP101]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI4]]
432; CHECK-NEXT:    [[TMP102]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI5]]
433; CHECK-NEXT:    [[TMP103]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]]
434; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
435; CHECK-NEXT:    [[TMP104:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
436; CHECK-NEXT:    br i1 [[TMP104]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
437; CHECK:       middle.block:
438; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP101]], [[TMP100]]
439; CHECK-NEXT:    [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP102]], [[BIN_RDX]]
440; CHECK-NEXT:    [[BIN_RDX8:%.*]] = add <4 x i32> [[TMP103]], [[BIN_RDX7]]
441; CHECK-NEXT:    [[TMP105:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]])
442; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
443; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
444; CHECK:       scalar.ph:
445; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
446; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP105]], [[MIDDLE_BLOCK]] ]
447; CHECK-NEXT:    br label [[LOOP:%.*]]
448; CHECK:       loop:
449; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
450; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
451; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
452; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
453; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
454; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
455; CHECK:       pred:
456; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[BASE]], align 4
457; CHECK-NEXT:    br label [[LATCH]]
458; CHECK:       latch:
459; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
460; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
461; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
462; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
463; CHECK:       loop_exit:
464; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP105]], [[MIDDLE_BLOCK]] ]
465; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
466;
467entry:
468  %alloca = alloca [4096 x i32]
469  %base = bitcast [4096 x i32]* %alloca to i32*
470  call void @init(i32* %base)
471  br label %loop
472loop:
473  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
474  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
475  %iv.next = add i64 %iv, 1
476  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
477  %earlycnd = load i1, i1* %test_addr
478  br i1 %earlycnd, label %pred, label %latch
479pred:
480  %val = load i32, i32* %base
481  br label %latch
482latch:
483  %val.phi = phi i32 [0, %loop], [%val, %pred]
484  %accum.next = add i32 %accum, %val.phi
485  %exit = icmp ugt i64 %iv, 4094
486  br i1 %exit, label %loop_exit, label %loop
487
488loop_exit:
489  ret i32 %accum.next
490}
491
492; Overlapping loads (i16 stride, i32 access) - fails the alignment check, not the dereferenceability check
493define i32 @test_step_narrower_than_access(i64 %len, i1* %test_base) {
494; CHECK-LABEL: @test_step_narrower_than_access(
495; CHECK-NEXT:  entry:
496; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
497; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
498; CHECK-NEXT:    call void @init(i32* [[BASE]])
499; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
500; CHECK:       vector.ph:
501; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
502; CHECK:       vector.body:
503; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE33:%.*]] ]
504; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP180:%.*]], [[PRED_LOAD_CONTINUE33]] ]
505; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP181:%.*]], [[PRED_LOAD_CONTINUE33]] ]
506; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP182:%.*]], [[PRED_LOAD_CONTINUE33]] ]
507; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP183:%.*]], [[PRED_LOAD_CONTINUE33]] ]
508; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
509; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
510; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
511; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
512; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
513; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
514; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
515; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
516; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
517; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
518; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
519; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
520; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
521; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
522; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
523; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
524; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
525; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
526; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
527; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
528; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
529; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
530; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
531; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
532; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
533; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
534; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
535; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
536; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
537; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
538; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
539; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
540; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
541; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
542; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
543; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
544; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
545; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
546; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
547; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
548; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
549; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
550; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
551; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
552; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
553; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
554; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
555; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
556; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
557; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
558; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
559; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
560; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
561; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
562; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
563; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
564; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
565; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
566; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
567; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
568; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
569; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
570; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
571; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
572; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <4 x i1> [[TMP39]], i32 0
573; CHECK-NEXT:    br i1 [[TMP64]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
574; CHECK:       pred.load.if:
575; CHECK-NEXT:    [[TMP65:%.*]] = bitcast i32* [[BASE]] to i16*
576; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr inbounds i16, i16* [[TMP65]], i64 [[TMP0]]
577; CHECK-NEXT:    [[TMP67:%.*]] = bitcast i16* [[TMP66]] to i32*
578; CHECK-NEXT:    [[TMP68:%.*]] = load i32, i32* [[TMP67]], align 4
579; CHECK-NEXT:    [[TMP69:%.*]] = insertelement <4 x i32> poison, i32 [[TMP68]], i32 0
580; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
581; CHECK:       pred.load.continue:
582; CHECK-NEXT:    [[TMP70:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP69]], [[PRED_LOAD_IF]] ]
583; CHECK-NEXT:    [[TMP71:%.*]] = extractelement <4 x i1> [[TMP39]], i32 1
584; CHECK-NEXT:    br i1 [[TMP71]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5:%.*]]
585; CHECK:       pred.load.if4:
586; CHECK-NEXT:    [[TMP72:%.*]] = bitcast i32* [[BASE]] to i16*
587; CHECK-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i16, i16* [[TMP72]], i64 [[TMP1]]
588; CHECK-NEXT:    [[TMP74:%.*]] = bitcast i16* [[TMP73]] to i32*
589; CHECK-NEXT:    [[TMP75:%.*]] = load i32, i32* [[TMP74]], align 4
590; CHECK-NEXT:    [[TMP76:%.*]] = insertelement <4 x i32> [[TMP70]], i32 [[TMP75]], i32 1
591; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE5]]
592; CHECK:       pred.load.continue5:
593; CHECK-NEXT:    [[TMP77:%.*]] = phi <4 x i32> [ [[TMP70]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP76]], [[PRED_LOAD_IF4]] ]
594; CHECK-NEXT:    [[TMP78:%.*]] = extractelement <4 x i1> [[TMP39]], i32 2
595; CHECK-NEXT:    br i1 [[TMP78]], label [[PRED_LOAD_IF6:%.*]], label [[PRED_LOAD_CONTINUE7:%.*]]
596; CHECK:       pred.load.if6:
597; CHECK-NEXT:    [[TMP79:%.*]] = bitcast i32* [[BASE]] to i16*
598; CHECK-NEXT:    [[TMP80:%.*]] = getelementptr inbounds i16, i16* [[TMP79]], i64 [[TMP2]]
599; CHECK-NEXT:    [[TMP81:%.*]] = bitcast i16* [[TMP80]] to i32*
600; CHECK-NEXT:    [[TMP82:%.*]] = load i32, i32* [[TMP81]], align 4
601; CHECK-NEXT:    [[TMP83:%.*]] = insertelement <4 x i32> [[TMP77]], i32 [[TMP82]], i32 2
602; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE7]]
603; CHECK:       pred.load.continue7:
604; CHECK-NEXT:    [[TMP84:%.*]] = phi <4 x i32> [ [[TMP77]], [[PRED_LOAD_CONTINUE5]] ], [ [[TMP83]], [[PRED_LOAD_IF6]] ]
605; CHECK-NEXT:    [[TMP85:%.*]] = extractelement <4 x i1> [[TMP39]], i32 3
606; CHECK-NEXT:    br i1 [[TMP85]], label [[PRED_LOAD_IF8:%.*]], label [[PRED_LOAD_CONTINUE9:%.*]]
607; CHECK:       pred.load.if8:
608; CHECK-NEXT:    [[TMP86:%.*]] = bitcast i32* [[BASE]] to i16*
609; CHECK-NEXT:    [[TMP87:%.*]] = getelementptr inbounds i16, i16* [[TMP86]], i64 [[TMP3]]
610; CHECK-NEXT:    [[TMP88:%.*]] = bitcast i16* [[TMP87]] to i32*
611; CHECK-NEXT:    [[TMP89:%.*]] = load i32, i32* [[TMP88]], align 4
612; CHECK-NEXT:    [[TMP90:%.*]] = insertelement <4 x i32> [[TMP84]], i32 [[TMP89]], i32 3
613; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE9]]
614; CHECK:       pred.load.continue9:
615; CHECK-NEXT:    [[TMP91:%.*]] = phi <4 x i32> [ [[TMP84]], [[PRED_LOAD_CONTINUE7]] ], [ [[TMP90]], [[PRED_LOAD_IF8]] ]
616; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <4 x i1> [[TMP47]], i32 0
617; CHECK-NEXT:    br i1 [[TMP92]], label [[PRED_LOAD_IF10:%.*]], label [[PRED_LOAD_CONTINUE11:%.*]]
618; CHECK:       pred.load.if10:
619; CHECK-NEXT:    [[TMP93:%.*]] = bitcast i32* [[BASE]] to i16*
620; CHECK-NEXT:    [[TMP94:%.*]] = getelementptr inbounds i16, i16* [[TMP93]], i64 [[TMP4]]
621; CHECK-NEXT:    [[TMP95:%.*]] = bitcast i16* [[TMP94]] to i32*
622; CHECK-NEXT:    [[TMP96:%.*]] = load i32, i32* [[TMP95]], align 4
623; CHECK-NEXT:    [[TMP97:%.*]] = insertelement <4 x i32> poison, i32 [[TMP96]], i32 0
624; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE11]]
625; CHECK:       pred.load.continue11:
626; CHECK-NEXT:    [[TMP98:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE9]] ], [ [[TMP97]], [[PRED_LOAD_IF10]] ]
627; CHECK-NEXT:    [[TMP99:%.*]] = extractelement <4 x i1> [[TMP47]], i32 1
628; CHECK-NEXT:    br i1 [[TMP99]], label [[PRED_LOAD_IF12:%.*]], label [[PRED_LOAD_CONTINUE13:%.*]]
629; CHECK:       pred.load.if12:
630; CHECK-NEXT:    [[TMP100:%.*]] = bitcast i32* [[BASE]] to i16*
631; CHECK-NEXT:    [[TMP101:%.*]] = getelementptr inbounds i16, i16* [[TMP100]], i64 [[TMP5]]
632; CHECK-NEXT:    [[TMP102:%.*]] = bitcast i16* [[TMP101]] to i32*
633; CHECK-NEXT:    [[TMP103:%.*]] = load i32, i32* [[TMP102]], align 4
634; CHECK-NEXT:    [[TMP104:%.*]] = insertelement <4 x i32> [[TMP98]], i32 [[TMP103]], i32 1
635; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE13]]
636; CHECK:       pred.load.continue13:
637; CHECK-NEXT:    [[TMP105:%.*]] = phi <4 x i32> [ [[TMP98]], [[PRED_LOAD_CONTINUE11]] ], [ [[TMP104]], [[PRED_LOAD_IF12]] ]
638; CHECK-NEXT:    [[TMP106:%.*]] = extractelement <4 x i1> [[TMP47]], i32 2
639; CHECK-NEXT:    br i1 [[TMP106]], label [[PRED_LOAD_IF14:%.*]], label [[PRED_LOAD_CONTINUE15:%.*]]
640; CHECK:       pred.load.if14:
641; CHECK-NEXT:    [[TMP107:%.*]] = bitcast i32* [[BASE]] to i16*
642; CHECK-NEXT:    [[TMP108:%.*]] = getelementptr inbounds i16, i16* [[TMP107]], i64 [[TMP6]]
643; CHECK-NEXT:    [[TMP109:%.*]] = bitcast i16* [[TMP108]] to i32*
644; CHECK-NEXT:    [[TMP110:%.*]] = load i32, i32* [[TMP109]], align 4
645; CHECK-NEXT:    [[TMP111:%.*]] = insertelement <4 x i32> [[TMP105]], i32 [[TMP110]], i32 2
646; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE15]]
647; CHECK:       pred.load.continue15:
648; CHECK-NEXT:    [[TMP112:%.*]] = phi <4 x i32> [ [[TMP105]], [[PRED_LOAD_CONTINUE13]] ], [ [[TMP111]], [[PRED_LOAD_IF14]] ]
649; CHECK-NEXT:    [[TMP113:%.*]] = extractelement <4 x i1> [[TMP47]], i32 3
650; CHECK-NEXT:    br i1 [[TMP113]], label [[PRED_LOAD_IF16:%.*]], label [[PRED_LOAD_CONTINUE17:%.*]]
651; CHECK:       pred.load.if16:
652; CHECK-NEXT:    [[TMP114:%.*]] = bitcast i32* [[BASE]] to i16*
653; CHECK-NEXT:    [[TMP115:%.*]] = getelementptr inbounds i16, i16* [[TMP114]], i64 [[TMP7]]
654; CHECK-NEXT:    [[TMP116:%.*]] = bitcast i16* [[TMP115]] to i32*
655; CHECK-NEXT:    [[TMP117:%.*]] = load i32, i32* [[TMP116]], align 4
656; CHECK-NEXT:    [[TMP118:%.*]] = insertelement <4 x i32> [[TMP112]], i32 [[TMP117]], i32 3
657; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE17]]
658; CHECK:       pred.load.continue17:
659; CHECK-NEXT:    [[TMP119:%.*]] = phi <4 x i32> [ [[TMP112]], [[PRED_LOAD_CONTINUE15]] ], [ [[TMP118]], [[PRED_LOAD_IF16]] ]
660; CHECK-NEXT:    [[TMP120:%.*]] = extractelement <4 x i1> [[TMP55]], i32 0
661; CHECK-NEXT:    br i1 [[TMP120]], label [[PRED_LOAD_IF18:%.*]], label [[PRED_LOAD_CONTINUE19:%.*]]
662; CHECK:       pred.load.if18:
663; CHECK-NEXT:    [[TMP121:%.*]] = bitcast i32* [[BASE]] to i16*
664; CHECK-NEXT:    [[TMP122:%.*]] = getelementptr inbounds i16, i16* [[TMP121]], i64 [[TMP8]]
665; CHECK-NEXT:    [[TMP123:%.*]] = bitcast i16* [[TMP122]] to i32*
666; CHECK-NEXT:    [[TMP124:%.*]] = load i32, i32* [[TMP123]], align 4
667; CHECK-NEXT:    [[TMP125:%.*]] = insertelement <4 x i32> poison, i32 [[TMP124]], i32 0
668; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE19]]
669; CHECK:       pred.load.continue19:
670; CHECK-NEXT:    [[TMP126:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP125]], [[PRED_LOAD_IF18]] ]
671; CHECK-NEXT:    [[TMP127:%.*]] = extractelement <4 x i1> [[TMP55]], i32 1
672; CHECK-NEXT:    br i1 [[TMP127]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]]
673; CHECK:       pred.load.if20:
674; CHECK-NEXT:    [[TMP128:%.*]] = bitcast i32* [[BASE]] to i16*
675; CHECK-NEXT:    [[TMP129:%.*]] = getelementptr inbounds i16, i16* [[TMP128]], i64 [[TMP9]]
676; CHECK-NEXT:    [[TMP130:%.*]] = bitcast i16* [[TMP129]] to i32*
677; CHECK-NEXT:    [[TMP131:%.*]] = load i32, i32* [[TMP130]], align 4
678; CHECK-NEXT:    [[TMP132:%.*]] = insertelement <4 x i32> [[TMP126]], i32 [[TMP131]], i32 1
679; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE21]]
680; CHECK:       pred.load.continue21:
681; CHECK-NEXT:    [[TMP133:%.*]] = phi <4 x i32> [ [[TMP126]], [[PRED_LOAD_CONTINUE19]] ], [ [[TMP132]], [[PRED_LOAD_IF20]] ]
682; CHECK-NEXT:    [[TMP134:%.*]] = extractelement <4 x i1> [[TMP55]], i32 2
683; CHECK-NEXT:    br i1 [[TMP134]], label [[PRED_LOAD_IF22:%.*]], label [[PRED_LOAD_CONTINUE23:%.*]]
684; CHECK:       pred.load.if22:
685; CHECK-NEXT:    [[TMP135:%.*]] = bitcast i32* [[BASE]] to i16*
686; CHECK-NEXT:    [[TMP136:%.*]] = getelementptr inbounds i16, i16* [[TMP135]], i64 [[TMP10]]
687; CHECK-NEXT:    [[TMP137:%.*]] = bitcast i16* [[TMP136]] to i32*
688; CHECK-NEXT:    [[TMP138:%.*]] = load i32, i32* [[TMP137]], align 4
689; CHECK-NEXT:    [[TMP139:%.*]] = insertelement <4 x i32> [[TMP133]], i32 [[TMP138]], i32 2
690; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE23]]
691; CHECK:       pred.load.continue23:
692; CHECK-NEXT:    [[TMP140:%.*]] = phi <4 x i32> [ [[TMP133]], [[PRED_LOAD_CONTINUE21]] ], [ [[TMP139]], [[PRED_LOAD_IF22]] ]
693; CHECK-NEXT:    [[TMP141:%.*]] = extractelement <4 x i1> [[TMP55]], i32 3
694; CHECK-NEXT:    br i1 [[TMP141]], label [[PRED_LOAD_IF24:%.*]], label [[PRED_LOAD_CONTINUE25:%.*]]
695; CHECK:       pred.load.if24:
696; CHECK-NEXT:    [[TMP142:%.*]] = bitcast i32* [[BASE]] to i16*
697; CHECK-NEXT:    [[TMP143:%.*]] = getelementptr inbounds i16, i16* [[TMP142]], i64 [[TMP11]]
698; CHECK-NEXT:    [[TMP144:%.*]] = bitcast i16* [[TMP143]] to i32*
699; CHECK-NEXT:    [[TMP145:%.*]] = load i32, i32* [[TMP144]], align 4
700; CHECK-NEXT:    [[TMP146:%.*]] = insertelement <4 x i32> [[TMP140]], i32 [[TMP145]], i32 3
701; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE25]]
702; CHECK:       pred.load.continue25:
703; CHECK-NEXT:    [[TMP147:%.*]] = phi <4 x i32> [ [[TMP140]], [[PRED_LOAD_CONTINUE23]] ], [ [[TMP146]], [[PRED_LOAD_IF24]] ]
704; CHECK-NEXT:    [[TMP148:%.*]] = extractelement <4 x i1> [[TMP63]], i32 0
705; CHECK-NEXT:    br i1 [[TMP148]], label [[PRED_LOAD_IF26:%.*]], label [[PRED_LOAD_CONTINUE27:%.*]]
706; CHECK:       pred.load.if26:
707; CHECK-NEXT:    [[TMP149:%.*]] = bitcast i32* [[BASE]] to i16*
708; CHECK-NEXT:    [[TMP150:%.*]] = getelementptr inbounds i16, i16* [[TMP149]], i64 [[TMP12]]
709; CHECK-NEXT:    [[TMP151:%.*]] = bitcast i16* [[TMP150]] to i32*
710; CHECK-NEXT:    [[TMP152:%.*]] = load i32, i32* [[TMP151]], align 4
711; CHECK-NEXT:    [[TMP153:%.*]] = insertelement <4 x i32> poison, i32 [[TMP152]], i32 0
712; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE27]]
713; CHECK:       pred.load.continue27:
714; CHECK-NEXT:    [[TMP154:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE25]] ], [ [[TMP153]], [[PRED_LOAD_IF26]] ]
715; CHECK-NEXT:    [[TMP155:%.*]] = extractelement <4 x i1> [[TMP63]], i32 1
716; CHECK-NEXT:    br i1 [[TMP155]], label [[PRED_LOAD_IF28:%.*]], label [[PRED_LOAD_CONTINUE29:%.*]]
717; CHECK:       pred.load.if28:
718; CHECK-NEXT:    [[TMP156:%.*]] = bitcast i32* [[BASE]] to i16*
719; CHECK-NEXT:    [[TMP157:%.*]] = getelementptr inbounds i16, i16* [[TMP156]], i64 [[TMP13]]
720; CHECK-NEXT:    [[TMP158:%.*]] = bitcast i16* [[TMP157]] to i32*
721; CHECK-NEXT:    [[TMP159:%.*]] = load i32, i32* [[TMP158]], align 4
722; CHECK-NEXT:    [[TMP160:%.*]] = insertelement <4 x i32> [[TMP154]], i32 [[TMP159]], i32 1
723; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE29]]
724; CHECK:       pred.load.continue29:
725; CHECK-NEXT:    [[TMP161:%.*]] = phi <4 x i32> [ [[TMP154]], [[PRED_LOAD_CONTINUE27]] ], [ [[TMP160]], [[PRED_LOAD_IF28]] ]
726; CHECK-NEXT:    [[TMP162:%.*]] = extractelement <4 x i1> [[TMP63]], i32 2
727; CHECK-NEXT:    br i1 [[TMP162]], label [[PRED_LOAD_IF30:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]]
728; CHECK:       pred.load.if30:
729; CHECK-NEXT:    [[TMP163:%.*]] = bitcast i32* [[BASE]] to i16*
730; CHECK-NEXT:    [[TMP164:%.*]] = getelementptr inbounds i16, i16* [[TMP163]], i64 [[TMP14]]
731; CHECK-NEXT:    [[TMP165:%.*]] = bitcast i16* [[TMP164]] to i32*
732; CHECK-NEXT:    [[TMP166:%.*]] = load i32, i32* [[TMP165]], align 4
733; CHECK-NEXT:    [[TMP167:%.*]] = insertelement <4 x i32> [[TMP161]], i32 [[TMP166]], i32 2
734; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE31]]
735; CHECK:       pred.load.continue31:
736; CHECK-NEXT:    [[TMP168:%.*]] = phi <4 x i32> [ [[TMP161]], [[PRED_LOAD_CONTINUE29]] ], [ [[TMP167]], [[PRED_LOAD_IF30]] ]
737; CHECK-NEXT:    [[TMP169:%.*]] = extractelement <4 x i1> [[TMP63]], i32 3
738; CHECK-NEXT:    br i1 [[TMP169]], label [[PRED_LOAD_IF32:%.*]], label [[PRED_LOAD_CONTINUE33]]
739; CHECK:       pred.load.if32:
740; CHECK-NEXT:    [[TMP170:%.*]] = bitcast i32* [[BASE]] to i16*
741; CHECK-NEXT:    [[TMP171:%.*]] = getelementptr inbounds i16, i16* [[TMP170]], i64 [[TMP15]]
742; CHECK-NEXT:    [[TMP172:%.*]] = bitcast i16* [[TMP171]] to i32*
743; CHECK-NEXT:    [[TMP173:%.*]] = load i32, i32* [[TMP172]], align 4
744; CHECK-NEXT:    [[TMP174:%.*]] = insertelement <4 x i32> [[TMP168]], i32 [[TMP173]], i32 3
745; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE33]]
746; CHECK:       pred.load.continue33:
747; CHECK-NEXT:    [[TMP175:%.*]] = phi <4 x i32> [ [[TMP168]], [[PRED_LOAD_CONTINUE31]] ], [ [[TMP174]], [[PRED_LOAD_IF32]] ]
748; CHECK-NEXT:    [[TMP176:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
749; CHECK-NEXT:    [[TMP177:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
750; CHECK-NEXT:    [[TMP178:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
751; CHECK-NEXT:    [[TMP179:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
752; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[TMP91]], <4 x i32> zeroinitializer
753; CHECK-NEXT:    [[PREDPHI34:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP119]], <4 x i32> zeroinitializer
754; CHECK-NEXT:    [[PREDPHI35:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP147]], <4 x i32> zeroinitializer
755; CHECK-NEXT:    [[PREDPHI36:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP175]], <4 x i32> zeroinitializer
756; CHECK-NEXT:    [[TMP180]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
757; CHECK-NEXT:    [[TMP181]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI34]]
758; CHECK-NEXT:    [[TMP182]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI35]]
759; CHECK-NEXT:    [[TMP183]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI36]]
760; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
761; CHECK-NEXT:    [[TMP184:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
762; CHECK-NEXT:    br i1 [[TMP184]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
763; CHECK:       middle.block:
764; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP181]], [[TMP180]]
765; CHECK-NEXT:    [[BIN_RDX37:%.*]] = add <4 x i32> [[TMP182]], [[BIN_RDX]]
766; CHECK-NEXT:    [[BIN_RDX38:%.*]] = add <4 x i32> [[TMP183]], [[BIN_RDX37]]
767; CHECK-NEXT:    [[TMP185:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX38]])
768; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
769; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
770; CHECK:       scalar.ph:
771; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
772; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP185]], [[MIDDLE_BLOCK]] ]
773; CHECK-NEXT:    br label [[LOOP:%.*]]
774; CHECK:       loop:
775; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
776; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
777; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
778; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
779; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
780; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
781; CHECK:       pred:
782; CHECK-NEXT:    [[BASE_I16P:%.*]] = bitcast i32* [[BASE]] to i16*
783; CHECK-NEXT:    [[ADDR_I16P:%.*]] = getelementptr inbounds i16, i16* [[BASE_I16P]], i64 [[IV]]
784; CHECK-NEXT:    [[ADDR:%.*]] = bitcast i16* [[ADDR_I16P]] to i32*
785; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
786; CHECK-NEXT:    br label [[LATCH]]
787; CHECK:       latch:
788; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
789; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
790; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
791; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
792; CHECK:       loop_exit:
793; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP185]], [[MIDDLE_BLOCK]] ]
794; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
795;
; Input IR: a conditional sum over a 4096 x i32 stack buffer filled by @init.
; On each iteration the i1 at %test_base[%iv] decides whether an i32 is loaded
; and added to the accumulator; otherwise 0 is added. %len is unused.
; The assertions above expect the i32 loads in the vector body to stay scalar,
; each in its own pred.load.if* block guarded by one lane of the condition.
796entry:
797  %alloca = alloca [4096 x i32]
798  %base = bitcast [4096 x i32]* %alloca to i32*
799  call void @init(i32* %base)
800  br label %loop
801loop:
802  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
803  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
804  %iv.next = add i64 %iv, 1
805  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
806  %earlycnd = load i1, i1* %test_addr
807  br i1 %earlycnd, label %pred, label %latch
808pred:
; The address advances by one i16 (2 bytes) per iteration but is loaded as an
; i32 (4 bytes), so consecutive iterations read overlapping bytes. The load has
; no explicit alignment; the assertions above show it printed as align 4.
809  %base.i16p = bitcast i32* %base to i16*
810  %addr.i16p = getelementptr inbounds i16, i16* %base.i16p, i64 %iv
811  %addr = bitcast i16* %addr.i16p to i32*
812  %val = load i32, i32* %addr
813  br label %latch
814latch:
; %val.phi is 0 when the load was skipped, so skipped iterations add nothing.
815  %val.phi = phi i32 [0, %loop], [%val, %pred]
816  %accum.next = add i32 %accum, %val.phi
; Exit once %iv exceeds 4094: the loop runs for %iv = 0..4095 (4096 iterations).
817  %exit = icmp ugt i64 %iv, 4094
818  br i1 %exit, label %loop_exit, label %loop
819
820loop_exit:
821  ret i32 %accum.next
822}
823
824define i32 @test_max_trip_count(i64 %len, i1* %test_base, i64 %n) {
825; CHECK-LABEL: @test_max_trip_count(
826; CHECK-NEXT:  entry:
827; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
828; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
829; CHECK-NEXT:    call void @init(i32* [[BASE]])
830; CHECK-NEXT:    [[MIN_CMP:%.*]] = icmp ult i64 4096, [[N:%.*]]
831; CHECK-NEXT:    [[MIN_N:%.*]] = select i1 [[MIN_CMP]], i64 4096, i64 [[N]]
832; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i64 [[MIN_N]], 2
833; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
834; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
835; CHECK:       vector.ph:
836; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
837; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
838; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
839; CHECK:       vector.body:
840; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
841; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
842; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
843; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
844; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP84:%.*]], [[VECTOR_BODY]] ]
845; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
846; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
847; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 2
848; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 3
849; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 4
850; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 5
851; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 6
852; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 7
853; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 8
854; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 9
855; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 10
856; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 11
857; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 12
858; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 13
859; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 14
860; CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX]], 15
861; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP1]]
862; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
863; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
864; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
865; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
866; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
867; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
868; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
869; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
870; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
871; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
872; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
873; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
874; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
875; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
876; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP16]]
877; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
878; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
879; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
880; CHECK-NEXT:    [[TMP36:%.*]] = load i1, i1* [[TMP20]], align 1
881; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> poison, i1 [[TMP33]], i32 0
882; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 1
883; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 2
884; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <4 x i1> [[TMP39]], i1 [[TMP36]], i32 3
885; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
886; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
887; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
888; CHECK-NEXT:    [[TMP44:%.*]] = load i1, i1* [[TMP24]], align 1
889; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> poison, i1 [[TMP41]], i32 0
890; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 1
891; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 2
892; CHECK-NEXT:    [[TMP48:%.*]] = insertelement <4 x i1> [[TMP47]], i1 [[TMP44]], i32 3
893; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
894; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
895; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
896; CHECK-NEXT:    [[TMP52:%.*]] = load i1, i1* [[TMP28]], align 1
897; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> poison, i1 [[TMP49]], i32 0
898; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 1
899; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 2
900; CHECK-NEXT:    [[TMP56:%.*]] = insertelement <4 x i1> [[TMP55]], i1 [[TMP52]], i32 3
901; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
902; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
903; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
904; CHECK-NEXT:    [[TMP60:%.*]] = load i1, i1* [[TMP32]], align 1
905; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> poison, i1 [[TMP57]], i32 0
906; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1
907; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2
908; CHECK-NEXT:    [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3
909; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP1]]
910; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP5]]
911; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP9]]
912; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP13]]
913; CHECK-NEXT:    [[TMP69:%.*]] = getelementptr i32, i32* [[TMP65]], i32 0
914; CHECK-NEXT:    [[TMP70:%.*]] = bitcast i32* [[TMP69]] to <4 x i32>*
915; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP70]], i32 4, <4 x i1> [[TMP40]], <4 x i32> poison)
916; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr i32, i32* [[TMP65]], i32 4
917; CHECK-NEXT:    [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
918; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP72]], i32 4, <4 x i1> [[TMP48]], <4 x i32> poison)
919; CHECK-NEXT:    [[TMP73:%.*]] = getelementptr i32, i32* [[TMP65]], i32 8
920; CHECK-NEXT:    [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
921; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP74]], i32 4, <4 x i1> [[TMP56]], <4 x i32> poison)
922; CHECK-NEXT:    [[TMP75:%.*]] = getelementptr i32, i32* [[TMP65]], i32 12
923; CHECK-NEXT:    [[TMP76:%.*]] = bitcast i32* [[TMP75]] to <4 x i32>*
924; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP76]], i32 4, <4 x i1> [[TMP64]], <4 x i32> poison)
925; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
926; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP48]], <i1 true, i1 true, i1 true, i1 true>
927; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP56]], <i1 true, i1 true, i1 true, i1 true>
928; CHECK-NEXT:    [[TMP80:%.*]] = xor <4 x i1> [[TMP64]], <i1 true, i1 true, i1 true, i1 true>
929; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP40]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
930; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
931; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP56]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
932; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP64]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
933; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
934; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
935; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
936; CHECK-NEXT:    [[TMP84]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
937; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
938; CHECK-NEXT:    [[TMP85:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
939; CHECK-NEXT:    br i1 [[TMP85]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
940; CHECK:       middle.block:
941; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP82]], [[TMP81]]
942; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX]]
943; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP84]], [[BIN_RDX10]]
944; CHECK-NEXT:    [[TMP86:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
945; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
946; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
947; CHECK:       scalar.ph:
948; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
949; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ]
950; CHECK-NEXT:    br label [[LOOP:%.*]]
951; CHECK:       loop:
952; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
953; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
954; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
955; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
956; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
957; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
958; CHECK:       pred:
959; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
960; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
961; CHECK-NEXT:    br label [[LATCH]]
962; CHECK:       latch:
963; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
964; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
965; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], [[MIN_N]]
966; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
967; CHECK:       loop_exit:
968; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ]
969; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
970;
971entry:
972  %alloca = alloca [4096 x i32]
973  %base = bitcast [4096 x i32]* %alloca to i32*
974  call void @init(i32* %base)
975  %min.cmp = icmp ult i64 4096, %n
976  %min.n = select i1 %min.cmp, i64 4096, i64 %n
977  br label %loop
978loop:
979  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
980  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
981  %iv.next = add i64 %iv, 1
982  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
983  %earlycnd = load i1, i1* %test_addr
984  br i1 %earlycnd, label %pred, label %latch
985pred:
986  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
987  %val = load i32, i32* %addr
988  br label %latch
989latch:
990  %val.phi = phi i32 [0, %loop], [%val, %pred]
991  %accum.next = add i32 %accum, %val.phi
992  %exit = icmp ugt i64 %iv, %min.n
993  br i1 %exit, label %loop_exit, label %loop
994
995loop_exit:
996  ret i32 %accum.next
997}
998
999
1000
1001
;; Conditional-load sum reduction whose induction variable starts at 1024
;; instead of 0. %iv covers [1024, 4095] (3072 iterations; the exit test is
;; `%iv ugt 4094`), so every %base[%iv] access stays inside the 4096-element
;; alloca. The load of %base[%iv] only executes when the i1 loaded from
;; %test_base[%iv] is true; otherwise 0 is added to the accumulator.
;; %len is not used by the function body.
;; The assertions below currently expect the conditional load to be emitted
;; as @llvm.masked.load (vector width 4, four parts per vector iteration).
1002define i32 @test_non_zero_start(i64 %len, i1* %test_base) {
1003; CHECK-LABEL: @test_non_zero_start(
1004; CHECK-NEXT:  entry:
1005; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
1006; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
1007; CHECK-NEXT:    call void @init(i32* [[BASE]])
1008; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1009; CHECK:       vector.ph:
1010; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1011; CHECK:       vector.body:
1012; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1013; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
1014; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
1015; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
1016; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
1017; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1024, [[INDEX]]
1018; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1019; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
1020; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
1021; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 3
1022; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 4
1023; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 5
1024; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6
1025; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 7
1026; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 8
1027; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 9
1028; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 10
1029; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 11
1030; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 12
1031; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 13
1032; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 14
1033; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 15
1034; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
1035; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
1036; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
1037; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
1038; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
1039; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
1040; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
1041; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
1042; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
1043; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
1044; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
1045; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
1046; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
1047; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
1048; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
1049; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
1050; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
1051; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
1052; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
1053; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
1054; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
1055; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
1056; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
1057; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
1058; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
1059; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
1060; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
1061; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
1062; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
1063; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
1064; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
1065; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
1066; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
1067; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
1068; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
1069; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
1070; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
1071; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
1072; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
1073; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
1074; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
1075; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
1076; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
1077; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
1078; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
1079; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
1080; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
1081; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
1082; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
1083; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
1084; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
1085; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
1086; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
1087; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
1088; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
1089; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
1090; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
1091; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
1092; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
1093; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
1094; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
1095; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
1096; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
1097; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
1098; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
1099; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
1100; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
1101; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
1102; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
1103; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
1104; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
1105; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
1106; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
1107; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
1108; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
1109; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
1110; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1111; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 3072
1112; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
1113; CHECK:       middle.block:
1114; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
1115; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
1116; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
1117; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
1118; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 3072, 3072
1119; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
1120; CHECK:       scalar.ph:
1121; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 1024, [[ENTRY:%.*]] ]
1122; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
1123; CHECK-NEXT:    br label [[LOOP:%.*]]
1124; CHECK:       loop:
1125; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
1126; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
1127; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
1128; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
1129; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
1130; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
1131; CHECK:       pred:
1132; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
1133; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
1134; CHECK-NEXT:    br label [[LATCH]]
1135; CHECK:       latch:
1136; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
1137; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
1138; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
1139; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
1140; CHECK:       loop_exit:
1141; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
1142; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
1143;
1144entry:
1145  %alloca = alloca [4096 x i32]
1146  %base = bitcast [4096 x i32]* %alloca to i32*
1147  call void @init(i32* %base)
1148  br label %loop
1149loop:
  ; Induction variable starts at 1024 rather than 0.
1150  %iv = phi i64 [ 1024, %entry ], [ %iv.next, %latch ]
1151  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
1152  %iv.next = add i64 %iv, 1
1153  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
1154  %earlycnd = load i1, i1* %test_addr
1155  br i1 %earlycnd, label %pred, label %latch
1156pred:
  ; Only reached when %earlycnd is true; this is the conditional load under test.
1157  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
1158  %val = load i32, i32* %addr
1159  br label %latch
1160latch:
1161  %val.phi = phi i32 [0, %loop], [%val, %pred]
1162  %accum.next = add i32 %accum, %val.phi
  ; Exit once %iv exceeds 4094, i.e. the last iteration executed has %iv == 4095.
1163  %exit = icmp ugt i64 %iv, 4094
1164  br i1 %exit, label %loop_exit, label %loop
1165
1166loop_exit:
1167  ret i32 %accum.next
1168}
1169
;; Conditional-load sum reduction whose induction variable starts at -10.
;; The exit test is the unsigned compare `%iv ugt 4094`, which -10 already
;; satisfies, so the loop body runs exactly once. On that iteration the
;; conditional load would read %base[-10], which lies before the start of the
;; 4096-element alloca. %len is not used by the function body.
;; The assertions below expect no vector body: only the original scalar loop
;; remains in the output.
;; NOTE(review): the "neg_" prefix presumably marks this as a negative test
;; (the transform must not fire) -- confirm against the rest of the file.
1170define i32 @neg_out_of_bounds_start(i64 %len, i1* %test_base) {
1171; CHECK-LABEL: @neg_out_of_bounds_start(
1172; CHECK-NEXT:  entry:
1173; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
1174; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
1175; CHECK-NEXT:    call void @init(i32* [[BASE]])
1176; CHECK-NEXT:    br label [[LOOP:%.*]]
1177; CHECK:       loop:
1178; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ -10, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
1179; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
1180; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
1181; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[IV]]
1182; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
1183; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
1184; CHECK:       pred:
1185; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
1186; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
1187; CHECK-NEXT:    br label [[LATCH]]
1188; CHECK:       latch:
1189; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
1190; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
1191; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
1192; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
1193; CHECK:       loop_exit:
1194; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ]
1195; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
1196;
1197entry:
1198  %alloca = alloca [4096 x i32]
1199  %base = bitcast [4096 x i32]* %alloca to i32*
1200  call void @init(i32* %base)
1201  br label %loop
1202loop:
  ; Start value -10 is 0xFFFFFFFFFFFFFFF6 when viewed as an unsigned i64.
1203  %iv = phi i64 [ -10, %entry ], [ %iv.next, %latch ]
1204  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
1205  %iv.next = add i64 %iv, 1
1206  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
1207  %earlycnd = load i1, i1* %test_addr
1208  br i1 %earlycnd, label %pred, label %latch
1209pred:
  ; Only reached when %earlycnd is true; with %iv == -10 this addresses
  ; memory below the start of %alloca.
1210  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
1211  %val = load i32, i32* %addr
1212  br label %latch
1213latch:
1214  %val.phi = phi i32 [0, %loop], [%val, %pred]
1215  %accum.next = add i32 %accum, %val.phi
  ; Unsigned compare: -10 ugt 4094 is true, so the loop exits after its
  ; first iteration.
1216  %exit = icmp ugt i64 %iv, 4094
1217  br i1 %exit, label %loop_exit, label %loop
1218
1219loop_exit:
1220  ret i32 %accum.next
1221}
1222
1223
1224;; TODO: handle non-unit strides
1225define i32 @test_non_unit_stride(i64 %len, i1* %test_base) {
1226; CHECK-LABEL: @test_non_unit_stride(
1227; CHECK-NEXT:  entry:
1228; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
1229; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
1230; CHECK-NEXT:    call void @init(i32* [[BASE]])
1231; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1232; CHECK:       vector.ph:
1233; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1234; CHECK:       vector.body:
1235; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE33:%.*]] ]
1236; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP148:%.*]], [[PRED_LOAD_CONTINUE33]] ]
1237; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP149:%.*]], [[PRED_LOAD_CONTINUE33]] ]
1238; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP150:%.*]], [[PRED_LOAD_CONTINUE33]] ]
1239; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP151:%.*]], [[PRED_LOAD_CONTINUE33]] ]
1240; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
1241; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1242; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
1243; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
1244; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
1245; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
1246; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10
1247; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12
1248; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14
1249; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16
1250; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18
1251; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20
1252; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22
1253; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24
1254; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
1255; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
1256; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
1257; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
1258; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
1259; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
1260; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
1261; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
1262; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
1263; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
1264; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
1265; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
1266; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
1267; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
1268; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
1269; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
1270; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
1271; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
1272; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
1273; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
1274; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
1275; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
1276; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
1277; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
1278; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
1279; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
1280; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
1281; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
1282; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
1283; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
1284; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
1285; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
1286; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
1287; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
1288; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
1289; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
1290; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
1291; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
1292; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
1293; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
1294; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
1295; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
1296; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
1297; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
1298; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
1299; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
1300; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
1301; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
1302; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
1303; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
1304; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
1305; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <4 x i1> [[TMP39]], i32 0
1306; CHECK-NEXT:    br i1 [[TMP64]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1307; CHECK:       pred.load.if:
1308; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP0]]
1309; CHECK-NEXT:    [[TMP66:%.*]] = load i32, i32* [[TMP65]], align 4
1310; CHECK-NEXT:    [[TMP67:%.*]] = insertelement <4 x i32> poison, i32 [[TMP66]], i32 0
1311; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1312; CHECK:       pred.load.continue:
1313; CHECK-NEXT:    [[TMP68:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP67]], [[PRED_LOAD_IF]] ]
1314; CHECK-NEXT:    [[TMP69:%.*]] = extractelement <4 x i1> [[TMP39]], i32 1
1315; CHECK-NEXT:    br i1 [[TMP69]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5:%.*]]
1316; CHECK:       pred.load.if4:
1317; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP1]]
1318; CHECK-NEXT:    [[TMP71:%.*]] = load i32, i32* [[TMP70]], align 4
1319; CHECK-NEXT:    [[TMP72:%.*]] = insertelement <4 x i32> [[TMP68]], i32 [[TMP71]], i32 1
1320; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE5]]
1321; CHECK:       pred.load.continue5:
1322; CHECK-NEXT:    [[TMP73:%.*]] = phi <4 x i32> [ [[TMP68]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP72]], [[PRED_LOAD_IF4]] ]
1323; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <4 x i1> [[TMP39]], i32 2
1324; CHECK-NEXT:    br i1 [[TMP74]], label [[PRED_LOAD_IF6:%.*]], label [[PRED_LOAD_CONTINUE7:%.*]]
1325; CHECK:       pred.load.if6:
1326; CHECK-NEXT:    [[TMP75:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP2]]
1327; CHECK-NEXT:    [[TMP76:%.*]] = load i32, i32* [[TMP75]], align 4
1328; CHECK-NEXT:    [[TMP77:%.*]] = insertelement <4 x i32> [[TMP73]], i32 [[TMP76]], i32 2
1329; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE7]]
1330; CHECK:       pred.load.continue7:
1331; CHECK-NEXT:    [[TMP78:%.*]] = phi <4 x i32> [ [[TMP73]], [[PRED_LOAD_CONTINUE5]] ], [ [[TMP77]], [[PRED_LOAD_IF6]] ]
1332; CHECK-NEXT:    [[TMP79:%.*]] = extractelement <4 x i1> [[TMP39]], i32 3
1333; CHECK-NEXT:    br i1 [[TMP79]], label [[PRED_LOAD_IF8:%.*]], label [[PRED_LOAD_CONTINUE9:%.*]]
1334; CHECK:       pred.load.if8:
1335; CHECK-NEXT:    [[TMP80:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP3]]
1336; CHECK-NEXT:    [[TMP81:%.*]] = load i32, i32* [[TMP80]], align 4
1337; CHECK-NEXT:    [[TMP82:%.*]] = insertelement <4 x i32> [[TMP78]], i32 [[TMP81]], i32 3
1338; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE9]]
1339; CHECK:       pred.load.continue9:
1340; CHECK-NEXT:    [[TMP83:%.*]] = phi <4 x i32> [ [[TMP78]], [[PRED_LOAD_CONTINUE7]] ], [ [[TMP82]], [[PRED_LOAD_IF8]] ]
1341; CHECK-NEXT:    [[TMP84:%.*]] = extractelement <4 x i1> [[TMP47]], i32 0
1342; CHECK-NEXT:    br i1 [[TMP84]], label [[PRED_LOAD_IF10:%.*]], label [[PRED_LOAD_CONTINUE11:%.*]]
1343; CHECK:       pred.load.if10:
1344; CHECK-NEXT:    [[TMP85:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP4]]
1345; CHECK-NEXT:    [[TMP86:%.*]] = load i32, i32* [[TMP85]], align 4
1346; CHECK-NEXT:    [[TMP87:%.*]] = insertelement <4 x i32> poison, i32 [[TMP86]], i32 0
1347; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE11]]
1348; CHECK:       pred.load.continue11:
1349; CHECK-NEXT:    [[TMP88:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE9]] ], [ [[TMP87]], [[PRED_LOAD_IF10]] ]
1350; CHECK-NEXT:    [[TMP89:%.*]] = extractelement <4 x i1> [[TMP47]], i32 1
1351; CHECK-NEXT:    br i1 [[TMP89]], label [[PRED_LOAD_IF12:%.*]], label [[PRED_LOAD_CONTINUE13:%.*]]
1352; CHECK:       pred.load.if12:
1353; CHECK-NEXT:    [[TMP90:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP5]]
1354; CHECK-NEXT:    [[TMP91:%.*]] = load i32, i32* [[TMP90]], align 4
1355; CHECK-NEXT:    [[TMP92:%.*]] = insertelement <4 x i32> [[TMP88]], i32 [[TMP91]], i32 1
1356; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE13]]
1357; CHECK:       pred.load.continue13:
1358; CHECK-NEXT:    [[TMP93:%.*]] = phi <4 x i32> [ [[TMP88]], [[PRED_LOAD_CONTINUE11]] ], [ [[TMP92]], [[PRED_LOAD_IF12]] ]
1359; CHECK-NEXT:    [[TMP94:%.*]] = extractelement <4 x i1> [[TMP47]], i32 2
1360; CHECK-NEXT:    br i1 [[TMP94]], label [[PRED_LOAD_IF14:%.*]], label [[PRED_LOAD_CONTINUE15:%.*]]
1361; CHECK:       pred.load.if14:
1362; CHECK-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP6]]
1363; CHECK-NEXT:    [[TMP96:%.*]] = load i32, i32* [[TMP95]], align 4
1364; CHECK-NEXT:    [[TMP97:%.*]] = insertelement <4 x i32> [[TMP93]], i32 [[TMP96]], i32 2
1365; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE15]]
1366; CHECK:       pred.load.continue15:
1367; CHECK-NEXT:    [[TMP98:%.*]] = phi <4 x i32> [ [[TMP93]], [[PRED_LOAD_CONTINUE13]] ], [ [[TMP97]], [[PRED_LOAD_IF14]] ]
1368; CHECK-NEXT:    [[TMP99:%.*]] = extractelement <4 x i1> [[TMP47]], i32 3
1369; CHECK-NEXT:    br i1 [[TMP99]], label [[PRED_LOAD_IF16:%.*]], label [[PRED_LOAD_CONTINUE17:%.*]]
1370; CHECK:       pred.load.if16:
1371; CHECK-NEXT:    [[TMP100:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP7]]
1372; CHECK-NEXT:    [[TMP101:%.*]] = load i32, i32* [[TMP100]], align 4
1373; CHECK-NEXT:    [[TMP102:%.*]] = insertelement <4 x i32> [[TMP98]], i32 [[TMP101]], i32 3
1374; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE17]]
1375; CHECK:       pred.load.continue17:
1376; CHECK-NEXT:    [[TMP103:%.*]] = phi <4 x i32> [ [[TMP98]], [[PRED_LOAD_CONTINUE15]] ], [ [[TMP102]], [[PRED_LOAD_IF16]] ]
1377; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <4 x i1> [[TMP55]], i32 0
1378; CHECK-NEXT:    br i1 [[TMP104]], label [[PRED_LOAD_IF18:%.*]], label [[PRED_LOAD_CONTINUE19:%.*]]
1379; CHECK:       pred.load.if18:
1380; CHECK-NEXT:    [[TMP105:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP8]]
1381; CHECK-NEXT:    [[TMP106:%.*]] = load i32, i32* [[TMP105]], align 4
1382; CHECK-NEXT:    [[TMP107:%.*]] = insertelement <4 x i32> poison, i32 [[TMP106]], i32 0
1383; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE19]]
1384; CHECK:       pred.load.continue19:
1385; CHECK-NEXT:    [[TMP108:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP107]], [[PRED_LOAD_IF18]] ]
1386; CHECK-NEXT:    [[TMP109:%.*]] = extractelement <4 x i1> [[TMP55]], i32 1
1387; CHECK-NEXT:    br i1 [[TMP109]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]]
1388; CHECK:       pred.load.if20:
1389; CHECK-NEXT:    [[TMP110:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP9]]
1390; CHECK-NEXT:    [[TMP111:%.*]] = load i32, i32* [[TMP110]], align 4
1391; CHECK-NEXT:    [[TMP112:%.*]] = insertelement <4 x i32> [[TMP108]], i32 [[TMP111]], i32 1
1392; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE21]]
1393; CHECK:       pred.load.continue21:
1394; CHECK-NEXT:    [[TMP113:%.*]] = phi <4 x i32> [ [[TMP108]], [[PRED_LOAD_CONTINUE19]] ], [ [[TMP112]], [[PRED_LOAD_IF20]] ]
1395; CHECK-NEXT:    [[TMP114:%.*]] = extractelement <4 x i1> [[TMP55]], i32 2
1396; CHECK-NEXT:    br i1 [[TMP114]], label [[PRED_LOAD_IF22:%.*]], label [[PRED_LOAD_CONTINUE23:%.*]]
1397; CHECK:       pred.load.if22:
1398; CHECK-NEXT:    [[TMP115:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP10]]
1399; CHECK-NEXT:    [[TMP116:%.*]] = load i32, i32* [[TMP115]], align 4
1400; CHECK-NEXT:    [[TMP117:%.*]] = insertelement <4 x i32> [[TMP113]], i32 [[TMP116]], i32 2
1401; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE23]]
1402; CHECK:       pred.load.continue23:
1403; CHECK-NEXT:    [[TMP118:%.*]] = phi <4 x i32> [ [[TMP113]], [[PRED_LOAD_CONTINUE21]] ], [ [[TMP117]], [[PRED_LOAD_IF22]] ]
1404; CHECK-NEXT:    [[TMP119:%.*]] = extractelement <4 x i1> [[TMP55]], i32 3
1405; CHECK-NEXT:    br i1 [[TMP119]], label [[PRED_LOAD_IF24:%.*]], label [[PRED_LOAD_CONTINUE25:%.*]]
1406; CHECK:       pred.load.if24:
1407; CHECK-NEXT:    [[TMP120:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP11]]
1408; CHECK-NEXT:    [[TMP121:%.*]] = load i32, i32* [[TMP120]], align 4
1409; CHECK-NEXT:    [[TMP122:%.*]] = insertelement <4 x i32> [[TMP118]], i32 [[TMP121]], i32 3
1410; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE25]]
1411; CHECK:       pred.load.continue25:
1412; CHECK-NEXT:    [[TMP123:%.*]] = phi <4 x i32> [ [[TMP118]], [[PRED_LOAD_CONTINUE23]] ], [ [[TMP122]], [[PRED_LOAD_IF24]] ]
1413; CHECK-NEXT:    [[TMP124:%.*]] = extractelement <4 x i1> [[TMP63]], i32 0
1414; CHECK-NEXT:    br i1 [[TMP124]], label [[PRED_LOAD_IF26:%.*]], label [[PRED_LOAD_CONTINUE27:%.*]]
1415; CHECK:       pred.load.if26:
1416; CHECK-NEXT:    [[TMP125:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]]
1417; CHECK-NEXT:    [[TMP126:%.*]] = load i32, i32* [[TMP125]], align 4
1418; CHECK-NEXT:    [[TMP127:%.*]] = insertelement <4 x i32> poison, i32 [[TMP126]], i32 0
1419; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE27]]
1420; CHECK:       pred.load.continue27:
1421; CHECK-NEXT:    [[TMP128:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE25]] ], [ [[TMP127]], [[PRED_LOAD_IF26]] ]
1422; CHECK-NEXT:    [[TMP129:%.*]] = extractelement <4 x i1> [[TMP63]], i32 1
1423; CHECK-NEXT:    br i1 [[TMP129]], label [[PRED_LOAD_IF28:%.*]], label [[PRED_LOAD_CONTINUE29:%.*]]
1424; CHECK:       pred.load.if28:
1425; CHECK-NEXT:    [[TMP130:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP13]]
1426; CHECK-NEXT:    [[TMP131:%.*]] = load i32, i32* [[TMP130]], align 4
1427; CHECK-NEXT:    [[TMP132:%.*]] = insertelement <4 x i32> [[TMP128]], i32 [[TMP131]], i32 1
1428; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE29]]
1429; CHECK:       pred.load.continue29:
1430; CHECK-NEXT:    [[TMP133:%.*]] = phi <4 x i32> [ [[TMP128]], [[PRED_LOAD_CONTINUE27]] ], [ [[TMP132]], [[PRED_LOAD_IF28]] ]
1431; CHECK-NEXT:    [[TMP134:%.*]] = extractelement <4 x i1> [[TMP63]], i32 2
1432; CHECK-NEXT:    br i1 [[TMP134]], label [[PRED_LOAD_IF30:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]]
1433; CHECK:       pred.load.if30:
1434; CHECK-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP14]]
1435; CHECK-NEXT:    [[TMP136:%.*]] = load i32, i32* [[TMP135]], align 4
1436; CHECK-NEXT:    [[TMP137:%.*]] = insertelement <4 x i32> [[TMP133]], i32 [[TMP136]], i32 2
1437; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE31]]
1438; CHECK:       pred.load.continue31:
1439; CHECK-NEXT:    [[TMP138:%.*]] = phi <4 x i32> [ [[TMP133]], [[PRED_LOAD_CONTINUE29]] ], [ [[TMP137]], [[PRED_LOAD_IF30]] ]
1440; CHECK-NEXT:    [[TMP139:%.*]] = extractelement <4 x i1> [[TMP63]], i32 3
1441; CHECK-NEXT:    br i1 [[TMP139]], label [[PRED_LOAD_IF32:%.*]], label [[PRED_LOAD_CONTINUE33]]
1442; CHECK:       pred.load.if32:
1443; CHECK-NEXT:    [[TMP140:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP15]]
1444; CHECK-NEXT:    [[TMP141:%.*]] = load i32, i32* [[TMP140]], align 4
1445; CHECK-NEXT:    [[TMP142:%.*]] = insertelement <4 x i32> [[TMP138]], i32 [[TMP141]], i32 3
1446; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE33]]
1447; CHECK:       pred.load.continue33:
1448; CHECK-NEXT:    [[TMP143:%.*]] = phi <4 x i32> [ [[TMP138]], [[PRED_LOAD_CONTINUE31]] ], [ [[TMP142]], [[PRED_LOAD_IF32]] ]
1449; CHECK-NEXT:    [[TMP144:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
1450; CHECK-NEXT:    [[TMP145:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
1451; CHECK-NEXT:    [[TMP146:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
1452; CHECK-NEXT:    [[TMP147:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
1453; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[TMP83]], <4 x i32> zeroinitializer
1454; CHECK-NEXT:    [[PREDPHI34:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP103]], <4 x i32> zeroinitializer
1455; CHECK-NEXT:    [[PREDPHI35:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP123]], <4 x i32> zeroinitializer
1456; CHECK-NEXT:    [[PREDPHI36:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP143]], <4 x i32> zeroinitializer
1457; CHECK-NEXT:    [[TMP148]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
1458; CHECK-NEXT:    [[TMP149]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI34]]
1459; CHECK-NEXT:    [[TMP150]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI35]]
1460; CHECK-NEXT:    [[TMP151]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI36]]
1461; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1462; CHECK-NEXT:    [[TMP152:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2048
1463; CHECK-NEXT:    br i1 [[TMP152]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
1464; CHECK:       middle.block:
1465; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP149]], [[TMP148]]
1466; CHECK-NEXT:    [[BIN_RDX37:%.*]] = add <4 x i32> [[TMP150]], [[BIN_RDX]]
1467; CHECK-NEXT:    [[BIN_RDX38:%.*]] = add <4 x i32> [[TMP151]], [[BIN_RDX37]]
1468; CHECK-NEXT:    [[TMP153:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX38]])
1469; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 2048, 2048
1470; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
1471; CHECK:       scalar.ph:
1472; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1473; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP153]], [[MIDDLE_BLOCK]] ]
1474; CHECK-NEXT:    br label [[LOOP:%.*]]
1475; CHECK:       loop:
1476; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
1477; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
1478; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 2
1479; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
1480; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
1481; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
1482; CHECK:       pred:
1483; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
1484; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
1485; CHECK-NEXT:    br label [[LATCH]]
1486; CHECK:       latch:
1487; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
1488; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
1489; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4093
1490; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
1491; CHECK:       loop_exit:
1492; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP153]], [[MIDDLE_BLOCK]] ]
1493; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
1494;
1495entry:
1496  %alloca = alloca [4096 x i32]
1497  %base = bitcast [4096 x i32]* %alloca to i32*
1498  call void @init(i32* %base)
1499  br label %loop
1500loop:
1501  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
1502  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
1503  %iv.next = add i64 %iv, 2
1504  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
1505  %earlycnd = load i1, i1* %test_addr
1506  br i1 %earlycnd, label %pred, label %latch
1507pred:
1508  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
1509  %val = load i32, i32* %addr
1510  br label %latch
1511latch:
1512  %val.phi = phi i32 [0, %loop], [%val, %pred]
1513  %accum.next = add i32 %accum, %val.phi
1514  %exit = icmp ugt i64 %iv, 4093
1515  br i1 %exit, label %loop_exit, label %loop
1516
1517loop_exit:
1518  ret i32 %accum.next
1519}
1520
;; Negative case (per the `neg_` prefix): the predicated load in this loop
;; indexes %base[0..4095], but the backing alloca holds only 1024 i32
;; elements, so most of the iteration space lies outside the allocation.
;; The expected output keeps four llvm.masked.load calls per vector
;; iteration (4 lanes each, index step 16) instead of plain vector loads --
;; presumably because the access cannot be shown dereferenceable for every
;; iteration. %len is not referenced anywhere in this function.
1521define i32 @neg_off_by_many(i64 %len, i1* %test_base) {
1522; CHECK-LABEL: @neg_off_by_many(
1523; CHECK-NEXT:  entry:
1524; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [1024 x i32], align 4
1525; CHECK-NEXT:    [[BASE:%.*]] = bitcast [1024 x i32]* [[ALLOCA]] to i32*
1526; CHECK-NEXT:    call void @init(i32* [[BASE]])
1527; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1528; CHECK:       vector.ph:
1529; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1530; CHECK:       vector.body:
1531; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1532; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
1533; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
1534; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
1535; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
1536; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1537; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
1538; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
1539; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
1540; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
1541; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
1542; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
1543; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
1544; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
1545; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
1546; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
1547; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
1548; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
1549; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
1550; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
1551; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
1552; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
1553; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
1554; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
1555; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
1556; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
1557; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
1558; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
1559; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
1560; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
1561; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
1562; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
1563; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
1564; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
1565; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
1566; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
1567; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
1568; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
1569; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
1570; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
1571; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
1572; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
1573; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
1574; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
1575; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
1576; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
1577; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
1578; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
1579; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
1580; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
1581; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
1582; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
1583; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
1584; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
1585; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
1586; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
1587; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
1588; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
1589; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
1590; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
1591; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
1592; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
1593; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
1594; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
1595; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
1596; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
1597; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
1598; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
1599; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
1600; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
1601; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
1602; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
1603; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
1604; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
1605; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
1606; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
1607; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
1608; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
1609; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
1610; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
1611; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
1612; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
1613; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
1614; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
1615; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
1616; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
1617; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
1618; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
1619; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
1620; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
1621; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
1622; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
1623; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
1624; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
1625; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
1626; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
1627; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
1628; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1629; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
1630; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
1631; CHECK:       middle.block:
1632; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
1633; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
1634; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
1635; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
1636; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
1637; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
1638; CHECK:       scalar.ph:
1639; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1640; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
1641; CHECK-NEXT:    br label [[LOOP:%.*]]
1642; CHECK:       loop:
1643; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
1644; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
1645; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
1646; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
1647; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
1648; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
1649; CHECK:       pred:
1650; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
1651; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
1652; CHECK-NEXT:    br label [[LATCH]]
1653; CHECK:       latch:
1654; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
1655; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
1656; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
1657; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
1658; CHECK:       loop_exit:
1659; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
1660; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
1661;
1662entry:
;; Only 1024 elements are allocated here, while the loop below indexes
;; %base with %iv values up to 4095.
1663  %alloca = alloca [1024 x i32]
1664  %base = bitcast [1024 x i32]* %alloca to i32*
1665  call void @init(i32* %base)
1666  br label %loop
1667loop:
1668  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
1669  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
1670  %iv.next = add i64 %iv, 1
;; The per-iteration predicate is read from caller-provided memory.
1671  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
1672  %earlycnd = load i1, i1* %test_addr
1673  br i1 %earlycnd, label %pred, label %latch
1674pred:
;; Reached only when %earlycnd is true; this is the load that the expected
;; output keeps as a masked load.
1675  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
1676  %val = load i32, i32* %addr
1677  br label %latch
1678latch:
;; Contributes 0 to the running sum when the predicated load was skipped.
1679  %val.phi = phi i32 [0, %loop], [%val, %pred]
1680  %accum.next = add i32 %accum, %val.phi
;; The exit test uses the pre-increment %iv, so the final iteration runs
;; with %iv == 4095 (4096 iterations in total).
1681  %exit = icmp ugt i64 %iv, 4094
1682  br i1 %exit, label %loop_exit, label %loop
1683
1684loop_exit:
1685  ret i32 %accum.next
1686}
1687
1688define i32 @neg_off_by_one_iteration(i64 %len, i1* %test_base) {
1689; CHECK-LABEL: @neg_off_by_one_iteration(
1690; CHECK-NEXT:  entry:
1691; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4095 x i32], align 4
1692; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4095 x i32]* [[ALLOCA]] to i32*
1693; CHECK-NEXT:    call void @init(i32* [[BASE]])
1694; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1695; CHECK:       vector.ph:
1696; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1697; CHECK:       vector.body:
1698; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1699; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
1700; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
1701; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
1702; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
1703; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1704; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
1705; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
1706; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
1707; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
1708; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
1709; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
1710; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
1711; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
1712; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
1713; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
1714; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
1715; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
1716; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
1717; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
1718; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
1719; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
1720; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
1721; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
1722; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
1723; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
1724; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
1725; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
1726; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
1727; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
1728; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
1729; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
1730; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
1731; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
1732; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
1733; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
1734; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
1735; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
1736; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
1737; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
1738; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
1739; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
1740; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
1741; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
1742; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
1743; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
1744; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
1745; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
1746; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
1747; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
1748; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
1749; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
1750; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
1751; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
1752; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
1753; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
1754; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
1755; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
1756; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
1757; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
1758; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
1759; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
1760; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
1761; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
1762; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
1763; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
1764; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
1765; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
1766; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
1767; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
1768; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
1769; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
1770; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
1771; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
1772; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
1773; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
1774; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
1775; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
1776; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
1777; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
1778; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
1779; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
1780; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
1781; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
1782; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
1783; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
1784; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
1785; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
1786; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
1787; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
1788; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
1789; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
1790; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
1791; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
1792; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
1793; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
1794; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
1795; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1796; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
1797; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
1798; CHECK:       middle.block:
1799; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
1800; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
1801; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
1802; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
1803; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
1804; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
1805; CHECK:       scalar.ph:
1806; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1807; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
1808; CHECK-NEXT:    br label [[LOOP:%.*]]
1809; CHECK:       loop:
1810; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
1811; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
1812; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
1813; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
1814; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
1815; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
1816; CHECK:       pred:
1817; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
1818; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
1819; CHECK-NEXT:    br label [[LATCH]]
1820; CHECK:       latch:
1821; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
1822; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
1823; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
1824; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
1825; CHECK:       loop_exit:
1826; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
1827; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
1828;
1829entry:
1830  %alloca = alloca [4095 x i32]
1831  %base = bitcast [4095 x i32]* %alloca to i32*
1832  call void @init(i32* %base)
1833  br label %loop
1834loop:
1835  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
1836  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
1837  %iv.next = add i64 %iv, 1
1838  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
1839  %earlycnd = load i1, i1* %test_addr
1840  br i1 %earlycnd, label %pred, label %latch
1841pred:
1842  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
1843  %val = load i32, i32* %addr
1844  br label %latch
1845latch:
1846  %val.phi = phi i32 [0, %loop], [%val, %pred]
1847  %accum.next = add i32 %accum, %val.phi
1848  %exit = icmp ugt i64 %iv, 4094
1849  br i1 %exit, label %loop_exit, label %loop
1850
1851loop_exit:
1852  ret i32 %accum.next
1853}
1854
;; Negative test: the loop executes 4096 iterations (iv = 0..4095) and each
;; one may load an i32 from %base + iv, so up to 4096 * 4 = 16384 bytes can be
;; touched.  The alloca provides only 16383 bytes -- a single byte short -- so
;; the last element is not covered by the allocation.  The assertions below
;; therefore expect the predicated loads to stay as llvm.masked.load calls
;; rather than being turned into plain vector loads.
define i32 @neg_off_by_one_byte(i64 %len, i1* %test_base) {
; CHECK-LABEL: @neg_off_by_one_byte(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [16383 x i8], align 1
; CHECK-NEXT:    [[BASE:%.*]] = bitcast [16383 x i8]* [[ALLOCA]] to i32*
; CHECK-NEXT:    call void @init(i32* [[BASE]])
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
; CHECK:       pred:
; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
; CHECK-NEXT:    br label [[LATCH]]
; CHECK:       latch:
; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK:       loop_exit:
; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
;
entry:
  ; 16383 bytes: one byte fewer than the 16384 bytes the loop below may load.
  %alloca = alloca [16383 x i8]
  %base = bitcast [16383 x i8]* %alloca to i32*
  call void @init(i32* %base)
  br label %loop
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
  %iv.next = add i64 %iv, 1
  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
  %earlycnd = load i1, i1* %test_addr
  br i1 %earlycnd, label %pred, label %latch
pred:
  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
  %val = load i32, i32* %addr
  br label %latch
latch:
  %val.phi = phi i32 [0, %loop], [%val, %pred]
  %accum.next = add i32 %accum, %val.phi
  ; The exit test runs after the body, so iv == 4095 is the last iteration.
  %exit = icmp ugt i64 %iv, 4094
  br i1 %exit, label %loop_exit, label %loop

loop_exit:
  ret i32 %accum.next
}
2021
2022
;; Show that we handle the case where the exit count is non-constant, but
;; where we have a constant upper bound on it which is sufficient to show
;; dereferenceability.  %min is umin(%len, 4094) and the loop leaves once
;; iv > %min, so iv never exceeds 4095 and every access stays inside the
;; 4096-element alloca.  The assertions below expect plain wide loads
;; (load <4 x i32>) instead of masked loads.
define i32 @test_constant_max(i64 %len, i1* %test_base) {
; CHECK-LABEL: @test_constant_max(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [4096 x i32], align 4
; CHECK-NEXT:    [[BASE:%.*]] = bitcast [4096 x i32]* [[ALLOCA]] to i32*
; CHECK-NEXT:    call void @init(i32* [[BASE]])
; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[LEN:%.*]], 4094
; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP]], i64 4094, i64 [[LEN]]
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i64 [[MIN]], 2
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP84:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 5
; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 6
; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 7
; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 9
; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 10
; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 11
; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 12
; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX]], 15
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP16]]
; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT:    [[TMP36:%.*]] = load i1, i1* [[TMP20]], align 1
; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> poison, i1 [[TMP33]], i32 0
; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 1
; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 2
; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <4 x i1> [[TMP39]], i1 [[TMP36]], i32 3
; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT:    [[TMP44:%.*]] = load i1, i1* [[TMP24]], align 1
; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> poison, i1 [[TMP41]], i32 0
; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 1
; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 2
; CHECK-NEXT:    [[TMP48:%.*]] = insertelement <4 x i1> [[TMP47]], i1 [[TMP44]], i32 3
; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT:    [[TMP52:%.*]] = load i1, i1* [[TMP28]], align 1
; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> poison, i1 [[TMP49]], i32 0
; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 1
; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 2
; CHECK-NEXT:    [[TMP56:%.*]] = insertelement <4 x i1> [[TMP55]], i1 [[TMP52]], i32 3
; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT:    [[TMP60:%.*]] = load i1, i1* [[TMP32]], align 1
; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> poison, i1 [[TMP57]], i32 0
; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1
; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2
; CHECK-NEXT:    [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3
; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP13]]
; CHECK-NEXT:    [[TMP69:%.*]] = getelementptr i32, i32* [[TMP65]], i32 0
; CHECK-NEXT:    [[TMP70:%.*]] = bitcast i32* [[TMP69]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP70]], align 4
; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr i32, i32* [[TMP65]], i32 4
; CHECK-NEXT:    [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP72]], align 4
; CHECK-NEXT:    [[TMP73:%.*]] = getelementptr i32, i32* [[TMP65]], i32 8
; CHECK-NEXT:    [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
; CHECK-NEXT:    [[TMP75:%.*]] = getelementptr i32, i32* [[TMP65]], i32 12
; CHECK-NEXT:    [[TMP76:%.*]] = bitcast i32* [[TMP75]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP76]], align 4
; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP48]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP56]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP80:%.*]] = xor <4 x i1> [[TMP64]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP40]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[WIDE_LOAD4]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP56]], <4 x i32> [[WIDE_LOAD5]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP64]], <4 x i32> [[WIDE_LOAD6]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
; CHECK-NEXT:    [[TMP84]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT:    [[TMP85:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP85]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP82]], [[TMP81]]
; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX]]
; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP84]], [[BIN_RDX10]]
; CHECK-NEXT:    [[TMP86:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
; CHECK:       pred:
; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
; CHECK-NEXT:    br label [[LATCH]]
; CHECK:       latch:
; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], [[MIN]]
; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK:       loop_exit:
; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP86]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
;
entry:
  %alloca = alloca [4096 x i32]
  %base = bitcast [4096 x i32]* %alloca to i32*
  call void @init(i32* %base)
  ; Clamp the runtime length: %min = umin(%len, 4094).
  %cmp = icmp ugt i64 %len, 4094
  %min = select i1 %cmp, i64 4094, i64 %len
  br label %loop
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
  %iv.next = add i64 %iv, 1
  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
  %earlycnd = load i1, i1* %test_addr
  br i1 %earlycnd, label %pred, label %latch
pred:
  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
  %val = load i32, i32* %addr
  br label %latch
latch:
  %val.phi = phi i32 [0, %loop], [%val, %pred]
  %accum.next = add i32 %accum, %val.phi
  ; Leaves on the first iv greater than %min, i.e. after %min + 2 iterations.
  %exit = icmp ugt i64 %iv, %min
  br i1 %exit, label %loop_exit, label %loop

loop_exit:
  ret i32 %accum.next
}
2199
2200
;; Model a custom allocator which allocates in chunks of 8 bytes.
;; Both return a pointer marked align 8 and dereferenceable_or_null(8).
;; allocsize(0): the allocation size is given by parameter 0.
declare align 8 dereferenceable_or_null(8) i8* @my_alloc(i32) allocsize(0)
;; allocsize(0, 1): the allocation size is the product of parameters 0 and 1.
declare align 8 dereferenceable_or_null(8) i8* @my_array_alloc(i32, i32) allocsize(0, 1)
2204
2205define i32 @test_allocsize(i64 %len, i1* %test_base) nofree nosync {
2206; CHECK-LABEL: @test_allocsize(
2207; CHECK-NEXT:  entry:
2208; CHECK-NEXT:    [[ALLOCATION:%.*]] = call nonnull i8* @my_alloc(i32 16384)
2209; CHECK-NEXT:    [[BASE:%.*]] = bitcast i8* [[ALLOCATION]] to i32*
2210; CHECK-NEXT:    call void @init(i32* [[BASE]])
2211; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2212; CHECK:       vector.ph:
2213; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2214; CHECK:       vector.body:
2215; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2216; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
2217; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
2218; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
2219; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
2220; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2221; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
2222; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
2223; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
2224; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
2225; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
2226; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
2227; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
2228; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
2229; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
2230; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
2231; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
2232; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
2233; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
2234; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
2235; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
2236; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
2237; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
2238; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
2239; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
2240; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
2241; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
2242; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
2243; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
2244; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
2245; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
2246; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
2247; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
2248; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
2249; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
2250; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
2251; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
2252; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
2253; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
2254; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
2255; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
2256; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
2257; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
2258; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
2259; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
2260; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
2261; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
2262; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
2263; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
2264; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
2265; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
2266; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
2267; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
2268; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
2269; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
2270; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
2271; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
2272; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
2273; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
2274; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
2275; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
2276; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
2277; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
2278; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
2279; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
2280; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
2281; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
2282; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
2283; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
2284; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
2285; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
2286; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
2287; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
2288; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
2289; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
2290; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
2291; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
2292; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
2293; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
2294; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
2295; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
2296; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
2297; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
2298; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
2299; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
2300; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
2301; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
2302; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
2303; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
2304; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
2305; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
2306; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
2307; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
2308; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
2309; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
2310; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
2311; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
2312; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
2313; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
2314; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2315; CHECK:       middle.block:
2316; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
2317; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
2318; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
2319; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
2320; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
2321; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
2322; CHECK:       scalar.ph:
2323; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2324; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
2325; CHECK-NEXT:    br label [[LOOP:%.*]]
2326; CHECK:       loop:
2327; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
2328; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
2329; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
2330; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
2331; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
2332; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
2333; CHECK:       pred:
2334; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
2335; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
2336; CHECK-NEXT:    br label [[LATCH]]
2337; CHECK:       latch:
2338; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
2339; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
2340; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
2341; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]]
2342; CHECK:       loop_exit:
2343; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
2344; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
2345;
2346entry:
2347  %allocation = call nonnull i8* @my_alloc(i32 16384)
2348  %base = bitcast i8* %allocation to i32*
2349  call void @init(i32* %base)
2350  br label %loop
2351loop:
2352  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
2353  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
2354  %iv.next = add i64 %iv, 1
2355  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
2356  %earlycnd = load i1, i1* %test_addr
2357  br i1 %earlycnd, label %pred, label %latch
2358pred:
2359  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
2360  %val = load i32, i32* %addr
2361  br label %latch
2362latch:
2363  %val.phi = phi i32 [0, %loop], [%val, %pred]
2364  %accum.next = add i32 %accum, %val.phi
2365  %exit = icmp ugt i64 %iv, 4094
2366  br i1 %exit, label %loop_exit, label %loop
2367
2368loop_exit:
2369  ret i32 %accum.next
2370}
2371
2372
;; Conditional-load reduction over a buffer returned by
;; @my_array_alloc(4096, 4).  The loop runs %iv over 0..4095 and only loads
;; base[%iv] when the i1 read from test_base[%iv] is true; skipped iterations
;; contribute 0 to the sum.  %len is not referenced in the body.
;; NOTE(review): @my_array_alloc is declared outside this part of the file;
;; presumably it carries allocsize(0, 1) so the result is known to span
;; 4096 * 4 = 16384 bytes -- confirm against the declaration.
;; The generated assertions below currently expect llvm.masked.load calls for
;; the accesses to base.
define i32 @test_allocsize_array(i64 %len, i1* %test_base) nofree nosync {
; CHECK-LABEL: @test_allocsize_array(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ALLOCATION:%.*]] = call nonnull i8* @my_array_alloc(i32 4096, i32 4)
; CHECK-NEXT:    [[BASE:%.*]] = bitcast i8* [[ALLOCATION]] to i32*
; CHECK-NEXT:    call void @init(i32* [[BASE]])
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
; CHECK:       pred:
; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
; CHECK-NEXT:    br label [[LATCH]]
; CHECK:       latch:
; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK:       loop_exit:
; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
;
entry:
  ; Obtain the buffer through the two-argument allocator and view it as i32*.
  %allocation = call nonnull i8* @my_array_alloc(i32 4096, i32 4)
  %base = bitcast i8* %allocation to i32*
  ; Hand the buffer to @init before the loop reads from it.
  call void @init(i32* %base)
  br label %loop
loop:
  ; %iv counts up from 0; %accum carries the running sum.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %accum = phi i32 [ 0, %entry ], [ %accum.next, %latch ]
  %iv.next = add i64 %iv, 1
  ; Per-iteration condition loaded from memory: it decides whether base[%iv]
  ; is read at all on this iteration.
  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
  %earlycnd = load i1, i1* %test_addr
  br i1 %earlycnd, label %pred, label %latch
pred:
  ; Conditionally executed load from the allocated buffer.
  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
  %val = load i32, i32* %addr
  br label %latch
latch:
  ; Iterations that skipped %pred add 0.
  %val.phi = phi i32 [0, %loop], [%val, %pred]
  %accum.next = add i32 %accum, %val.phi
  ; The exit is taken once %iv reaches 4095, i.e. after 4096 iterations.
  %exit = icmp ugt i64 %iv, 4094
  br i1 %exit, label %loop_exit, label %loop

loop_exit:
  ret i32 %accum.next
}
2539
; Called on the buffer in the %freeit block of @test_allocsize_cond_deref.
; NOTE(review): no attributes are attached here; presumably it deallocates its
; argument -- confirm.
declare void @my_free(i8*)
2541
; For the point-in-time variant of deref(N) semantics, show a negative
; example where hoisting the load without explicit predication might
; introduce a dynamic use-after-free.  (For example, allzero is true when all
; elements of the test vector are false, and thus base is never accessed.)
; Structure: allocate via @my_alloc(16384), initialise via @init, pass the
; buffer to @my_free only when %allzero is true, then run the 4096-iteration
; conditional-load reduction over it.  Unlike the preceding tests, this
; function is not marked nofree nosync.  The generated assertions below
; expect llvm.masked.load calls for the accesses to base.
define i32 @test_allocsize_cond_deref(i1 %allzero, i1* %test_base) {
; CHECK-LABEL: @test_allocsize_cond_deref(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ALLOCATION:%.*]] = call nonnull i8* @my_alloc(i32 16384)
; CHECK-NEXT:    [[BASE:%.*]] = bitcast i8* [[ALLOCATION]] to i32*
; CHECK-NEXT:    call void @init(i32* [[BASE]])
; CHECK-NEXT:    br i1 [[ALLZERO:%.*]], label [[FREEIT:%.*]], label [[PREHEADER:%.*]]
; CHECK:       freeit:
; CHECK-NEXT:    call void @my_free(i8* [[ALLOCATION]])
; CHECK-NEXT:    br label [[PREHEADER]]
; CHECK:       preheader:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP80:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP81:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP82:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP83:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 9
; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 10
; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 11
; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 12
; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 13
; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 15
; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP1]]
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP4]]
; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP10]]
; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP12]]
; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP13]]
; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP14]]
; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[TMP15]]
; CHECK-NEXT:    [[TMP32:%.*]] = load i1, i1* [[TMP16]], align 1
; CHECK-NEXT:    [[TMP33:%.*]] = load i1, i1* [[TMP17]], align 1
; CHECK-NEXT:    [[TMP34:%.*]] = load i1, i1* [[TMP18]], align 1
; CHECK-NEXT:    [[TMP35:%.*]] = load i1, i1* [[TMP19]], align 1
; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP32]], i32 0
; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i1> [[TMP36]], i1 [[TMP33]], i32 1
; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i1> [[TMP37]], i1 [[TMP34]], i32 2
; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <4 x i1> [[TMP38]], i1 [[TMP35]], i32 3
; CHECK-NEXT:    [[TMP40:%.*]] = load i1, i1* [[TMP20]], align 1
; CHECK-NEXT:    [[TMP41:%.*]] = load i1, i1* [[TMP21]], align 1
; CHECK-NEXT:    [[TMP42:%.*]] = load i1, i1* [[TMP22]], align 1
; CHECK-NEXT:    [[TMP43:%.*]] = load i1, i1* [[TMP23]], align 1
; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <4 x i1> poison, i1 [[TMP40]], i32 0
; CHECK-NEXT:    [[TMP45:%.*]] = insertelement <4 x i1> [[TMP44]], i1 [[TMP41]], i32 1
; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <4 x i1> [[TMP45]], i1 [[TMP42]], i32 2
; CHECK-NEXT:    [[TMP47:%.*]] = insertelement <4 x i1> [[TMP46]], i1 [[TMP43]], i32 3
; CHECK-NEXT:    [[TMP48:%.*]] = load i1, i1* [[TMP24]], align 1
; CHECK-NEXT:    [[TMP49:%.*]] = load i1, i1* [[TMP25]], align 1
; CHECK-NEXT:    [[TMP50:%.*]] = load i1, i1* [[TMP26]], align 1
; CHECK-NEXT:    [[TMP51:%.*]] = load i1, i1* [[TMP27]], align 1
; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x i1> poison, i1 [[TMP48]], i32 0
; CHECK-NEXT:    [[TMP53:%.*]] = insertelement <4 x i1> [[TMP52]], i1 [[TMP49]], i32 1
; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x i1> [[TMP53]], i1 [[TMP50]], i32 2
; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x i1> [[TMP54]], i1 [[TMP51]], i32 3
; CHECK-NEXT:    [[TMP56:%.*]] = load i1, i1* [[TMP28]], align 1
; CHECK-NEXT:    [[TMP57:%.*]] = load i1, i1* [[TMP29]], align 1
; CHECK-NEXT:    [[TMP58:%.*]] = load i1, i1* [[TMP30]], align 1
; CHECK-NEXT:    [[TMP59:%.*]] = load i1, i1* [[TMP31]], align 1
; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <4 x i1> poison, i1 [[TMP56]], i32 0
; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
; CHECK-NEXT:    [[TMP64:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP65:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP4]]
; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[TMP12]]
; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i32, i32* [[TMP64]], i32 0
; CHECK-NEXT:    [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison)
; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr i32, i32* [[TMP64]], i32 4
; CHECK-NEXT:    [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison)
; CHECK-NEXT:    [[TMP72:%.*]] = getelementptr i32, i32* [[TMP64]], i32 8
; CHECK-NEXT:    [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison)
; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr i32, i32* [[TMP64]], i32 12
; CHECK-NEXT:    [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison)
; CHECK-NEXT:    [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]]
; CHECK-NEXT:    [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]]
; CHECK-NEXT:    [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]
; CHECK-NEXT:    [[TMP83]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI9]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT:    [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
; CHECK-NEXT:    br i1 [[TMP84]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP81]], [[TMP80]]
; CHECK-NEXT:    [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP82]], [[BIN_RDX]]
; CHECK-NEXT:    [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP83]], [[BIN_RDX10]]
; CHECK-NEXT:    [[TMP85:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[PREHEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[TEST_ADDR:%.*]] = getelementptr inbounds i1, i1* [[TEST_BASE]], i64 [[IV]]
; CHECK-NEXT:    [[EARLYCND:%.*]] = load i1, i1* [[TEST_ADDR]], align 1
; CHECK-NEXT:    br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]]
; CHECK:       pred:
; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[IV]]
; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[ADDR]], align 4
; CHECK-NEXT:    br label [[LATCH]]
; CHECK:       latch:
; CHECK-NEXT:    [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ]
; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]]
; CHECK-NEXT:    [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094
; CHECK-NEXT:    br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK:       loop_exit:
; CHECK-NEXT:    [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP85]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[ACCUM_NEXT_LCSSA]]
;
entry:
  ; Obtain a 16384-byte request from @my_alloc and view the result as i32*.
  %allocation = call nonnull i8* @my_alloc(i32 16384)
  %base = bitcast i8* %allocation to i32*
  call void @init(i32* %base)
  ; When %allzero is true, the buffer goes through @my_free before the loop.
  br i1 %allzero, label %freeit, label %preheader
freeit:
  ; NOTE(review): presumably this deallocates the buffer, so any later
  ; unconditional access to %base on this path would be a use-after-free.
  call void @my_free(i8* %allocation)
  br label %preheader
preheader:
  ; Both paths (freed and not freed) merge here and enter the same loop.
  br label %loop
loop:
  ; %iv counts up from 0; %accum carries the running sum.
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %latch ]
  %accum = phi i32 [ 0, %preheader ], [ %accum.next, %latch ]
  %iv.next = add i64 %iv, 1
  ; Per-iteration condition loaded from memory: base[%iv] is only read when
  ; this is true.
  %test_addr = getelementptr inbounds i1, i1* %test_base, i64 %iv
  %earlycnd = load i1, i1* %test_addr
  br i1 %earlycnd, label %pred, label %latch
pred:
  ; The only access to the buffer inside the loop; it is guarded by %earlycnd.
  %addr = getelementptr inbounds i32, i32* %base, i64 %iv
  %val = load i32, i32* %addr
  br label %latch
latch:
  ; Iterations that skipped %pred add 0.
  %val.phi = phi i32 [0, %loop], [%val, %pred]
  %accum.next = add i32 %accum, %val.phi
  ; The exit is taken once %iv reaches 4095, i.e. after 4096 iterations.
  %exit = icmp ugt i64 %iv, 4094
  br i1 %exit, label %loop_exit, label %loop

loop_exit:
  ret i32 %accum.next
}
2722