1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -loop-vectorize -S | FileCheck %s
3target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
4target triple = "x86_64-apple-macosx10.11.0"
5
6; This test checks vector GEP before scatter.
7; The code bellow crashed due to destroyed SSA while incorrect vectorization of
8; the GEP.
9
10@d = global [10 x [10 x i32]] zeroinitializer, align 16
11@c = external global i32, align 4
12@a = external global i32, align 4
13@b = external global i64, align 8
14
15; Function Attrs: norecurse nounwind ssp uwtable
16define void @_Z3fn1v() #0 {
17; CHECK-LABEL: @_Z3fn1v(
18; CHECK:       vector.body:
19; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX:%.*]].next, %vector.body ]
20; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [
21; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <16 x i64> [
22; CHECK-NEXT:    [[SHL:%.*]] = shl i64 %index, 1
23; CHECK-NEXT:    %offset.idx = add i64 [[SHL]], 8
24; CHECK-NEXT:    [[IND00:%.*]] = add i64 %offset.idx, 0
25; CHECK-NEXT:    [[IND02:%.*]] = add i64 %offset.idx, 2
26; CHECK-NEXT:    [[IND04:%.*]] = add i64 %offset.idx, 4
27; CHECK-NEXT:    [[IND06:%.*]] = add i64 %offset.idx, 6
28; CHECK-NEXT:    [[IND08:%.*]] = add i64 %offset.idx, 8
29; CHECK-NEXT:    [[IND10:%.*]] = add i64 %offset.idx, 10
30; CHECK-NEXT:    [[IND12:%.*]] = add i64 %offset.idx, 12
31; CHECK-NEXT:    [[IND14:%.*]] = add i64 %offset.idx, 14
32; CHECK-NEXT:    [[IND16:%.*]] = add i64 %offset.idx, 16
33; CHECK-NEXT:    [[IND18:%.*]] = add i64 %offset.idx, 18
34; CHECK-NEXT:    [[IND20:%.*]] = add i64 %offset.idx, 20
35; CHECK-NEXT:    [[IND22:%.*]] = add i64 %offset.idx, 22
36; CHECK-NEXT:    [[IND24:%.*]] = add i64 %offset.idx, 24
37; CHECK-NEXT:    [[IND26:%.*]] = add i64 %offset.idx, 26
38; CHECK-NEXT:    [[IND28:%.*]] = add i64 %offset.idx, 28
39; CHECK-NEXT:    [[IND30:%.*]] = add i64 %offset.idx, 30
40; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw <16 x i64> <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>, [[VEC_IND]]
41; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND00]]
42; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND02]]
43; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND04]]
44; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND06]]
45; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND08]]
46; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND10]]
47; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND12]]
48; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND14]]
49; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND16]]
50; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND18]]
51; CHECK-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND20]]
52; CHECK-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND22]]
53; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND24]]
54; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND26]]
55; CHECK-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND28]]
56; CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND30]]
57; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x [10 x i32]*> undef, [10 x i32]* [[TMP12]], i32 0
58; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x [10 x i32]*> [[TMP13]], [10 x i32]* [[TMP15]], i32 1
59; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <16 x [10 x i32]*> [[TMP16]], [10 x i32]* [[TMP18]], i32 2
60; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <16 x [10 x i32]*> [[TMP19]], [10 x i32]* [[TMP21]], i32 3
61; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <16 x [10 x i32]*> [[TMP22]], [10 x i32]* [[TMP24]], i32 4
62; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <16 x [10 x i32]*> [[TMP25]], [10 x i32]* [[TMP27]], i32 5
63; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <16 x [10 x i32]*> [[TMP28]], [10 x i32]* [[TMP30]], i32 6
64; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <16 x [10 x i32]*> [[TMP31]], [10 x i32]* [[TMP33]], i32 7
65; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <16 x [10 x i32]*> [[TMP34]], [10 x i32]* [[TMP36]], i32 8
66; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <16 x [10 x i32]*> [[TMP37]], [10 x i32]* [[TMP39]], i32 9
67; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <16 x [10 x i32]*> [[TMP40]], [10 x i32]* [[TMP42]], i32 10
68; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <16 x [10 x i32]*> [[TMP43]], [10 x i32]* [[TMP45]], i32 11
69; CHECK-NEXT:    [[TMP49:%.*]] = insertelement <16 x [10 x i32]*> [[TMP46]], [10 x i32]* [[TMP48]], i32 12
70; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <16 x [10 x i32]*> [[TMP49]], [10 x i32]* [[TMP51]], i32 13
71; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <16 x [10 x i32]*> [[TMP52]], [10 x i32]* [[TMP54]], i32 14
72; CHECK-NEXT:    [[TMP58:%.*]] = insertelement <16 x [10 x i32]*> [[TMP55]], [10 x i32]* [[TMP57]], i32 15
73; CHECK-NEXT:    [[TMP59:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
74; CHECK-NEXT:    [[TMP61:%.*]] = extractelement <16 x i64> [[TMP59]], i32 0
75; CHECK-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP12]], i64 [[TMP61]], i64 0
76; CHECK-NEXT:    [[TMP65:%.*]] = extractelement <16 x i64> [[TMP59]], i32 1
77; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP15]], i64 [[TMP65]], i64 0
78; CHECK-NEXT:    [[TMP69:%.*]] = extractelement <16 x i64> [[TMP59]], i32 2
79; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP18]], i64 [[TMP69]], i64 0
80; CHECK-NEXT:    [[TMP73:%.*]] = extractelement <16 x i64> [[TMP59]], i32 3
81; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP21]], i64 [[TMP73]], i64 0
82; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <16 x i64> [[TMP59]], i32 4
83; CHECK-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP24]], i64 [[TMP77]], i64 0
84; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <16 x i64> [[TMP59]], i32 5
85; CHECK-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP27]], i64 [[TMP81]], i64 0
86; CHECK-NEXT:    [[TMP85:%.*]] = extractelement <16 x i64> [[TMP59]], i32 6
87; CHECK-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP30]], i64 [[TMP85]], i64 0
88; CHECK-NEXT:    [[TMP89:%.*]] = extractelement <16 x i64> [[TMP59]], i32 7
89; CHECK-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP33]], i64 [[TMP89]], i64 0
90; CHECK-NEXT:    [[TMP93:%.*]] = extractelement <16 x i64> [[TMP59]], i32 8
91; CHECK-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP36]], i64 [[TMP93]], i64 0
92; CHECK-NEXT:    [[TMP97:%.*]] = extractelement <16 x i64> [[TMP59]], i32 9
93; CHECK-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP39]], i64 [[TMP97]], i64 0
94; CHECK-NEXT:    [[TMP101:%.*]] = extractelement <16 x i64> [[TMP59]], i32 10
95; CHECK-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP42]], i64 [[TMP101]], i64 0
96; CHECK-NEXT:    [[TMP105:%.*]] = extractelement <16 x i64> [[TMP59]], i32 11
97; CHECK-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP45]], i64 [[TMP105]], i64 0
98; CHECK-NEXT:    [[TMP109:%.*]] = extractelement <16 x i64> [[TMP59]], i32 12
99; CHECK-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP48]], i64 [[TMP109]], i64 0
100; CHECK-NEXT:    [[TMP113:%.*]] = extractelement <16 x i64> [[TMP59]], i32 13
101; CHECK-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP51]], i64 [[TMP113]], i64 0
102; CHECK-NEXT:    [[TMP117:%.*]] = extractelement <16 x i64> [[TMP59]], i32 14
103; CHECK-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP54]], i64 [[TMP117]], i64 0
104; CHECK-NEXT:    [[TMP121:%.*]] = extractelement <16 x i64> [[TMP59]], i32 15
105; CHECK-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP57]], i64 [[TMP121]], i64 0
106; CHECK-NEXT:    [[VECTORGEP:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP58]], <16 x i64> [[TMP59]], i64 0
107; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[VECTORGEP]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
108; CHECK:         [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
109; CHECK:         [[STEP_ADD4:%.*]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
110entry:
111  %0 = load i32, i32* @c, align 4
112  %cmp34 = icmp sgt i32 %0, 8
113  br i1 %cmp34, label %for.body.lr.ph, label %for.cond.cleanup
114
115for.body.lr.ph:                                   ; preds = %entry
116  %1 = load i32, i32* @a, align 4
117  %tobool = icmp eq i32 %1, 0
118  %2 = load i64, i64* @b, align 8
119  %mul = mul i64 %2, 4063299859190
120  %tobool6 = icmp eq i64 %mul, 0
121  %3 = sext i32 %0 to i64
122  br i1 %tobool, label %for.body.us.preheader, label %for.body.preheader
123
124for.body.preheader:                               ; preds = %for.body.lr.ph
125  br label %for.body
126
127for.body.us.preheader:                            ; preds = %for.body.lr.ph
128  br label %for.body.us
129
130for.body.us:                                      ; preds = %for.body.us.preheader, %for.cond.cleanup4.us-lcssa.us.us
131  %indvars.iv78 = phi i64 [ %indvars.iv.next79, %for.cond.cleanup4.us-lcssa.us.us ], [ 8, %for.body.us.preheader ]
132  %indvars.iv70 = phi i64 [ %indvars.iv.next71, %for.cond.cleanup4.us-lcssa.us.us ], [ 0, %for.body.us.preheader ]
133  %4 = sub nsw i64 8, %indvars.iv78
134  %add.ptr.us = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv78
135  %5 = add nsw i64 %4, %indvars.iv70
136  %arraydecay.us.us.us = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %5, i64 0
137  br i1 %tobool6, label %for.body5.us.us.us.preheader, label %for.body5.us.us48.preheader
138
139for.body5.us.us48.preheader:                      ; preds = %for.body.us
140  store i32 8, i32* %arraydecay.us.us.us, align 16
141  %indvars.iv.next66 = or i64 %indvars.iv70, 1
142  %6 = add nsw i64 %4, %indvars.iv.next66
143  %arraydecay.us.us55.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %6, i64 0
144  store i32 8, i32* %arraydecay.us.us55.1, align 8
145  br label %for.cond.cleanup4.us-lcssa.us.us
146
147for.body5.us.us.us.preheader:                     ; preds = %for.body.us
148  store i32 7, i32* %arraydecay.us.us.us, align 16
149  %indvars.iv.next73 = or i64 %indvars.iv70, 1
150  %7 = add nsw i64 %4, %indvars.iv.next73
151  %arraydecay.us.us.us.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %7, i64 0
152  store i32 7, i32* %arraydecay.us.us.us.1, align 8
153  br label %for.cond.cleanup4.us-lcssa.us.us
154
155for.cond.cleanup4.us-lcssa.us.us:                 ; preds = %for.body5.us.us48.preheader, %for.body5.us.us.us.preheader
156  %indvars.iv.next79 = add nuw nsw i64 %indvars.iv78, 2
157  %cmp.us = icmp slt i64 %indvars.iv.next79, %3
158  %indvars.iv.next71 = add nuw nsw i64 %indvars.iv70, 2
159  br i1 %cmp.us, label %for.body.us, label %for.cond.cleanup.loopexit
160
161for.cond.cleanup.loopexit:                        ; preds = %for.cond.cleanup4.us-lcssa.us.us
162  br label %for.cond.cleanup
163
164for.cond.cleanup.loopexit99:                      ; preds = %for.body
165  br label %for.cond.cleanup
166
167for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit99, %for.cond.cleanup.loopexit, %entry
168  ret void
169
170for.body:                                         ; preds = %for.body.preheader, %for.body
171  %indvars.iv95 = phi i64 [ %indvars.iv.next96, %for.body ], [ 8, %for.body.preheader ]
172  %indvars.iv87 = phi i64 [ %indvars.iv.next88, %for.body ], [ 0, %for.body.preheader ]
173  %8 = sub nsw i64 8, %indvars.iv95
174  %add.ptr = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv95
175  %9 = add nsw i64 %8, %indvars.iv87
176  %arraydecay.us31 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %9, i64 0
177  store i32 8, i32* %arraydecay.us31, align 16
178  %indvars.iv.next90 = or i64 %indvars.iv87, 1
179  %10 = add nsw i64 %8, %indvars.iv.next90
180  %arraydecay.us31.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %10, i64 0
181  store i32 8, i32* %arraydecay.us31.1, align 8
182  %indvars.iv.next96 = add nuw nsw i64 %indvars.iv95, 2
183  %cmp = icmp slt i64 %indvars.iv.next96, %3
184  %indvars.iv.next88 = add nuw nsw i64 %indvars.iv87, 2
185  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit99
186}
187
188attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
189