; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE(review): several expectations below look hand-trimmed after generation
; (e.g. the truncated vector-phi patterns); regenerating with the script would
; overwrite them - confirm before doing so.
; RUN: opt < %s -loop-vectorize -S | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"

; This test checks the vector GEP that is generated before a scatter.
; The code below used to crash because SSA form was destroyed by an incorrect
; vectorization of the GEP.
;
; The expectations in @_Z3fn1v verify, for a vectorization factor of 16:
;   * the scalar and vector induction phis of the vector loop,
;   * sixteen per-lane scalar indices (offset.idx + 0, 2, ..., 30),
;   * sixteen scalar GEPs into @d that are packed into a vector of pointers,
;   * the per-lane scalar GEPs plus the vector GEP built from that vector,
;   * the masked scatter that stores through the vector GEP.

@d = global [10 x [10 x i32]] zeroinitializer, align 16
@c = external global i32, align 4
@a = external global i32, align 4
@b = external global i64, align 8

; Function Attrs: norecurse nounwind ssp uwtable
define void @_Z3fn1v() #0 {
; CHECK-LABEL: @_Z3fn1v(
; CHECK:       vector.body:
; Induction variables: the scalar index and two <16 x i64> vector inductions.
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX]].next, %vector.body ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [
; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <16 x i64> [
; Scalarized outer index: index * 2 + 8, then one value per lane (stride 2).
; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 [[SHL]], 8
; CHECK-NEXT:    [[IND00:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT:    [[IND02:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT:    [[IND04:%.*]] = add i64 [[OFFSET_IDX]], 4
; CHECK-NEXT:    [[IND06:%.*]] = add i64 [[OFFSET_IDX]], 6
; CHECK-NEXT:    [[IND08:%.*]] = add i64 [[OFFSET_IDX]], 8
; CHECK-NEXT:    [[IND10:%.*]] = add i64 [[OFFSET_IDX]], 10
; CHECK-NEXT:    [[IND12:%.*]] = add i64 [[OFFSET_IDX]], 12
; CHECK-NEXT:    [[IND14:%.*]] = add i64 [[OFFSET_IDX]], 14
; CHECK-NEXT:    [[IND16:%.*]] = add i64 [[OFFSET_IDX]], 16
; CHECK-NEXT:    [[IND18:%.*]] = add i64 [[OFFSET_IDX]], 18
; CHECK-NEXT:    [[IND20:%.*]] = add i64 [[OFFSET_IDX]], 20
; CHECK-NEXT:    [[IND22:%.*]] = add i64 [[OFFSET_IDX]], 22
; CHECK-NEXT:    [[IND24:%.*]] = add i64 [[OFFSET_IDX]], 24
; CHECK-NEXT:    [[IND26:%.*]] = add i64 [[OFFSET_IDX]], 26
; CHECK-NEXT:    [[IND28:%.*]] = add i64 [[OFFSET_IDX]], 28
; CHECK-NEXT:    [[IND30:%.*]] = add i64 [[OFFSET_IDX]], 30
; Widened form of "sub nsw i64 8, <outer induction>".
; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw <16 x i64> <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>, [[VEC_IND]]
; One scalar GEP into @d per lane.
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND00]]
; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND02]]
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND04]]
; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND06]]
; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND08]]
; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND10]]
; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND12]]
; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND14]]
; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND16]]
; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND18]]
; CHECK-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND20]]
; CHECK-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND22]]
; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND24]]
; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND26]]
; CHECK-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND28]]
; CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[IND30]]
; The sixteen scalar pointers are packed into one <16 x [10 x i32]*> vector.
; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x [10 x i32]*> undef, [10 x i32]* [[TMP12]], i32 0
; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x [10 x i32]*> [[TMP13]], [10 x i32]* [[TMP15]], i32 1
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <16 x [10 x i32]*> [[TMP16]], [10 x i32]* [[TMP18]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <16 x [10 x i32]*> [[TMP19]], [10 x i32]* [[TMP21]], i32 3
; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <16 x [10 x i32]*> [[TMP22]], [10 x i32]* [[TMP24]], i32 4
; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <16 x [10 x i32]*> [[TMP25]], [10 x i32]* [[TMP27]], i32 5
; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <16 x [10 x i32]*> [[TMP28]], [10 x i32]* [[TMP30]], i32 6
; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <16 x [10 x i32]*> [[TMP31]], [10 x i32]* [[TMP33]], i32 7
; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <16 x [10 x i32]*> [[TMP34]], [10 x i32]* [[TMP36]], i32 8
; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <16 x [10 x i32]*> [[TMP37]], [10 x i32]* [[TMP39]], i32 9
; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <16 x [10 x i32]*> [[TMP40]], [10 x i32]* [[TMP42]], i32 10
; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <16 x [10 x i32]*> [[TMP43]], [10 x i32]* [[TMP45]], i32 11
; CHECK-NEXT:    [[TMP49:%.*]] = insertelement <16 x [10 x i32]*> [[TMP46]], [10 x i32]* [[TMP48]], i32 12
; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <16 x [10 x i32]*> [[TMP49]], [10 x i32]* [[TMP51]], i32 13
; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <16 x [10 x i32]*> [[TMP52]], [10 x i32]* [[TMP54]], i32 14
; CHECK-NEXT:    [[TMP58:%.*]] = insertelement <16 x [10 x i32]*> [[TMP55]], [10 x i32]* [[TMP57]], i32 15
; Widened inner index, followed by per-lane scalar GEPs that must use the
; scalar base pointers defined above.
; CHECK-NEXT:    [[TMP59:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
; CHECK-NEXT:    [[TMP61:%.*]] = extractelement <16 x i64> [[TMP59]], i32 0
; CHECK-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP12]], i64 [[TMP61]], i64 0
; CHECK-NEXT:    [[TMP65:%.*]] = extractelement <16 x i64> [[TMP59]], i32 1
; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP15]], i64 [[TMP65]], i64 0
; CHECK-NEXT:    [[TMP69:%.*]] = extractelement <16 x i64> [[TMP59]], i32 2
; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP18]], i64 [[TMP69]], i64 0
; CHECK-NEXT:    [[TMP73:%.*]] = extractelement <16 x i64> [[TMP59]], i32 3
; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP21]], i64 [[TMP73]], i64 0
; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <16 x i64> [[TMP59]], i32 4
; CHECK-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP24]], i64 [[TMP77]], i64 0
; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <16 x i64> [[TMP59]], i32 5
; CHECK-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP27]], i64 [[TMP81]], i64 0
; CHECK-NEXT:    [[TMP85:%.*]] = extractelement <16 x i64> [[TMP59]], i32 6
; CHECK-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP30]], i64 [[TMP85]], i64 0
; CHECK-NEXT:    [[TMP89:%.*]] = extractelement <16 x i64> [[TMP59]], i32 7
; CHECK-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP33]], i64 [[TMP89]], i64 0
; CHECK-NEXT:    [[TMP93:%.*]] = extractelement <16 x i64> [[TMP59]], i32 8
; CHECK-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP36]], i64 [[TMP93]], i64 0
; CHECK-NEXT:    [[TMP97:%.*]] = extractelement <16 x i64> [[TMP59]], i32 9
; CHECK-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP39]], i64 [[TMP97]], i64 0
; CHECK-NEXT:    [[TMP101:%.*]] = extractelement <16 x i64> [[TMP59]], i32 10
; CHECK-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP42]], i64 [[TMP101]], i64 0
; CHECK-NEXT:    [[TMP105:%.*]] = extractelement <16 x i64> [[TMP59]], i32 11
; CHECK-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP45]], i64 [[TMP105]], i64 0
; CHECK-NEXT:    [[TMP109:%.*]] = extractelement <16 x i64> [[TMP59]], i32 12
; CHECK-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP48]], i64 [[TMP109]], i64 0
; CHECK-NEXT:    [[TMP113:%.*]] = extractelement <16 x i64> [[TMP59]], i32 13
; CHECK-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP51]], i64 [[TMP113]], i64 0
; CHECK-NEXT:    [[TMP117:%.*]] = extractelement <16 x i64> [[TMP59]], i32 14
; CHECK-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP54]], i64 [[TMP117]], i64 0
; CHECK-NEXT:    [[TMP121:%.*]] = extractelement <16 x i64> [[TMP59]], i32 15
; CHECK-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP57]], i64 [[TMP121]], i64 0
; The vector GEP consumes the packed pointer vector and feeds the scatter.
; CHECK-NEXT:    [[VECTORGEP:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP58]], <16 x i64> [[TMP59]], i64 0
; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[VECTORGEP]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; Both vector inductions advance by 32 (16 lanes times a scalar step of 2).
; CHECK:         [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
; CHECK:         [[STEP_ADD4:%.*]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
entry:
  ; Both loops are skipped entirely unless @c is greater than 8.
  %0 = load i32, i32* @c, align 4
  %cmp34 = icmp sgt i32 %0, 8
  br i1 %cmp34, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:                                   ; preds = %entry
  ; %tobool (@a == 0) selects which of the two loop copies runs; %tobool6 is
  ; the loop-invariant condition branched on inside for.body.us; %3 is the
  ; sign-extended value of @c used as the bound of both loops.
  %1 = load i32, i32* @a, align 4
  %tobool = icmp eq i32 %1, 0
  %2 = load i64, i64* @b, align 8
  %mul = mul i64 %2, 4063299859190
  %tobool6 = icmp eq i64 %mul, 0
  %3 = sext i32 %0 to i64
  br i1 %tobool, label %for.body.us.preheader, label %for.body.preheader

for.body.preheader:                               ; preds = %for.body.lr.ph
  br label %for.body

for.body.us.preheader:                            ; preds = %for.body.lr.ph
  br label %for.body.us

; Loop copy taken when @a == 0. It has two induction variables (one starting
; at 8, one at 0, both stepping by 2) and stores either 8 or 7 depending on
; %tobool6.
for.body.us:                                      ; preds = %for.body.us.preheader, %for.cond.cleanup4.us-lcssa.us.us
  %indvars.iv78 = phi i64 [ %indvars.iv.next79, %for.cond.cleanup4.us-lcssa.us.us ], [ 8, %for.body.us.preheader ]
  %indvars.iv70 = phi i64 [ %indvars.iv.next71, %for.cond.cleanup4.us-lcssa.us.us ], [ 0, %for.body.us.preheader ]
  %4 = sub nsw i64 8, %indvars.iv78
  %add.ptr.us = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv78
  %5 = add nsw i64 %4, %indvars.iv70
  %arraydecay.us.us.us = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %5, i64 0
  br i1 %tobool6, label %for.body5.us.us.us.preheader, label %for.body5.us.us48.preheader

for.body5.us.us48.preheader:                      ; preds = %for.body.us
  store i32 8, i32* %arraydecay.us.us.us, align 16
  %indvars.iv.next66 = or i64 %indvars.iv70, 1
  %6 = add nsw i64 %4, %indvars.iv.next66
  %arraydecay.us.us55.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %6, i64 0
  store i32 8, i32* %arraydecay.us.us55.1, align 8
  br label %for.cond.cleanup4.us-lcssa.us.us

for.body5.us.us.us.preheader:                     ; preds = %for.body.us
  store i32 7, i32* %arraydecay.us.us.us, align 16
  %indvars.iv.next73 = or i64 %indvars.iv70, 1
  %7 = add nsw i64 %4, %indvars.iv.next73
  %arraydecay.us.us.us.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %7, i64 0
  store i32 7, i32* %arraydecay.us.us.us.1, align 8
  br label %for.cond.cleanup4.us-lcssa.us.us

for.cond.cleanup4.us-lcssa.us.us:                 ; preds = %for.body5.us.us48.preheader, %for.body5.us.us.us.preheader
  %indvars.iv.next79 = add nuw nsw i64 %indvars.iv78, 2
  %cmp.us = icmp slt i64 %indvars.iv.next79, %3
  %indvars.iv.next71 = add nuw nsw i64 %indvars.iv70, 2
  br i1 %cmp.us, label %for.body.us, label %for.cond.cleanup.loopexit

for.cond.cleanup.loopexit:                        ; preds = %for.cond.cleanup4.us-lcssa.us.us
  br label %for.cond.cleanup

for.cond.cleanup.loopexit99:                      ; preds = %for.body
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit99, %for.cond.cleanup.loopexit, %entry
  ret void

; Loop copy taken when @a != 0: a single-block loop that stores 8 through two
; GEPs derived from %add.ptr on every iteration.
; NOTE(review): the unconditional all-true scatter of the constant 8 in the
; expectations above presumably corresponds to the first store of this loop -
; confirm against the actual opt output.
for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv95 = phi i64 [ %indvars.iv.next96, %for.body ], [ 8, %for.body.preheader ]
  %indvars.iv87 = phi i64 [ %indvars.iv.next88, %for.body ], [ 0, %for.body.preheader ]
  %8 = sub nsw i64 8, %indvars.iv95
  %add.ptr = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv95
  %9 = add nsw i64 %8, %indvars.iv87
  %arraydecay.us31 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %9, i64 0
  store i32 8, i32* %arraydecay.us31, align 16
  %indvars.iv.next90 = or i64 %indvars.iv87, 1
  %10 = add nsw i64 %8, %indvars.iv.next90
  %arraydecay.us31.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %10, i64 0
  store i32 8, i32* %arraydecay.us31.1, align 8
  %indvars.iv.next96 = add nuw nsw i64 %indvars.iv95, 2
  %cmp = icmp slt i64 %indvars.iv.next96, %3
  %indvars.iv.next88 = add nuw nsw i64 %indvars.iv87, 2
  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit99
}

; The target-cpu (knl) and the AVX-512 features listed here are part of the
; test input and are kept exactly as they were.
attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }