; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine -enable-new-pm=0 2>&1 | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes=loop-vectorize,instcombine -force-vector-width=2 -force-vector-interleave=1 -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

;
define void @vector_gep(i32** %a, i32 *%b, i64 %n) {
; CHECK-LABEL: @vector_gep(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], <2 x i64> [[VEC_IND]]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32*, i32** [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32** [[TMP1]] to <2 x i32*>*
; CHECK-NEXT:    store <2 x i32*> [[TMP0]], <2 x i32*>* [[TMP2]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I]]
; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[I]]
; CHECK-NEXT:    store i32* [[VAR0]], i32** [[VAR1]], align 8
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %var0 = getelementptr inbounds i32, i32* %b, i64 %i
  %var1 = getelementptr inbounds i32*, i32** %a, i64 %i
  store i32* %var0, i32** %var1, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

;
define void @scalar_store(i32** %a, i32 *%b, i64 %n) {
; CHECK-LABEL: @scalar_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2)
; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
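; Note: this loop steps %i by 2, so its trip count is ((smax(n, 2) - 1) >> 1) + 1,
; computed by the lshr/add checked on the next two lines.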
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806
; CHECK-NEXT:    [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32*, i32** [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[TMP3]]
; CHECK-NEXT:    store i32* [[TMP4]], i32** [[TMP6]], align 8
; CHECK-NEXT:    store i32* [[TMP5]], i32** [[TMP7]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I]]
; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[I]]
; CHECK-NEXT:    store i32* [[VAR0]], i32** [[VAR1]], align 8
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %var0 = getelementptr inbounds i32, i32* %b, i64 %i
  %var1 = getelementptr inbounds i32*, i32** %a, i64 %i
  store i32* %var0, i32** %var1, align 8
  %i.next = add nuw nsw i64 %i, 2
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

;
define void @expansion(i32** %a, i64 *%b, i64 %n) {
; CHECK-LABEL: @expansion(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2)
; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806
; CHECK-NEXT:    [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
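; Note: instcombine folds away the zero-index GEP (%var2) and recasts the
; i64*-to-i32* bitcast onto the store addresses, so the checks below show
; scalarized GEPs and stores for both unrolled lanes of this stride-2 loop.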
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32*, i32** [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32** [[TMP6]] to i64**
; CHECK-NEXT:    store i64* [[TMP4]], i64** [[TMP8]], align 8
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32** [[TMP7]] to i64**
; CHECK-NEXT:    store i64* [[TMP5]], i64** [[TMP9]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[I]]
; CHECK-NEXT:    [[VAR3:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[I]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32** [[VAR3]] to i64**
; CHECK-NEXT:    store i64* [[VAR0]], i64** [[TMP11]], align 8
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %var0 = getelementptr inbounds i64, i64* %b, i64 %i
  %var1 = bitcast i64* %var0 to i32*
  %var2 = getelementptr inbounds i32*, i32** %a, i64 0
  %var3 = getelementptr inbounds i32*, i32** %var2, i64 %i
  store i32* %var1, i32** %var3, align 8
  %i.next = add nuw nsw i64 %i, 2
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

;
define void @no_gep_or_bitcast(i32** noalias %a, i64 %n) {
; CHECK-LABEL: @no_gep_or_bitcast(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32*, i32** [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32** [[TMP0]] to <2 x i32*>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32*> [[WIDE_LOAD]], i64 0
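; Note: the pointers stored through here are loaded from memory rather than
; produced by a GEP or bitcast, so each lane of the wide load is extracted
; and written through individually, as the surrounding checks show.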
; CHECK-NEXT:    store i32 0, i32* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32*> [[WIDE_LOAD]], i64 1
; CHECK-NEXT:    store i32 0, i32* [[TMP3]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32*, i32** [[A]], i64 [[I]]
; CHECK-NEXT:    [[VAR1:%.*]] = load i32*, i32** [[VAR0]], align 8
; CHECK-NEXT:    store i32 0, i32* [[VAR1]], align 8
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %var0 = getelementptr inbounds i32*, i32** %a, i64 %i
  %var1 = load i32*, i32** %var0, align 8
  store i32 0, i32* %var1, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
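; Note: the LOOP[0-9]+ groups captured in the branch checks above are the
; !llvm.loop metadata the vectorizer attaches (e.g. llvm.loop.isvectorized)
; so these loops are not vectorized a second time.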