; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize -dce \
; RUN:   -instcombine -force-vector-width=2 < %s | FileCheck %s
;
; Test that the loop vectorizer does not generate vector addresses that must
; then always be extracted.
;
; Both loops below run 10000 iterations and are vectorized with a forced
; vector width of 2, so each vector iteration covers two scalar iterations.

; Check that the addresses for a scalarized memory access are not extracted
; from a vector register.
;
; The loop stores the constant 4 to A[4 * i]. The expected output keeps the
; two stores scalar and computes each address with its own scalar
; getelementptr (indices 4 * i and 4 * i + 4); no vector of addresses and no
; extractelement appears in the vector body.
define i32 @foo(i32* nocapture %A) {
; CHECK-LABEL: @foo(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[TMP1]], 4
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
; CHECK-NEXT:    store i32 4, i32* [[TMP3]], align 4
; CHECK-NEXT:    store i32 4, i32* [[TMP4]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret i32 poison
;

entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Element index is i << 2, i.e. a stride of four i32 elements per iteration,
  ; so the store is not consecutive.
  %0 = shl nsw i64 %indvars.iv, 2
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
  store i32 4, i32* %arrayidx, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares the induction variable truncated to i32.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 poison
}


; Check that a load of an address is scalarized.
;
; The loop loads a pointer from PtrPtr[i], loads an i32 through that pointer,
; and stores the value to A[i]. The expected output loads the two pointers
; with scalar loads (not one vector-of-pointers load), performs the two
; dependent i32 loads as scalars, builds a <2 x i32> with insertelement, and
; writes it to A with a single vector store.
define i32 @foo1(i32* nocapture noalias %A, i32** nocapture %PtrPtr) {
; CHECK-LABEL: @foo1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32*, i32** [[PTRPTR:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32*, i32** [[PTRPTR]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP1]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[TMP2]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <2 x i32>*
; CHECK-NEXT:    store <2 x i32> [[TMP8]], <2 x i32>* [[TMP10]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret i32 poison
;

entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; The loaded pointer %el is itself used as the address of the next load.
  %ptr = getelementptr inbounds i32*, i32** %PtrPtr, i64 %indvars.iv
  %el = load i32*, i32** %ptr
  %v = load i32, i32* %el
  ; The store to A is consecutive in i.
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  store i32 %v, i32* %arrayidx, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 poison
}