; Exercises the loop vectorizer on loops that need runtime memory checks, once
; with the tool's default settings and once with
; -pragma-vectorize-memory-check-threshold=6, and verifies the emitted remarks.
; RUN: opt < %s -loop-vectorize -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s -check-prefix=OVERRIDE
; RUN: opt < %s -loop-vectorize -pragma-vectorize-memory-check-threshold=6 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

target triple = "x86_64-unknown-linux"

; First loop produces diagnostic pass remark.
;CHECK: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 2)
; Second loop produces diagnostic analysis remark.
;CHECK: remark: {{.*}}:0:0: loop not vectorized: cannot prove it is safe to reorder memory operations

; First loop produces diagnostic pass remark.
;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 2)
; Second loop produces diagnostic analysis remark.
;OVERRIDE: remark: {{.*}}:0:0: loop not vectorized: cannot prove it is safe to reorder memory operations

; We are vectorizing with 6 runtime checks.
;CHECK-LABEL: func1x6(
;CHECK: <4 x i32>
;CHECK: ret
;OVERRIDE-LABEL: func1x6(
;OVERRIDE: <4 x i32>
;OVERRIDE: ret
; For each of 256 iterations, loads one i32 from A, B, C, E and F at the
; induction index, adds them, and stores the sum to %out. %D is never used.
define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.016 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.016
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %B, i64 %i.016
  %1 = load i32, i32* %arrayidx1, align 4
  %add = add nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %i.016
  %2 = load i32, i32* %arrayidx2, align 4
  %add3 = add nsw i32 %add, %2
  %arrayidx4 = getelementptr inbounds i32, i32* %E, i64 %i.016
  %3 = load i32, i32* %arrayidx4, align 4
  %add5 = add nsw i32 %add3, %3
  %arrayidx6 = getelementptr inbounds i32, i32* %F, i64 %i.016
  %4 = load i32, i32* %arrayidx6, align 4
  %add7 = add nsw i32 %add5, %4
  %arrayidx8 = getelementptr inbounds i32, i32* %out, i64 %i.016
  store i32 %add7, i32* %arrayidx8, align 4
  %inc = add i64 %i.016, 1
  %exitcond = icmp eq i64 %inc, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret i32 undef
}

; We are not vectorizing with 12 runtime checks.
;CHECK-LABEL: func2x6(
;CHECK-NOT: <4 x i32>
;CHECK: ret
; The loop below carries no vectorization hint; the run without the threshold
; flag likewise expects it not to be vectorized with 12 checks.
;OVERRIDE-LABEL: func2x6(
;OVERRIDE-NOT: <4 x i32>
;OVERRIDE: ret
; For each of 256 iterations, sums the i32 elements of A, B, C, E and F at the
; induction index and stores the result to %out, then reloads the same five
; elements, sums them again, and stores that result to %out2. %D is never used.
define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; First pass: accumulate A + B + C + E + F and write it to out[iv].
  %a.ptr = getelementptr inbounds i32, i32* %A, i64 %iv
  %a.val = load i32, i32* %a.ptr, align 4
  %b.ptr = getelementptr inbounds i32, i32* %B, i64 %iv
  %b.val = load i32, i32* %b.ptr, align 4
  %sum.ab = add nsw i32 %b.val, %a.val
  %c.ptr = getelementptr inbounds i32, i32* %C, i64 %iv
  %c.val = load i32, i32* %c.ptr, align 4
  %sum.abc = add nsw i32 %sum.ab, %c.val
  %e.ptr = getelementptr inbounds i32, i32* %E, i64 %iv
  %e.val = load i32, i32* %e.ptr, align 4
  %sum.abce = add nsw i32 %sum.abc, %e.val
  %f.ptr = getelementptr inbounds i32, i32* %F, i64 %iv
  %f.val = load i32, i32* %f.ptr, align 4
  %sum.first = add nsw i32 %sum.abce, %f.val
  %out.ptr = getelementptr inbounds i32, i32* %out, i64 %iv
  store i32 %sum.first, i32* %out.ptr, align 4
  ; Second pass: reload the same addresses after the store and write to out2[iv].
  %a.again = load i32, i32* %a.ptr, align 4
  %b.again = load i32, i32* %b.ptr, align 4
  %resum.ab = add nsw i32 %b.again, %a.again
  %c.again = load i32, i32* %c.ptr, align 4
  %resum.abc = add nsw i32 %resum.ab, %c.again
  %e.again = load i32, i32* %e.ptr, align 4
  %resum.abce = add nsw i32 %resum.abc, %e.again
  %f.again = load i32, i32* %f.ptr, align 4
  %resum.all = add nsw i32 %resum.abce, %f.again
  %out2.ptr = getelementptr inbounds i32, i32* %out2, i64 %iv
  store i32 %resum.all, i32* %out2.ptr, align 4
  %iv.next = add i64 %iv, 1
  %done = icmp eq i64 %iv.next, 256
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret i32 undef
}