; RUN: opt -aa-pipeline=basic-aa -passes=loop-distribute -enable-loop-distribute -verify-loop-info -verify-dom-info -S \
; RUN:   < %s | FileCheck %s

; RUN: opt -aa-pipeline=basic-aa -passes='loop-distribute,loop(print-access-info)' -enable-loop-distribute \
; RUN:   -verify-loop-info -verify-dom-info -disable-output < %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS

; RUN: opt -aa-pipeline=basic-aa -passes=loop-distribute,loop-vectorize -enable-loop-distribute -force-vector-width=4 -S \
; RUN:   < %s | FileCheck %s --check-prefix=VECTORIZE

; This file is exercised three ways (see the run lines above): the transformed
; IR after loop-distribute, the loop access analysis report printed after
; distribution, and the IR after loop-distribute followed by loop-vectorize.

; We should distribute this loop into a safe (2nd statement) and unsafe loop
; (1st statement):
;   for (i = 0; i < n; i++) {
;     A[i + 1] = A[i] * B[i];
;     =======================
;     C[i] = D[i] * E[i];
;   }

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"

; CHECK-LABEL: @f(
define void @f(i32* noalias %a,
               i32* noalias %b,
               i32* noalias %c,
               i32* noalias %d,
               i32* noalias %e) {
entry:
  br label %for.body

; Verify the two distributed loops.

; CHECK: entry.split.ldist1:
; CHECK:    br label %for.body.ldist1
; CHECK: for.body.ldist1:
; CHECK:    %mulA.ldist1 = mul i32 %loadB.ldist1, %loadA.ldist1
; CHECK:    br i1 %exitcond.ldist1, label %entry.split, label %for.body.ldist1

; CHECK: entry.split:
; CHECK:    br label %for.body
; CHECK: for.body:
; CHECK:    %mulC = mul i32 %loadD, %loadE
; CHECK: for.end:


; ANALYSIS: for.body.ldist1:
; ANALYSIS-NEXT: Report: unsafe dependent memory operations in loop
; ANALYSIS: for.body:
; ANALYSIS-NEXT: Memory dependences are safe{{$}}


; VECTORIZE: mul <4 x i32>

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]

  ; 1st statement: A[i + 1] = A[i] * B[i]. The store below writes the element
  ; of %a that the next iteration loads.
  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
  %loadA = load i32, i32* %arrayidxA, align 4

  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
  %loadB = load i32, i32* %arrayidxB, align 4

  %mulA = mul i32 %loadB, %loadA

  %add = add nuw nsw i64 %ind, 1
  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
  store i32 %mulA, i32* %arrayidxA_plus_4, align 4

  ; 2nd statement: C[i] = D[i] * E[i].
  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
  %loadD = load i32, i32* %arrayidxD, align 4

  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
  %loadE = load i32, i32* %arrayidxE, align 4

  %mulC = mul i32 %loadD, %loadE

  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
  store i32 %mulC, i32* %arrayidxC, align 4

  ; Leave the loop once the incremented induction variable reaches 20.
  %exitcond = icmp eq i64 %add, 20
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

declare i32 @llvm.convergent(i32) #0

; It is OK to distribute with a convergent operation, since in each
; new loop the convergent operation has the same control dependency.
; CHECK-LABEL: @f_with_convergent(
define void @f_with_convergent(i32* noalias %a,
                               i32* noalias %b,
                               i32* noalias %c,
                               i32* noalias %d,
                               i32* noalias %e) {
entry:
  br label %for.body

; Verify the two distributed loops.

; CHECK: entry.split.ldist1:
; CHECK:    br label %for.body.ldist1
; CHECK: for.body.ldist1:
; CHECK:    %mulA.ldist1 = mul i32 %loadB.ldist1, %loadA.ldist1
; CHECK:    br i1 %exitcond.ldist1, label %entry.split, label %for.body.ldist1

; CHECK: entry.split:
; CHECK:    br label %for.body
; CHECK: for.body:
; CHECK:    %convergentD = call i32 @llvm.convergent(i32 %loadD)
; CHECK:    %mulC = mul i32 %convergentD, %loadE
; CHECK: for.end:


; ANALYSIS: for.body.ldist1:
; ANALYSIS-NEXT: Report: unsafe dependent memory operations in loop
; ANALYSIS: for.body:
; ANALYSIS-NEXT: Has convergent operation in loop
; ANALYSIS-NEXT: Report: cannot add control dependency to convergent operation

; convergent instruction happens to block vectorization
; VECTORIZE: call i32 @llvm.convergent
; VECTORIZE: mul i32

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]

  ; 1st statement: A[i + 1] = A[i] * B[i]. The store below writes the element
  ; of %a that the next iteration loads.
  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
  %loadA = load i32, i32* %arrayidxA, align 4

  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
  %loadB = load i32, i32* %arrayidxB, align 4

  %mulA = mul i32 %loadB, %loadA

  %add = add nuw nsw i64 %ind, 1
  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
  store i32 %mulA, i32* %arrayidxA_plus_4, align 4

  ; 2nd statement: C[i] = convergent(D[i]) * E[i]. The value loaded from %d is
  ; passed through the convergent call before the multiply.
  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
  %loadD = load i32, i32* %arrayidxD, align 4

  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
  %loadE = load i32, i32* %arrayidxE, align 4

  %convergentD = call i32 @llvm.convergent(i32 %loadD)
  %mulC = mul i32 %convergentD, %loadE

  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
  store i32 %mulC, i32* %arrayidxC, align 4

  ; Leave the loop once the incremented induction variable reaches 20.
  %exitcond = icmp eq i64 %add, 20
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

attributes #0 = { nounwind readnone convergent }