; Test optimization remarks generated by the LoopInterchange pass.
;
; Two configurations are exercised on the same input. The first uses the
; default check prefix; the second additionally passes
; -da-disable-delinearization-checks and is verified under the DELIN prefix.
; In both cases the remarks are written to %t and that file is what gets
; checked, not the IR printed by -S.
;
; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN:     -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \
; RUN:     -pass-remarks='loop-interchange' -S
; RUN: cat %t | FileCheck %s

; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN:     -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \
; RUN:     -pass-remarks='loop-interchange' -S -da-disable-delinearization-checks
; RUN: cat %t | FileCheck --check-prefix=DELIN %s

; Shared arrays used by the test functions below.
@A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x [100 x i32]] zeroinitializer
@C = common global [100 x i32] zeroinitializer

;;---------------------------------------Test case 01---------------------------------
;; Loop interchange is not performed: the default run expects a dependence
;; remark, and with delinearization checks disabled the expected remark is
;; that interchanging is not profitable.
;;   for(int i=1;i<N;i++)
;;     for(int j=1;j<N;j++)
;;       A[i-1][j-1] = A[i - 1][j-1] + B[i][j];

define void @test01(i32 %N){
entry:
  ; Skip the whole nest unless N > 1.
  %cmp31 = icmp sgt i32 %N, 1
  br i1 %cmp31, label %for.cond1.preheader.lr.ph, label %for.end19

for.cond1.preheader.lr.ph:
  ; %0 = N - 1 is the value at which both loops exit.
  %0 = add i32 %N, -1
  br label %for.body3.lr.ph

for.body3.lr.ph:
  ; Outer loop header: i starts at 1; %1 = i - 1 is the row index into @A.
  %indvars.iv34 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next35, %for.inc17 ]
  %1 = add nsw i64 %indvars.iv34, -1
  br label %for.body3

for.body3:
  ; Inner loop: j starts at 1; %2 = j - 1 is the column index into @A.
  %indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
  %2 = add nsw i64 %indvars.iv, -1
  %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %1, i64 %2
  %3 = load i32, i32* %arrayidx6
  %arrayidx10 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %indvars.iv34, i64 %indvars.iv
  %4 = load i32, i32* %arrayidx10
  %add = add nsw i32 %4, %3
  store i32 %add, i32* %arrayidx6
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; The exit test compares the current (not the incremented) j against N - 1.
  %lftr.wideiv = trunc i64 %indvars.iv to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %0
  br i1 %exitcond, label %for.inc17, label %for.body3

for.inc17:
  ; Outer latch: same exit scheme as the inner loop, applied to i.
  %indvars.iv.next35 = add nuw nsw i64 %indvars.iv34, 1
  %lftr.wideiv37 = trunc i64 %indvars.iv34 to i32
  %exitcond38 = icmp eq i32 %lftr.wideiv37, %0
  br i1 %exitcond38, label %for.end19, label %for.body3.lr.ph

for.end19:
  ret void
}

; CHECK: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test01
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
; CHECK-NEXT: ...

; DELIN: --- !Missed
; DELIN-NEXT: Pass: loop-interchange
; DELIN-NEXT: Name: InterchangeNotProfitable
; DELIN-NEXT: Function: test01
; DELIN-NEXT: Args:
; DELIN-NEXT: - String: Interchanging loops is too costly and it does not improve parallelism.
; DELIN-NEXT: ...
;;--------------------------------------Test case 02------------------------------------
;; [FIXME] This loop, though valid, is not interchanged in the default run,
;; where the expected remark is a dependence failure. The original note
;; attributed this to the limitation that the inner loop latch cannot be split
;; because the inner induction variable has multiple uses (it increments the
;; loop counter and is used to access A[j+1][i+1]). With
;; -da-disable-delinearization-checks the nest is expected to be interchanged.
;;   for(int i=0;i<N-1;i++)
;;     for(int j=1;j<N-1;j++)
;;       A[j+1][i+1] = A[j+1][i+1] + k;

define void @test02(i32 %k, i32 %N) {
entry:
  %sub = add nsw i32 %N, -1
  %cmp26 = icmp sgt i32 %N, 1
  br i1 %cmp26, label %for.cond1.preheader.lr.ph, label %for.end17

for.cond1.preheader.lr.ph:
  ; %cmp324 guards the inner loop (N - 1 > 1); %0 = N - 2 is the inner exit
  ; value and %1 = N - 1 is the outer bound widened to i64.
  %cmp324 = icmp sgt i32 %sub, 1
  %0 = add i32 %N, -2
  %1 = sext i32 %sub to i64
  br label %for.cond1.preheader

for.cond.loopexit:
  ; Outer latch: continue while i + 1 < N - 1.
  %cmp = icmp slt i64 %indvars.iv.next29, %1
  br i1 %cmp, label %for.cond1.preheader, label %for.end17

for.cond1.preheader:
  ; Outer loop header: i starts at 0; i + 1 is the column index into @A.
  %indvars.iv28 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next29, %for.cond.loopexit ]
  %indvars.iv.next29 = add nuw nsw i64 %indvars.iv28, 1
  br i1 %cmp324, label %for.body4, label %for.cond.loopexit

for.body4:
  ; Inner loop: j starts at 1; the incremented value j + 1 is used both as the
  ; next induction value and as the row index into @A.
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4 ], [ 1, %for.cond1.preheader ]
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next, i64 %indvars.iv.next29
  %2 = load i32, i32* %arrayidx7
  %add8 = add nsw i32 %2, %k
  store i32 %add8, i32* %arrayidx7
  %lftr.wideiv = trunc i64 %indvars.iv to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %0
  br i1 %exitcond, label %for.cond.loopexit, label %for.body4

for.end17:
  ret void
}

; CHECK: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test02
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
; CHECK-NEXT: ...

; DELIN: --- !Passed
; DELIN-NEXT: Pass: loop-interchange
; DELIN-NEXT: Name: Interchanged
; DELIN-NEXT: Function: test02
; DELIN-NEXT: Args:
; DELIN-NEXT: - String: Loop interchanged with enclosing loop.
; DELIN-NEXT: ...

;;-----------------------------------Test case 03-------------------------------
;; Test to make sure we can handle output dependencies.
;;
;;  for (int i = 0; i < 2; ++i)
;;    for(int j = 0; j < 3; ++j) {
;;      A[j][i] = i;
;;      A[j][i+1] = j;
;;    }

@A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16

define void @test03() {
entry:
  br label %for.cond1.preheader

for.cond.loopexit:                                ; preds = %for.body4
  ; Outer latch: the outer loop runs for i = 0 and i = 1.
  %exitcond28 = icmp ne i64 %indvars.iv.next27, 2
  br i1 %exitcond28, label %for.cond1.preheader, label %for.cond.cleanup

for.cond1.preheader:                              ; preds = %for.cond.loopexit, %entry
  %indvars.iv26 = phi i64 [ 0, %entry ], [ %indvars.iv.next27, %for.cond.loopexit ]
  %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
  br label %for.body4

for.cond.cleanup:                                 ; preds = %for.cond.loopexit
  ret void

for.body4:                                        ; preds = %for.body4, %for.cond1.preheader
  ; Two stores per iteration into the same row j of @A10, at columns i and
  ; i + 1.
  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
  %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
  %tmp = trunc i64 %indvars.iv26 to i32
  store i32 %tmp, i32* %arrayidx6, align 4
  %arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27
  %tmp1 = trunc i64 %indvars.iv to i32
  store i32 %tmp1, i32* %arrayidx10, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp ne i64 %indvars.iv.next, 3
  br i1 %exitcond, label %for.body4, label %for.cond.loopexit
}

; CHECK: --- !Passed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Interchanged
; CHECK-NEXT: Function: test03
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Loop interchanged with enclosing loop.
; CHECK-NEXT: ...

; DELIN: --- !Passed
; DELIN-NEXT: Pass: loop-interchange
; DELIN-NEXT: Name: Interchanged
; DELIN-NEXT: Function: test03
; DELIN-NEXT: Args:
; DELIN-NEXT: - String: Loop interchanged with enclosing loop.
; DELIN-NEXT: ...
;;--------------------------------------Test case 04-------------------------------------
;; Loops not tightly nested are not interchanged. The default run expects a
;; dependence remark; with delinearization checks disabled the expected remark
;; is that the loops are not tightly nested.
;; The one-dimensional array written in the outer loop body is @C in the IR
;; below, and the inner loop reuses the stored value rather than reloading it.
;;   for(int j=0;j<N;j++) {
;;     C[j] = j+k;
;;     for(int i=0;i<N;i++)
;;       A[j][i] = A[j][i]+C[j];
;;   }

define void @test04(i32 %k, i32 %N){
entry:
  %cmp30 = icmp sgt i32 %N, 0
  br i1 %cmp30, label %for.body.lr.ph, label %for.end17

for.body.lr.ph:
  ; %0 = N - 1 is the exit value for both loops; %1 is k widened to i64.
  %0 = add i32 %N, -1
  %1 = zext i32 %k to i64
  br label %for.body

for.body:
  ; Outer loop body: this store to @C sits between the two loop headers.
  %indvars.iv32 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next33, %for.inc15 ]
  %2 = add nsw i64 %indvars.iv32, %1
  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @C, i64 0, i64 %indvars.iv32
  %3 = trunc i64 %2 to i32
  store i32 %3, i32* %arrayidx
  br label %for.body3

for.body3:
  ; Inner loop: A[j][i] += (j + k), using %3 computed in the outer body.
  %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
  %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv32, i64 %indvars.iv
  %4 = load i32, i32* %arrayidx7
  %add10 = add nsw i32 %3, %4
  store i32 %add10, i32* %arrayidx7
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %0
  br i1 %exitcond, label %for.inc15, label %for.body3

for.inc15:
  %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
  %lftr.wideiv35 = trunc i64 %indvars.iv32 to i32
  %exitcond36 = icmp eq i32 %lftr.wideiv35, %0
  br i1 %exitcond36, label %for.end17, label %for.body

for.end17:
  ret void
}

; CHECK: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test04
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
; CHECK-NEXT: ...

; DELIN: --- !Missed
; DELIN-NEXT: Pass: loop-interchange
; DELIN-NEXT: Name: NotTightlyNested
; DELIN-NEXT: Function: test04
; DELIN-NEXT: Args:
; DELIN-NEXT: - String: Cannot interchange loops because they are not tightly nested.
; DELIN-NEXT: ...