; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='default<O3>' -enable-loop-flatten -loop-flatten-cost-threshold=3 -S %s | FileCheck %s

; Phase-ordering test: the whole O3 pipeline is run with loop flattening
; switched on (cost threshold 3) and the textual IR it prints is verified
; by FileCheck against the assertions attached to the function below.

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

; Input: a two-level loop nest. The outer loop runs %i.0 over [0, %N) and the
; inner loop runs %j.0 over [0, %M); every inner iteration loads
; A[%i.0 * %M + %j.0] and passes the loaded value to @_Z1fi.
;
; The autogenerated assertions expect the nest to come out as a single loop
; that is guarded by (%N > 0 && %M > 0), has a trip count of
; zext(%M) * zext(%N), and indexes %A directly with a 64-bit induction
; variable.
;
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file -- confirm that the dangling reference is intentional.
define dso_local void @_Z3fooPiii(i32* %A, i32 %N, i32 %M) #0 {
; CHECK-LABEL: @_Z3fooPiii(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP3:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    [[CMP21:%.*]] = icmp sgt i32 [[M:%.*]], 0
; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[CMP3]], i1 [[CMP21]], i1 false
; CHECK-NEXT:    br i1 [[OR_COND]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.cond1.preheader.lr.ph.split.us:
; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[M]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[FLATTEN_TRIPCOUNT:%.*]] = mul nuw nsw i64 [[TMP0]], [[TMP1]]
; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER_US:%.*]]
; CHECK:       for.cond1.preheader.us:
; CHECK-NEXT:    [[INDVAR6:%.*]] = phi i64 [ [[INDVAR_NEXT7:%.*]], [[FOR_COND1_PREHEADER_US]] ], [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US]] ]
; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVAR6]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_US]], align 4
; CHECK-NEXT:    tail call void @_Z1fi(i32 [[TMP2]])
; CHECK-NEXT:    [[INDVAR_NEXT7]] = add nuw nsw i64 [[INDVAR6]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVAR_NEXT7]], [[FLATTEN_TRIPCOUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.cond

; Outer loop header: %i.0 starts at 0 and the loop continues while
; %i.0 < %N (signed compare).
for.cond:
  %i.0 = phi i32 [ 0, %entry ], [ %inc6, %for.cond.cleanup3 ]
  %cmp = icmp slt i32 %i.0, %N
  br i1 %cmp, label %for.body, label %for.cond.cleanup

; Outer loop exit.
for.cond.cleanup:
  ret void

; Outer loop body: falls straight into the inner loop.
for.body:
  br label %for.cond1

; Inner loop header: %j.0 restarts at 0 for every outer iteration and the
; loop continues while %j.0 < %M (signed compare).
for.cond1:
  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.body4 ]
  %cmp2 = icmp slt i32 %j.0, %M
  br i1 %cmp2, label %for.body4, label %for.cond.cleanup3

; Inner loop exit, which doubles as the outer latch: advance %i.0.
for.cond.cleanup3:
  %inc6 = add nsw i32 %i.0, 1
  br label %for.cond

; Inner loop body: compute the linearised index %i.0 * %M + %j.0, widen it to
; i64, load that element of %A, hand it to @_Z1fi, then advance %j.0.
for.body4:
  %mul = mul nsw i32 %i.0, %M
  %add = add nsw i32 %mul, %j.0
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
  %0 = load i32, i32* %arrayidx, align 4
  call void @_Z1fi(i32 %0)
  %inc = add nsw i32 %j.0, 1
  br label %for.cond1
}

; External function that @_Z3fooPiii calls with each value it loads; only the
; declaration is present here.
; NOTE(review): attribute group #2 is not defined in the visible part of this
; file -- confirm that the dangling reference is intentional.
declare dso_local void @_Z1fi(i32) #2
