1; Test for CSSPGO's new early inliner using priority queue
2
3; Note that we need the new pass manager to enable top-down processing for the sample profile loader
4; Test that we inlined the following in top-down order with the old (non-prioritized) inliner
5;   main:3 @ _Z5funcAi
6;   main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
7;   _Z5funcBi:1 @ _Z8funcLeafi
8; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
9
10; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5
11; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
12
13; With the new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for the given profile
14; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW
15;
16; With the new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples; tuning the hot cutoff (-profile-summary-cutoff-hot) can get us the same inlining as the baseline
17; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
18;
19; With the new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples; tuning the cold sample profile inline threshold can get us the same inlining as the baseline
20; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
21;
22; With the new FDO early inliner and a tuned hot cutoff, we can control inlining through the size growth tuning knobs (-sample-profile-inline-limit-min and -sample-profile-inline-growth-limit).
23; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=0 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --allow-empty --check-prefix=INLINE-NEW-LIMIT1
24; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=10 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW-LIMIT2
25
26
27; INLINE-BASE: remark: merged.cpp:14:10: '_Z5funcAi' inlined into 'main' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10
28; INLINE-BASE: remark: merged.cpp:27:11: '_Z8funcLeafi' inlined into 'main' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11 @ main:3:10
29; INLINE-BASE: remark: merged.cpp:33:11: '_Z8funcLeafi' inlined into '_Z5funcBi' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11
30
31; INLINE-NEW: remark: merged.cpp:14:10: '_Z5funcAi' inlined into 'main' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10
32; INLINE-NEW-NOT: remark
33
34; INLINE-NEW-LIMIT1-NOT: remark
35
36; INLINE-NEW-LIMIT2: remark: merged.cpp:33:11: '_Z8funcLeafi' inlined into '_Z5funcBi' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11
37; INLINE-NEW-LIMIT2-NOT: remark
38
; Global i32 `factor`, initialised to 3. @_Z8funcLeafi reads it with volatile
; loads and passes the loaded value to @_Z3fibi.
39@factor = dso_local global i32 3, align 4, !dbg !0
40
; main: loop driver for the test. Starting with x = 300000, each iteration
; calls _Z5funcBi(x) and _Z5funcAi(x + 1), adds both results to the running
; sum r, and decrements x. The exit test compares the pre-decrement x with 0,
; so the last iteration is the one with x == 0; the accumulated sum is
; returned.
;
; Attribute group #0 marks this function noinline. The _Z5funcAi call carries
; !dbg !28 (merged.cpp:14:10), i.e. 3 lines below main's declaration line 11,
; which is the "main:3:10" callsite named in the expected inline remarks.
41define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
42entry:
43  br label %for.body, !dbg !25
44
45for.cond.cleanup:                                 ; preds = %for.body
; %add3 is the sum produced by the final loop iteration.
46  ret i32 %add3, !dbg !27
47
48for.body:                                         ; preds = %for.body, %entry
; %x.011 is the loop counter x, %r.010 the running sum r.
49  %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
50  %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
51  %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
52  %add = add nuw nsw i32 %x.011, 1, !dbg !31
53  %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
; r + funcB(x) + funcA(x + 1)
54  %add2 = add i32 %call, %r.010, !dbg !34
55  %add3 = add i32 %add2, %call1, !dbg !35
56  %dec = add nsw i32 %x.011, -1, !dbg !36
; Exit condition tests the value of x used by this iteration, not %dec.
57  %cmp = icmp eq i32 %x.011, 0, !dbg !38
58  br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
59}
60
; _Z5funcAi (funcA): returns _Z8funcLeafi(x + 100000).
;
; Uses attribute group #1, which has no noinline. The leaf call carries
; !dbg !45 (merged.cpp:27:11), one line below the function's declaration
; line 26, matching the "_Z5funcAi:1:11" callsite in the expected remarks.
61define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 {
62entry:
63  %add = add nsw i32 %x, 100000, !dbg !44
64  %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45
65  ret i32 %call, !dbg !46
66}
67
; _Z8funcLeafi (funcLeaf): moves x towards zero in steps of _Z3fibi(factor).
;   x > 0 : repeatedly subtract fib(factor) while the result is still > 0.
;   x < 0 : repeatedly add fib(factor) while the result is still < 0.
;   x == 0: neither loop runs and 0 is returned.
; The value returned is the last value computed by whichever loop ran.
; @factor is re-read with a volatile load on every iteration. Uses attribute
; group #1 (no noinline).
68define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 {
69entry:
70  %cmp = icmp sgt i32 %x, 0, !dbg !57
71  br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59
72
73while.cond2.preheader:                            ; preds = %entry
; Reached only when x <= 0; decide whether the negative-side loop is needed.
74  %cmp313 = icmp slt i32 %x, 0, !dbg !60
75  br i1 %cmp313, label %while.body4, label %if.end, !dbg !63
76
77while.body:                                       ; preds = %while.body, %entry
; Positive-side loop: x -= fib(factor) until the result is no longer > 0.
78  %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ]
79  %tmp = load volatile i32, i32* @factor, align 4, !dbg !64
80  %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67
81  %sub = sub nsw i32 %x.addr.016, %call, !dbg !68
82  %cmp1 = icmp sgt i32 %sub, 0, !dbg !69
83  br i1 %cmp1, label %while.body, label %if.end, !dbg !71
84
85while.body4:                                      ; preds = %while.body4, %while.cond2.preheader
; Negative-side loop: x += fib(factor) until the result is no longer < 0.
86  %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ]
87  %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72
88  %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74
89  %add = add nsw i32 %call5, %x.addr.114, !dbg !75
90  %cmp3 = icmp slt i32 %add, 0, !dbg !60
91  br i1 %cmp3, label %while.body4, label %if.end, !dbg !63
92
93if.end:                                           ; preds = %while.body4, %while.body, %while.cond2.preheader
; Merge the three exits: 0 when x was 0 on entry, otherwise the final loop value.
94  %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ]
95  ret i32 %x.addr.2, !dbg !76
96}
97
; _Z5funcBi (funcB): returns _Z8funcLeafi(x - 100000); the subtraction is
; expressed as an add of -100000.
;
; Uses attribute group #0, so the function itself is marked noinline. The
; leaf call carries !dbg !52 (merged.cpp:33:11), one line below the
; function's declaration line 32, matching the "_Z5funcBi:1:11" callsite in
; the expected remarks.
98define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 {
99entry:
100  %sub = add nsw i32 %x, -100000, !dbg !51
101  %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52
102  ret i32 %call, !dbg !53
103}
104
; Declaration only: no body for @_Z3fibi appears in this file. It is called
; from both loops of @_Z8funcLeafi.
105declare i32 @_Z3fibi(i32)
106
; Attribute groups. #0 is used by @main and @_Z5funcBi, #1 by @_Z5funcAi and
; @_Z8funcLeafi. The two lists appear to differ only in that #0 additionally
; carries noinline; both include "use-sample-profile".
107attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
108attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
109
; Module-level metadata: the named metadata roots, the compile unit for
; merged.cpp (with debugInfoForProfiling enabled), the debug description of
; the global `factor`, shared type nodes, and the module flags.
110!llvm.dbg.cu = !{!2}
111!llvm.module.flags = !{!14, !15, !16}
112!llvm.ident = !{!17}
113
114!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
115!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
116!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
117!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
118!4 = !{}
; !6, !10 and !11 are non-definition subprogram nodes for funcA, funcB and
; funcLeaf, referenced through the compile unit's retainedTypes list (!5).
119!5 = !{!6, !10, !11}
120!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
; !7 is the int(int) subroutine type shared by funcA, funcB and funcLeaf.
121!7 = !DISubroutineType(types: !8)
122!8 = !{!9, !9}
123!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
124!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
125!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
126!12 = !{!0}
; `factor` is described as a volatile int.
127!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
128!14 = !{i32 7, !"Dwarf Version", i32 4}
129!15 = !{i32 2, !"Debug Info Version", i32 3}
130!16 = !{i32 1, !"wchar_size", i32 4}
131!17 = !{!"clang version 11.0.0"}
; Debug info for @main (definition at merged.cpp:11): its local variables,
; lexical scopes and the source locations attached to its instructions.
; Locations on line 14 are 3 lines below the declaration line.
132!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
133!19 = !DISubroutineType(types: !20)
134!20 = !{!9}
135!21 = !{!22, !23}
136!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
137!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
138!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
139!25 = !DILocation(line: 13, column: 3, scope: !26)
140!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
141!27 = !DILocation(line: 17, column: 3, scope: !18)
; !28 is the location of the call to _Z5funcAi.
142!28 = !DILocation(line: 14, column: 10, scope: !29)
143!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
144!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
145!31 = !DILocation(line: 14, column: 29, scope: !29)
; !32 is the location of the call to _Z5funcBi; its scope !33 carries
; discriminator 2.
146!32 = !DILocation(line: 14, column: 21, scope: !33)
147!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
148!34 = !DILocation(line: 14, column: 19, scope: !29)
149!35 = !DILocation(line: 14, column: 7, scope: !29)
150!36 = !DILocation(line: 13, column: 33, scope: !37)
151!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
152!38 = !DILocation(line: 13, column: 26, scope: !39)
153!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)
; Debug info for @_Z5funcAi (definition at merged.cpp:26). !45 is the
; location of its call to _Z8funcLeafi, one line below the declaration line.
154!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
155!44 = !DILocation(line: 27, column: 22, scope: !40)
156!45 = !DILocation(line: 27, column: 11, scope: !40)
157!46 = !DILocation(line: 29, column: 3, scope: !40)
; Debug info for @_Z5funcBi (definition at merged.cpp:32). !52 is the
; location of its call to _Z8funcLeafi, one line below the declaration line.
158!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
159!51 = !DILocation(line: 33, column: 22, scope: !47)
160!52 = !DILocation(line: 33, column: 11, scope: !47)
161!53 = !DILocation(line: 35, column: 3, scope: !47)
; Debug info for @_Z8funcLeafi (definition at merged.cpp:48): lexical scopes
; and source locations for the entry test, the two loops and the return.
162!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
; Entry comparison and branch (source line 49).
163!57 = !DILocation(line: 49, column: 9, scope: !58)
164!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7)
165!59 = !DILocation(line: 49, column: 7, scope: !54)
; Condition and branch of the second loop, %while.body4 (source line 58).
166!60 = !DILocation(line: 58, column: 14, scope: !61)
167!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2)
168!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8)
169!63 = !DILocation(line: 58, column: 5, scope: !61)
; Body of the first loop, %while.body (source line 52), and its condition
; (source line 51).
170!64 = !DILocation(line: 52, column: 16, scope: !65)
171!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19)
172!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14)
173!67 = !DILocation(line: 52, column: 12, scope: !65)
174!68 = !DILocation(line: 52, column: 9, scope: !65)
175!69 = !DILocation(line: 51, column: 14, scope: !70)
176!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2)
177!71 = !DILocation(line: 51, column: 5, scope: !70)
; Body of the second loop, %while.body4 (source line 59).
178!72 = !DILocation(line: 59, column: 16, scope: !73)
179!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19)
180!74 = !DILocation(line: 59, column: 12, scope: !73)
181!75 = !DILocation(line: 59, column: 9, scope: !73)
; Return statement (source line 63).
182!76 = !DILocation(line: 63, column: 3, scope: !54)
183