1c572e92cSDiego Novillo //===- SampleProfWriter.cpp - Write LLVM sample profile data --------------===//
2c572e92cSDiego Novillo //
3c572e92cSDiego Novillo //                      The LLVM Compiler Infrastructure
4c572e92cSDiego Novillo //
5c572e92cSDiego Novillo // This file is distributed under the University of Illinois Open Source
6c572e92cSDiego Novillo // License. See LICENSE.TXT for details.
7c572e92cSDiego Novillo //
8c572e92cSDiego Novillo //===----------------------------------------------------------------------===//
9c572e92cSDiego Novillo //
10c572e92cSDiego Novillo // This file implements the class that writes LLVM sample profiles. It
11c572e92cSDiego Novillo // supports two file formats: text and binary. The textual representation
12c572e92cSDiego Novillo // is useful for debugging and testing purposes. The binary representation
13c572e92cSDiego Novillo // is more compact, resulting in smaller file sizes. However, they can
14c572e92cSDiego Novillo // both be used interchangeably.
15c572e92cSDiego Novillo //
16c572e92cSDiego Novillo // See lib/ProfileData/SampleProfReader.cpp for documentation on each of the
17c572e92cSDiego Novillo // supported formats.
18c572e92cSDiego Novillo //
19c572e92cSDiego Novillo //===----------------------------------------------------------------------===//
20c572e92cSDiego Novillo 
216bda14b3SChandler Carruth #include "llvm/ProfileData/SampleProfWriter.h"
22e78d131aSEugene Zelenko #include "llvm/ADT/StringRef.h"
23e78d131aSEugene Zelenko #include "llvm/ProfileData/ProfileCommon.h"
24e78d131aSEugene Zelenko #include "llvm/ProfileData/SampleProf.h"
25c572e92cSDiego Novillo #include "llvm/Support/ErrorOr.h"
26e78d131aSEugene Zelenko #include "llvm/Support/FileSystem.h"
27c572e92cSDiego Novillo #include "llvm/Support/LEB128.h"
28a0c0857eSWei Mi #include "llvm/Support/MD5.h"
29e78d131aSEugene Zelenko #include "llvm/Support/raw_ostream.h"
30e78d131aSEugene Zelenko #include <algorithm>
31e78d131aSEugene Zelenko #include <cstdint>
32e78d131aSEugene Zelenko #include <memory>
338d1c983fSDehao Chen #include <set>
34e78d131aSEugene Zelenko #include <system_error>
35e78d131aSEugene Zelenko #include <utility>
36e78d131aSEugene Zelenko #include <vector>
37c572e92cSDiego Novillo 
38c572e92cSDiego Novillo using namespace llvm;
39e78d131aSEugene Zelenko using namespace sampleprof;
40c572e92cSDiego Novillo 
418d1c983fSDehao Chen std::error_code
428d1c983fSDehao Chen SampleProfileWriter::write(const StringMap<FunctionSamples> &ProfileMap) {
438d1c983fSDehao Chen   if (std::error_code EC = writeHeader(ProfileMap))
448d1c983fSDehao Chen     return EC;
458d1c983fSDehao Chen 
468d1c983fSDehao Chen   // Sort the ProfileMap by total samples.
478d1c983fSDehao Chen   typedef std::pair<StringRef, const FunctionSamples *> NameFunctionSamples;
488d1c983fSDehao Chen   std::vector<NameFunctionSamples> V;
498d1c983fSDehao Chen   for (const auto &I : ProfileMap)
508d1c983fSDehao Chen     V.push_back(std::make_pair(I.getKey(), &I.second));
518d1c983fSDehao Chen 
528d1c983fSDehao Chen   std::stable_sort(
538d1c983fSDehao Chen       V.begin(), V.end(),
548d1c983fSDehao Chen       [](const NameFunctionSamples &A, const NameFunctionSamples &B) {
558d1c983fSDehao Chen         if (A.second->getTotalSamples() == B.second->getTotalSamples())
568d1c983fSDehao Chen           return A.first > B.first;
578d1c983fSDehao Chen         return A.second->getTotalSamples() > B.second->getTotalSamples();
588d1c983fSDehao Chen       });
598d1c983fSDehao Chen 
608d1c983fSDehao Chen   for (const auto &I : V) {
618d1c983fSDehao Chen     if (std::error_code EC = write(*I.second))
628d1c983fSDehao Chen       return EC;
638d1c983fSDehao Chen   }
648d1c983fSDehao Chen   return sampleprof_error::success;
658d1c983fSDehao Chen }
668d1c983fSDehao Chen 
675f8f34e4SAdrian Prantl /// Write samples to a text file.
688e415a82SDiego Novillo ///
698e415a82SDiego Novillo /// Note: it may be tempting to implement this in terms of
70ef548d29SDiego Novillo /// FunctionSamples::print().  Please don't.  The dump functionality is intended
718e415a82SDiego Novillo /// for debugging and has no specified form.
728e415a82SDiego Novillo ///
738e415a82SDiego Novillo /// The format used here is more structured and deliberate because
748e415a82SDiego Novillo /// it needs to be parsed by the SampleProfileReaderText class.
7557d1dda5SDehao Chen std::error_code SampleProfileWriterText::write(const FunctionSamples &S) {
7651abea74SNathan Slingerland   auto &OS = *OutputStream;
7757d1dda5SDehao Chen   OS << S.getName() << ":" << S.getTotalSamples();
78aae1ed8eSDiego Novillo   if (Indent == 0)
79aae1ed8eSDiego Novillo     OS << ":" << S.getHeadSamples();
80aae1ed8eSDiego Novillo   OS << "\n";
81c572e92cSDiego Novillo 
82ef548d29SDiego Novillo   SampleSorter<LineLocation, SampleRecord> SortedSamples(S.getBodySamples());
83ef548d29SDiego Novillo   for (const auto &I : SortedSamples.get()) {
84ef548d29SDiego Novillo     LineLocation Loc = I->first;
85ef548d29SDiego Novillo     const SampleRecord &Sample = I->second;
86aae1ed8eSDiego Novillo     OS.indent(Indent + 1);
87c572e92cSDiego Novillo     if (Loc.Discriminator == 0)
88c572e92cSDiego Novillo       OS << Loc.LineOffset << ": ";
89c572e92cSDiego Novillo     else
90c572e92cSDiego Novillo       OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
91c572e92cSDiego Novillo 
92c572e92cSDiego Novillo     OS << Sample.getSamples();
93c572e92cSDiego Novillo 
94d5336ae2SDiego Novillo     for (const auto &J : Sample.getCallTargets())
95d5336ae2SDiego Novillo       OS << " " << J.first() << ":" << J.second;
96c572e92cSDiego Novillo     OS << "\n";
97c572e92cSDiego Novillo   }
98c572e92cSDiego Novillo 
992c7ca9b5SDehao Chen   SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
100ef548d29SDiego Novillo       S.getCallsiteSamples());
101aae1ed8eSDiego Novillo   Indent += 1;
1022c7ca9b5SDehao Chen   for (const auto &I : SortedCallsiteSamples.get())
1032c7ca9b5SDehao Chen     for (const auto &FS : I->second) {
10457d1dda5SDehao Chen       LineLocation Loc = I->first;
1052c7ca9b5SDehao Chen       const FunctionSamples &CalleeSamples = FS.second;
106aae1ed8eSDiego Novillo       OS.indent(Indent);
107aae1ed8eSDiego Novillo       if (Loc.Discriminator == 0)
108aae1ed8eSDiego Novillo         OS << Loc.LineOffset << ": ";
109aae1ed8eSDiego Novillo       else
110aae1ed8eSDiego Novillo         OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
11157d1dda5SDehao Chen       if (std::error_code EC = write(CalleeSamples))
112760c5a8fSDiego Novillo         return EC;
113aae1ed8eSDiego Novillo     }
114aae1ed8eSDiego Novillo   Indent -= 1;
115aae1ed8eSDiego Novillo 
116760c5a8fSDiego Novillo   return sampleprof_error::success;
117c572e92cSDiego Novillo }
118c572e92cSDiego Novillo 
119760c5a8fSDiego Novillo std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) {
120760c5a8fSDiego Novillo   const auto &ret = NameTable.find(FName);
121760c5a8fSDiego Novillo   if (ret == NameTable.end())
122760c5a8fSDiego Novillo     return sampleprof_error::truncated_name_table;
12351abea74SNathan Slingerland   encodeULEB128(ret->second, *OutputStream);
124760c5a8fSDiego Novillo   return sampleprof_error::success;
125760c5a8fSDiego Novillo }
126c572e92cSDiego Novillo 
127760c5a8fSDiego Novillo void SampleProfileWriterBinary::addName(StringRef FName) {
1288d1c983fSDehao Chen   NameTable.insert(std::make_pair(FName, 0));
129760c5a8fSDiego Novillo }
130760c5a8fSDiego Novillo 
131760c5a8fSDiego Novillo void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
132760c5a8fSDiego Novillo   // Add all the names in indirect call targets.
133760c5a8fSDiego Novillo   for (const auto &I : S.getBodySamples()) {
134760c5a8fSDiego Novillo     const SampleRecord &Sample = I.second;
135760c5a8fSDiego Novillo     for (const auto &J : Sample.getCallTargets())
136760c5a8fSDiego Novillo       addName(J.first());
137760c5a8fSDiego Novillo   }
138760c5a8fSDiego Novillo 
139760c5a8fSDiego Novillo   // Recursively add all the names for inlined callsites.
1402c7ca9b5SDehao Chen   for (const auto &J : S.getCallsiteSamples())
1412c7ca9b5SDehao Chen     for (const auto &FS : J.second) {
1422c7ca9b5SDehao Chen       const FunctionSamples &CalleeSamples = FS.second;
14357d1dda5SDehao Chen       addName(CalleeSamples.getName());
144760c5a8fSDiego Novillo       addNames(CalleeSamples);
145760c5a8fSDiego Novillo     }
146760c5a8fSDiego Novillo }
147760c5a8fSDiego Novillo 
148a0c0857eSWei Mi void SampleProfileWriterBinary::stablizeNameTable(std::set<StringRef> &V) {
149a0c0857eSWei Mi   // Sort the names to make NameTable deterministic.
150a0c0857eSWei Mi   for (const auto &I : NameTable)
151a0c0857eSWei Mi     V.insert(I.first);
152a0c0857eSWei Mi   int i = 0;
153a0c0857eSWei Mi   for (const StringRef &N : V)
154a0c0857eSWei Mi     NameTable[N] = i++;
155a0c0857eSWei Mi }
15651abea74SNathan Slingerland 
157a0c0857eSWei Mi std::error_code SampleProfileWriterRawBinary::writeNameTable() {
158a0c0857eSWei Mi   auto &OS = *OutputStream;
159a0c0857eSWei Mi   std::set<StringRef> V;
160a0c0857eSWei Mi   stablizeNameTable(V);
161a0c0857eSWei Mi 
162a0c0857eSWei Mi   // Write out the name table.
163a0c0857eSWei Mi   encodeULEB128(NameTable.size(), OS);
164a0c0857eSWei Mi   for (auto N : V) {
165a0c0857eSWei Mi     OS << N;
166a0c0857eSWei Mi     encodeULEB128(0, OS);
167a0c0857eSWei Mi   }
168a0c0857eSWei Mi   return sampleprof_error::success;
169a0c0857eSWei Mi }
170a0c0857eSWei Mi 
171a0c0857eSWei Mi std::error_code SampleProfileWriterCompactBinary::writeNameTable() {
172a0c0857eSWei Mi   auto &OS = *OutputStream;
173a0c0857eSWei Mi   std::set<StringRef> V;
174a0c0857eSWei Mi   stablizeNameTable(V);
175a0c0857eSWei Mi 
176a0c0857eSWei Mi   // Write out the name table.
177a0c0857eSWei Mi   encodeULEB128(NameTable.size(), OS);
178a0c0857eSWei Mi   for (auto N : V) {
179a0c0857eSWei Mi     encodeULEB128(MD5Hash(N), OS);
180a0c0857eSWei Mi   }
181a0c0857eSWei Mi   return sampleprof_error::success;
182a0c0857eSWei Mi }
183a0c0857eSWei Mi 
184a0c0857eSWei Mi std::error_code SampleProfileWriterRawBinary::writeMagicIdent() {
185a0c0857eSWei Mi   auto &OS = *OutputStream;
186760c5a8fSDiego Novillo   // Write file magic identifier.
187c572e92cSDiego Novillo   encodeULEB128(SPMagic(), OS);
188c572e92cSDiego Novillo   encodeULEB128(SPVersion(), OS);
189a0c0857eSWei Mi   return sampleprof_error::success;
190a0c0857eSWei Mi }
191a0c0857eSWei Mi 
192a0c0857eSWei Mi std::error_code SampleProfileWriterCompactBinary::writeMagicIdent() {
193a0c0857eSWei Mi   auto &OS = *OutputStream;
194a0c0857eSWei Mi   // Write file magic identifier.
195a0c0857eSWei Mi   encodeULEB128(SPMagic(SPF_Compact_Binary), OS);
196a0c0857eSWei Mi   encodeULEB128(SPVersion(), OS);
197a0c0857eSWei Mi   return sampleprof_error::success;
198a0c0857eSWei Mi }
199a0c0857eSWei Mi 
200a0c0857eSWei Mi std::error_code SampleProfileWriterBinary::writeHeader(
201a0c0857eSWei Mi     const StringMap<FunctionSamples> &ProfileMap) {
202a0c0857eSWei Mi   writeMagicIdent();
203760c5a8fSDiego Novillo 
20440ee23dbSEaswaran Raman   computeSummary(ProfileMap);
20540ee23dbSEaswaran Raman   if (auto EC = writeSummary())
20640ee23dbSEaswaran Raman     return EC;
20740ee23dbSEaswaran Raman 
208760c5a8fSDiego Novillo   // Generate the name table for all the functions referenced in the profile.
209760c5a8fSDiego Novillo   for (const auto &I : ProfileMap) {
210760c5a8fSDiego Novillo     addName(I.first());
211760c5a8fSDiego Novillo     addNames(I.second);
212760c5a8fSDiego Novillo   }
213760c5a8fSDiego Novillo 
214a0c0857eSWei Mi   writeNameTable();
215760c5a8fSDiego Novillo   return sampleprof_error::success;
216c572e92cSDiego Novillo }
217c572e92cSDiego Novillo 
21840ee23dbSEaswaran Raman std::error_code SampleProfileWriterBinary::writeSummary() {
21940ee23dbSEaswaran Raman   auto &OS = *OutputStream;
2207cefdb81SEaswaran Raman   encodeULEB128(Summary->getTotalCount(), OS);
2217cefdb81SEaswaran Raman   encodeULEB128(Summary->getMaxCount(), OS);
2226f4903d9SEaswaran Raman   encodeULEB128(Summary->getMaxFunctionCount(), OS);
2237cefdb81SEaswaran Raman   encodeULEB128(Summary->getNumCounts(), OS);
22440ee23dbSEaswaran Raman   encodeULEB128(Summary->getNumFunctions(), OS);
22540ee23dbSEaswaran Raman   std::vector<ProfileSummaryEntry> &Entries = Summary->getDetailedSummary();
22640ee23dbSEaswaran Raman   encodeULEB128(Entries.size(), OS);
22740ee23dbSEaswaran Raman   for (auto Entry : Entries) {
22840ee23dbSEaswaran Raman     encodeULEB128(Entry.Cutoff, OS);
22940ee23dbSEaswaran Raman     encodeULEB128(Entry.MinCount, OS);
23040ee23dbSEaswaran Raman     encodeULEB128(Entry.NumCounts, OS);
23140ee23dbSEaswaran Raman   }
23240ee23dbSEaswaran Raman   return sampleprof_error::success;
23340ee23dbSEaswaran Raman }
23457d1dda5SDehao Chen std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
23551abea74SNathan Slingerland   auto &OS = *OutputStream;
23651abea74SNathan Slingerland 
23757d1dda5SDehao Chen   if (std::error_code EC = writeNameIdx(S.getName()))
238760c5a8fSDiego Novillo     return EC;
239760c5a8fSDiego Novillo 
240c572e92cSDiego Novillo   encodeULEB128(S.getTotalSamples(), OS);
241a7f1e8efSDiego Novillo 
242a7f1e8efSDiego Novillo   // Emit all the body samples.
243b93483dbSDiego Novillo   encodeULEB128(S.getBodySamples().size(), OS);
244d5336ae2SDiego Novillo   for (const auto &I : S.getBodySamples()) {
245d5336ae2SDiego Novillo     LineLocation Loc = I.first;
246d5336ae2SDiego Novillo     const SampleRecord &Sample = I.second;
247c572e92cSDiego Novillo     encodeULEB128(Loc.LineOffset, OS);
248c572e92cSDiego Novillo     encodeULEB128(Loc.Discriminator, OS);
249c572e92cSDiego Novillo     encodeULEB128(Sample.getSamples(), OS);
250c572e92cSDiego Novillo     encodeULEB128(Sample.getCallTargets().size(), OS);
251d5336ae2SDiego Novillo     for (const auto &J : Sample.getCallTargets()) {
252760c5a8fSDiego Novillo       StringRef Callee = J.first();
25338be3330SDiego Novillo       uint64_t CalleeSamples = J.second;
254760c5a8fSDiego Novillo       if (std::error_code EC = writeNameIdx(Callee))
255760c5a8fSDiego Novillo         return EC;
256c572e92cSDiego Novillo       encodeULEB128(CalleeSamples, OS);
257c572e92cSDiego Novillo     }
258c572e92cSDiego Novillo   }
259c572e92cSDiego Novillo 
260a7f1e8efSDiego Novillo   // Recursively emit all the callsite samples.
2612c27daf7SDehao Chen   uint64_t NumCallsites = 0;
2622c27daf7SDehao Chen   for (const auto &J : S.getCallsiteSamples())
2632c27daf7SDehao Chen     NumCallsites += J.second.size();
2642c27daf7SDehao Chen   encodeULEB128(NumCallsites, OS);
2652c7ca9b5SDehao Chen   for (const auto &J : S.getCallsiteSamples())
2662c7ca9b5SDehao Chen     for (const auto &FS : J.second) {
26757d1dda5SDehao Chen       LineLocation Loc = J.first;
2682c7ca9b5SDehao Chen       const FunctionSamples &CalleeSamples = FS.second;
269a7f1e8efSDiego Novillo       encodeULEB128(Loc.LineOffset, OS);
270a7f1e8efSDiego Novillo       encodeULEB128(Loc.Discriminator, OS);
27157d1dda5SDehao Chen       if (std::error_code EC = writeBody(CalleeSamples))
272760c5a8fSDiego Novillo         return EC;
273a7f1e8efSDiego Novillo     }
274a7f1e8efSDiego Novillo 
275760c5a8fSDiego Novillo   return sampleprof_error::success;
276c572e92cSDiego Novillo }
277d5336ae2SDiego Novillo 
2785f8f34e4SAdrian Prantl /// Write samples of a top-level function to a binary file.
279b93483dbSDiego Novillo ///
280b93483dbSDiego Novillo /// \returns true if the samples were written successfully, false otherwise.
28157d1dda5SDehao Chen std::error_code SampleProfileWriterBinary::write(const FunctionSamples &S) {
28251abea74SNathan Slingerland   encodeULEB128(S.getHeadSamples(), *OutputStream);
28357d1dda5SDehao Chen   return writeBody(S);
284b93483dbSDiego Novillo }
285b93483dbSDiego Novillo 
2865f8f34e4SAdrian Prantl /// Create a sample profile file writer based on the specified format.
287d5336ae2SDiego Novillo ///
288d5336ae2SDiego Novillo /// \param Filename The file to create.
289d5336ae2SDiego Novillo ///
290d5336ae2SDiego Novillo /// \param Format Encoding format for the profile file.
291d5336ae2SDiego Novillo ///
292d5336ae2SDiego Novillo /// \returns an error code indicating the status of the created writer.
293fcd55607SDiego Novillo ErrorOr<std::unique_ptr<SampleProfileWriter>>
294fcd55607SDiego Novillo SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) {
295d5336ae2SDiego Novillo   std::error_code EC;
29651abea74SNathan Slingerland   std::unique_ptr<raw_ostream> OS;
297*d9be2c7eSWei Mi   if (Format == SPF_Binary || Format == SPF_Compact_Binary)
29851abea74SNathan Slingerland     OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_None));
29951abea74SNathan Slingerland   else
30051abea74SNathan Slingerland     OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_Text));
30151abea74SNathan Slingerland   if (EC)
30251abea74SNathan Slingerland     return EC;
30351abea74SNathan Slingerland 
30451abea74SNathan Slingerland   return create(OS, Format);
30551abea74SNathan Slingerland }
30651abea74SNathan Slingerland 
3075f8f34e4SAdrian Prantl /// Create a sample profile stream writer based on the specified format.
30851abea74SNathan Slingerland ///
30951abea74SNathan Slingerland /// \param OS The output stream to store the profile data to.
31051abea74SNathan Slingerland ///
31151abea74SNathan Slingerland /// \param Format Encoding format for the profile file.
31251abea74SNathan Slingerland ///
31351abea74SNathan Slingerland /// \returns an error code indicating the status of the created writer.
31451abea74SNathan Slingerland ErrorOr<std::unique_ptr<SampleProfileWriter>>
31551abea74SNathan Slingerland SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
31651abea74SNathan Slingerland                             SampleProfileFormat Format) {
31751abea74SNathan Slingerland   std::error_code EC;
318fcd55607SDiego Novillo   std::unique_ptr<SampleProfileWriter> Writer;
319d5336ae2SDiego Novillo 
320*d9be2c7eSWei Mi   if (Format == SPF_Binary)
321a0c0857eSWei Mi     Writer.reset(new SampleProfileWriterRawBinary(OS));
322a0c0857eSWei Mi   else if (Format == SPF_Compact_Binary)
323a0c0857eSWei Mi     Writer.reset(new SampleProfileWriterCompactBinary(OS));
324d5336ae2SDiego Novillo   else if (Format == SPF_Text)
32551abea74SNathan Slingerland     Writer.reset(new SampleProfileWriterText(OS));
326760c5a8fSDiego Novillo   else if (Format == SPF_GCC)
327760c5a8fSDiego Novillo     EC = sampleprof_error::unsupported_writing_format;
328d5336ae2SDiego Novillo   else
329d5336ae2SDiego Novillo     EC = sampleprof_error::unrecognized_format;
330d5336ae2SDiego Novillo 
331fcd55607SDiego Novillo   if (EC)
332d5336ae2SDiego Novillo     return EC;
333fcd55607SDiego Novillo 
334fcd55607SDiego Novillo   return std::move(Writer);
335d5336ae2SDiego Novillo }
33640ee23dbSEaswaran Raman 
33740ee23dbSEaswaran Raman void SampleProfileWriter::computeSummary(
33840ee23dbSEaswaran Raman     const StringMap<FunctionSamples> &ProfileMap) {
339e5a17e3fSEaswaran Raman   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
34040ee23dbSEaswaran Raman   for (const auto &I : ProfileMap) {
34140ee23dbSEaswaran Raman     const FunctionSamples &Profile = I.second;
342e5a17e3fSEaswaran Raman     Builder.addRecord(Profile);
34340ee23dbSEaswaran Raman   }
34438de59e4SBenjamin Kramer   Summary = Builder.getSummary();
34540ee23dbSEaswaran Raman }
346