1 //===- SampleProfWriter.cpp - Write LLVM sample profile data --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that writes LLVM sample profiles. It
10 // supports two file formats: text and binary. The textual representation
11 // is useful for debugging and testing purposes. The binary representation
12 // is more compact, resulting in smaller file sizes. However, they can
13 // both be used interchangeably.
14 //
15 // See lib/ProfileData/SampleProfReader.cpp for documentation on each of the
16 // supported formats.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "llvm/ProfileData/SampleProfWriter.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/ProfileData/ProfileCommon.h"
23 #include "llvm/ProfileData/SampleProf.h"
24 #include "llvm/Support/Compression.h"
25 #include "llvm/Support/Endian.h"
26 #include "llvm/Support/EndianStream.h"
27 #include "llvm/Support/ErrorOr.h"
28 #include "llvm/Support/FileSystem.h"
29 #include "llvm/Support/LEB128.h"
30 #include "llvm/Support/MD5.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include <algorithm>
33 #include <cstdint>
34 #include <memory>
35 #include <set>
36 #include <system_error>
37 #include <utility>
38 #include <vector>
39 
40 using namespace llvm;
41 using namespace sampleprof;
42 
43 std::error_code SampleProfileWriter::writeFuncProfiles(
44     const StringMap<FunctionSamples> &ProfileMap) {
45   // Sort the ProfileMap by total samples.
46   typedef std::pair<StringRef, const FunctionSamples *> NameFunctionSamples;
47   std::vector<NameFunctionSamples> V;
48   for (const auto &I : ProfileMap)
49     V.push_back(std::make_pair(I.getKey(), &I.second));
50 
51   llvm::stable_sort(
52       V, [](const NameFunctionSamples &A, const NameFunctionSamples &B) {
53         if (A.second->getTotalSamples() == B.second->getTotalSamples())
54           return A.first > B.first;
55         return A.second->getTotalSamples() > B.second->getTotalSamples();
56       });
57 
58   for (const auto &I : V) {
59     if (std::error_code EC = writeSample(*I.second))
60       return EC;
61   }
62   return sampleprof_error::success;
63 }
64 
65 std::error_code
66 SampleProfileWriter::write(const StringMap<FunctionSamples> &ProfileMap) {
67   if (std::error_code EC = writeHeader(ProfileMap))
68     return EC;
69 
70   if (std::error_code EC = writeFuncProfiles(ProfileMap))
71     return EC;
72 
73   return sampleprof_error::success;
74 }
75 
76 SecHdrTableEntry &
77 SampleProfileWriterExtBinaryBase::getEntryInLayout(SecType Type) {
78   auto SecIt = std::find_if(
79       SectionLayout.begin(), SectionLayout.end(),
80       [=](const auto &Entry) -> bool { return Entry.Type == Type; });
81   return *SecIt;
82 }
83 
84 /// Return the current position and prepare to use it as the start
85 /// position of a section.
86 uint64_t SampleProfileWriterExtBinaryBase::markSectionStart(SecType Type) {
87   uint64_t SectionStart = OutputStream->tell();
88   auto &Entry = getEntryInLayout(Type);
89   // Use LocalBuf as a temporary output for writting data.
90   if (hasSecFlag(Entry, SecFlagCompress))
91     LocalBufStream.swap(OutputStream);
92   return SectionStart;
93 }
94 
95 std::error_code SampleProfileWriterExtBinaryBase::compressAndOutput() {
96   if (!llvm::zlib::isAvailable())
97     return sampleprof_error::zlib_unavailable;
98   std::string &UncompressedStrings =
99       static_cast<raw_string_ostream *>(LocalBufStream.get())->str();
100   if (UncompressedStrings.size() == 0)
101     return sampleprof_error::success;
102   auto &OS = *OutputStream;
103   SmallString<128> CompressedStrings;
104   llvm::Error E = zlib::compress(UncompressedStrings, CompressedStrings,
105                                  zlib::BestSizeCompression);
106   if (E)
107     return sampleprof_error::compress_failed;
108   encodeULEB128(UncompressedStrings.size(), OS);
109   encodeULEB128(CompressedStrings.size(), OS);
110   OS << CompressedStrings.str();
111   UncompressedStrings.clear();
112   return sampleprof_error::success;
113 }
114 
115 /// Add a new section into section header table.
116 std::error_code
117 SampleProfileWriterExtBinaryBase::addNewSection(SecType Type,
118                                                 uint64_t SectionStart) {
119   auto Entry = getEntryInLayout(Type);
120   if (hasSecFlag(Entry, SecFlagCompress)) {
121     LocalBufStream.swap(OutputStream);
122     if (std::error_code EC = compressAndOutput())
123       return EC;
124   }
125   SecHdrTable.push_back({Type, Entry.Flags, SectionStart - FileStart,
126                          OutputStream->tell() - SectionStart});
127   return sampleprof_error::success;
128 }
129 
130 std::error_code SampleProfileWriterExtBinaryBase::write(
131     const StringMap<FunctionSamples> &ProfileMap) {
132   if (std::error_code EC = writeHeader(ProfileMap))
133     return EC;
134 
135   std::string LocalBuf;
136   LocalBufStream = std::make_unique<raw_string_ostream>(LocalBuf);
137   if (std::error_code EC = writeSections(ProfileMap))
138     return EC;
139 
140   if (std::error_code EC = writeSecHdrTable())
141     return EC;
142 
143   return sampleprof_error::success;
144 }
145 
146 std::error_code SampleProfileWriterExtBinary::writeSections(
147     const StringMap<FunctionSamples> &ProfileMap) {
148   uint64_t SectionStart = markSectionStart(SecProfSummary);
149   computeSummary(ProfileMap);
150   if (auto EC = writeSummary())
151     return EC;
152   if (std::error_code EC = addNewSection(SecProfSummary, SectionStart))
153     return EC;
154 
155   // Generate the name table for all the functions referenced in the profile.
156   SectionStart = markSectionStart(SecNameTable);
157   for (const auto &I : ProfileMap) {
158     addName(I.first());
159     addNames(I.second);
160   }
161   writeNameTable();
162   if (std::error_code EC = addNewSection(SecNameTable, SectionStart))
163     return EC;
164 
165   SectionStart = markSectionStart(SecLBRProfile);
166   if (std::error_code EC = writeFuncProfiles(ProfileMap))
167     return EC;
168   if (std::error_code EC = addNewSection(SecLBRProfile, SectionStart))
169     return EC;
170 
171   if (ProfSymList && ProfSymList->toCompress())
172     setToCompressSection(SecProfileSymbolList);
173 
174   SectionStart = markSectionStart(SecProfileSymbolList);
175   if (ProfSymList && ProfSymList->size() > 0)
176     if (std::error_code EC = ProfSymList->write(*OutputStream))
177       return EC;
178   if (std::error_code EC = addNewSection(SecProfileSymbolList, SectionStart))
179     return EC;
180 
181   return sampleprof_error::success;
182 }
183 
184 std::error_code SampleProfileWriterCompactBinary::write(
185     const StringMap<FunctionSamples> &ProfileMap) {
186   if (std::error_code EC = SampleProfileWriter::write(ProfileMap))
187     return EC;
188   if (std::error_code EC = writeFuncOffsetTable())
189     return EC;
190   return sampleprof_error::success;
191 }
192 
193 /// Write samples to a text file.
194 ///
195 /// Note: it may be tempting to implement this in terms of
196 /// FunctionSamples::print().  Please don't.  The dump functionality is intended
197 /// for debugging and has no specified form.
198 ///
199 /// The format used here is more structured and deliberate because
200 /// it needs to be parsed by the SampleProfileReaderText class.
201 std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
202   auto &OS = *OutputStream;
203   OS << S.getName() << ":" << S.getTotalSamples();
204   if (Indent == 0)
205     OS << ":" << S.getHeadSamples();
206   OS << "\n";
207 
208   SampleSorter<LineLocation, SampleRecord> SortedSamples(S.getBodySamples());
209   for (const auto &I : SortedSamples.get()) {
210     LineLocation Loc = I->first;
211     const SampleRecord &Sample = I->second;
212     OS.indent(Indent + 1);
213     if (Loc.Discriminator == 0)
214       OS << Loc.LineOffset << ": ";
215     else
216       OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
217 
218     OS << Sample.getSamples();
219 
220     for (const auto &J : Sample.getSortedCallTargets())
221       OS << " " << J.first << ":" << J.second;
222     OS << "\n";
223   }
224 
225   SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
226       S.getCallsiteSamples());
227   Indent += 1;
228   for (const auto &I : SortedCallsiteSamples.get())
229     for (const auto &FS : I->second) {
230       LineLocation Loc = I->first;
231       const FunctionSamples &CalleeSamples = FS.second;
232       OS.indent(Indent);
233       if (Loc.Discriminator == 0)
234         OS << Loc.LineOffset << ": ";
235       else
236         OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
237       if (std::error_code EC = writeSample(CalleeSamples))
238         return EC;
239     }
240   Indent -= 1;
241 
242   return sampleprof_error::success;
243 }
244 
245 std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) {
246   const auto &ret = NameTable.find(FName);
247   if (ret == NameTable.end())
248     return sampleprof_error::truncated_name_table;
249   encodeULEB128(ret->second, *OutputStream);
250   return sampleprof_error::success;
251 }
252 
/// Register \p FName in NameTable.
///
/// The index stored here (0) is only a placeholder; stablizeNameTable()
/// assigns the final index of every name.
void SampleProfileWriterBinary::addName(StringRef FName) {
  NameTable.insert(std::make_pair(FName, 0));
}
256 
257 void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
258   // Add all the names in indirect call targets.
259   for (const auto &I : S.getBodySamples()) {
260     const SampleRecord &Sample = I.second;
261     for (const auto &J : Sample.getCallTargets())
262       addName(J.first());
263   }
264 
265   // Recursively add all the names for inlined callsites.
266   for (const auto &J : S.getCallsiteSamples())
267     for (const auto &FS : J.second) {
268       const FunctionSamples &CalleeSamples = FS.second;
269       addName(CalleeSamples.getName());
270       addNames(CalleeSamples);
271     }
272 }
273 
274 void SampleProfileWriterBinary::stablizeNameTable(std::set<StringRef> &V) {
275   // Sort the names to make NameTable deterministic.
276   for (const auto &I : NameTable)
277     V.insert(I.first);
278   int i = 0;
279   for (const StringRef &N : V)
280     NameTable[N] = i++;
281 }
282 
283 std::error_code SampleProfileWriterBinary::writeNameTable() {
284   auto &OS = *OutputStream;
285   std::set<StringRef> V;
286   stablizeNameTable(V);
287 
288   // Write out the name table.
289   encodeULEB128(NameTable.size(), OS);
290   for (auto N : V) {
291     OS << N;
292     encodeULEB128(0, OS);
293   }
294   return sampleprof_error::success;
295 }
296 
297 std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() {
298   auto &OS = *OutputStream;
299 
300   // Fill the slot remembered by TableOffset with the offset of FuncOffsetTable.
301   auto &OFS = static_cast<raw_fd_ostream &>(OS);
302   uint64_t FuncOffsetTableStart = OS.tell();
303   if (OFS.seek(TableOffset) == (uint64_t)-1)
304     return sampleprof_error::ostream_seek_unsupported;
305   support::endian::Writer Writer(*OutputStream, support::little);
306   Writer.write(FuncOffsetTableStart);
307   if (OFS.seek(FuncOffsetTableStart) == (uint64_t)-1)
308     return sampleprof_error::ostream_seek_unsupported;
309 
310   // Write out the table size.
311   encodeULEB128(FuncOffsetTable.size(), OS);
312 
313   // Write out FuncOffsetTable.
314   for (auto entry : FuncOffsetTable) {
315     writeNameIdx(entry.first);
316     encodeULEB128(entry.second, OS);
317   }
318   return sampleprof_error::success;
319 }
320 
321 std::error_code SampleProfileWriterCompactBinary::writeNameTable() {
322   auto &OS = *OutputStream;
323   std::set<StringRef> V;
324   stablizeNameTable(V);
325 
326   // Write out the name table.
327   encodeULEB128(NameTable.size(), OS);
328   for (auto N : V) {
329     encodeULEB128(MD5Hash(N), OS);
330   }
331   return sampleprof_error::success;
332 }
333 
334 std::error_code
335 SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) {
336   auto &OS = *OutputStream;
337   // Write file magic identifier.
338   encodeULEB128(SPMagic(Format), OS);
339   encodeULEB128(SPVersion(), OS);
340   return sampleprof_error::success;
341 }
342 
343 std::error_code SampleProfileWriterBinary::writeHeader(
344     const StringMap<FunctionSamples> &ProfileMap) {
345   writeMagicIdent(Format);
346 
347   computeSummary(ProfileMap);
348   if (auto EC = writeSummary())
349     return EC;
350 
351   // Generate the name table for all the functions referenced in the profile.
352   for (const auto &I : ProfileMap) {
353     addName(I.first());
354     addNames(I.second);
355   }
356 
357   writeNameTable();
358   return sampleprof_error::success;
359 }
360 
361 void SampleProfileWriterExtBinaryBase::setToCompressAllSections() {
362   for (auto &Entry : SectionLayout)
363     addSecFlags(Entry, SecFlagCompress);
364 }
365 
/// Add the compression flag to the SectionLayout entries of kind \p Type.
void SampleProfileWriterExtBinaryBase::setToCompressSection(SecType Type) {
  addSectionFlags(Type, SecFlagCompress);
}
369 
370 void SampleProfileWriterExtBinaryBase::addSectionFlags(SecType Type,
371                                                        SecFlags Flags) {
372   for (auto &Entry : SectionLayout) {
373     if (Entry.Type == Type)
374       addSecFlags(Entry, Flags);
375   }
376 }
377 
378 void SampleProfileWriterExtBinaryBase::allocSecHdrTable() {
379   support::endian::Writer Writer(*OutputStream, support::little);
380 
381   Writer.write(static_cast<uint64_t>(SectionLayout.size()));
382   SecHdrTableOffset = OutputStream->tell();
383   for (uint32_t i = 0; i < SectionLayout.size(); i++) {
384     Writer.write(static_cast<uint64_t>(-1));
385     Writer.write(static_cast<uint64_t>(-1));
386     Writer.write(static_cast<uint64_t>(-1));
387     Writer.write(static_cast<uint64_t>(-1));
388   }
389 }
390 
391 std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
392   auto &OFS = static_cast<raw_fd_ostream &>(*OutputStream);
393   uint64_t Saved = OutputStream->tell();
394 
395   // Set OutputStream to the location saved in SecHdrTableOffset.
396   if (OFS.seek(SecHdrTableOffset) == (uint64_t)-1)
397     return sampleprof_error::ostream_seek_unsupported;
398   support::endian::Writer Writer(*OutputStream, support::little);
399 
400   DenseMap<uint32_t, uint32_t> IndexMap;
401   for (uint32_t i = 0; i < SecHdrTable.size(); i++) {
402     IndexMap.insert({static_cast<uint32_t>(SecHdrTable[i].Type), i});
403   }
404 
405   // Write the sections in the order specified in SectionLayout.
406   // That is the sections order Reader will see. Note that the
407   // sections order in which Reader expects to read may be different
408   // from the order in which Writer is able to write, so we need
409   // to adjust the order in SecHdrTable to be consistent with
410   // SectionLayout when we write SecHdrTable to the memory.
411   for (uint32_t i = 0; i < SectionLayout.size(); i++) {
412     uint32_t idx = IndexMap[static_cast<uint32_t>(SectionLayout[i].Type)];
413     Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Type));
414     Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Flags));
415     Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Offset));
416     Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Size));
417   }
418 
419   // Reset OutputStream.
420   if (OFS.seek(Saved) == (uint64_t)-1)
421     return sampleprof_error::ostream_seek_unsupported;
422 
423   return sampleprof_error::success;
424 }
425 
426 std::error_code SampleProfileWriterExtBinaryBase::writeHeader(
427     const StringMap<FunctionSamples> &ProfileMap) {
428   auto &OS = *OutputStream;
429   FileStart = OS.tell();
430   writeMagicIdent(Format);
431 
432   allocSecHdrTable();
433   return sampleprof_error::success;
434 }
435 
436 std::error_code SampleProfileWriterCompactBinary::writeHeader(
437     const StringMap<FunctionSamples> &ProfileMap) {
438   support::endian::Writer Writer(*OutputStream, support::little);
439   if (auto EC = SampleProfileWriterBinary::writeHeader(ProfileMap))
440     return EC;
441 
442   // Reserve a slot for the offset of function offset table. The slot will
443   // be populated with the offset of FuncOffsetTable later.
444   TableOffset = OutputStream->tell();
445   Writer.write(static_cast<uint64_t>(-2));
446   return sampleprof_error::success;
447 }
448 
449 std::error_code SampleProfileWriterBinary::writeSummary() {
450   auto &OS = *OutputStream;
451   encodeULEB128(Summary->getTotalCount(), OS);
452   encodeULEB128(Summary->getMaxCount(), OS);
453   encodeULEB128(Summary->getMaxFunctionCount(), OS);
454   encodeULEB128(Summary->getNumCounts(), OS);
455   encodeULEB128(Summary->getNumFunctions(), OS);
456   std::vector<ProfileSummaryEntry> &Entries = Summary->getDetailedSummary();
457   encodeULEB128(Entries.size(), OS);
458   for (auto Entry : Entries) {
459     encodeULEB128(Entry.Cutoff, OS);
460     encodeULEB128(Entry.MinCount, OS);
461     encodeULEB128(Entry.NumCounts, OS);
462   }
463   return sampleprof_error::success;
464 }
465 std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
466   auto &OS = *OutputStream;
467 
468   if (std::error_code EC = writeNameIdx(S.getName()))
469     return EC;
470 
471   encodeULEB128(S.getTotalSamples(), OS);
472 
473   // Emit all the body samples.
474   encodeULEB128(S.getBodySamples().size(), OS);
475   for (const auto &I : S.getBodySamples()) {
476     LineLocation Loc = I.first;
477     const SampleRecord &Sample = I.second;
478     encodeULEB128(Loc.LineOffset, OS);
479     encodeULEB128(Loc.Discriminator, OS);
480     encodeULEB128(Sample.getSamples(), OS);
481     encodeULEB128(Sample.getCallTargets().size(), OS);
482     for (const auto &J : Sample.getSortedCallTargets()) {
483       StringRef Callee = J.first;
484       uint64_t CalleeSamples = J.second;
485       if (std::error_code EC = writeNameIdx(Callee))
486         return EC;
487       encodeULEB128(CalleeSamples, OS);
488     }
489   }
490 
491   // Recursively emit all the callsite samples.
492   uint64_t NumCallsites = 0;
493   for (const auto &J : S.getCallsiteSamples())
494     NumCallsites += J.second.size();
495   encodeULEB128(NumCallsites, OS);
496   for (const auto &J : S.getCallsiteSamples())
497     for (const auto &FS : J.second) {
498       LineLocation Loc = J.first;
499       const FunctionSamples &CalleeSamples = FS.second;
500       encodeULEB128(Loc.LineOffset, OS);
501       encodeULEB128(Loc.Discriminator, OS);
502       if (std::error_code EC = writeBody(CalleeSamples))
503         return EC;
504     }
505 
506   return sampleprof_error::success;
507 }
508 
509 /// Write samples of a top-level function to a binary file.
510 ///
511 /// \returns true if the samples were written successfully, false otherwise.
512 std::error_code
513 SampleProfileWriterBinary::writeSample(const FunctionSamples &S) {
514   encodeULEB128(S.getHeadSamples(), *OutputStream);
515   return writeBody(S);
516 }
517 
518 std::error_code
519 SampleProfileWriterCompactBinary::writeSample(const FunctionSamples &S) {
520   uint64_t Offset = OutputStream->tell();
521   StringRef Name = S.getName();
522   FuncOffsetTable[Name] = Offset;
523   encodeULEB128(S.getHeadSamples(), *OutputStream);
524   return writeBody(S);
525 }
526 
527 /// Create a sample profile file writer based on the specified format.
528 ///
529 /// \param Filename The file to create.
530 ///
531 /// \param Format Encoding format for the profile file.
532 ///
533 /// \returns an error code indicating the status of the created writer.
534 ErrorOr<std::unique_ptr<SampleProfileWriter>>
535 SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) {
536   std::error_code EC;
537   std::unique_ptr<raw_ostream> OS;
538   if (Format == SPF_Binary || Format == SPF_Ext_Binary ||
539       Format == SPF_Compact_Binary)
540     OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_None));
541   else
542     OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_Text));
543   if (EC)
544     return EC;
545 
546   return create(OS, Format);
547 }
548 
549 /// Create a sample profile stream writer based on the specified format.
550 ///
551 /// \param OS The output stream to store the profile data to.
552 ///
553 /// \param Format Encoding format for the profile file.
554 ///
555 /// \returns an error code indicating the status of the created writer.
556 ErrorOr<std::unique_ptr<SampleProfileWriter>>
557 SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
558                             SampleProfileFormat Format) {
559   std::error_code EC;
560   std::unique_ptr<SampleProfileWriter> Writer;
561 
562   if (Format == SPF_Binary)
563     Writer.reset(new SampleProfileWriterRawBinary(OS));
564   else if (Format == SPF_Ext_Binary)
565     Writer.reset(new SampleProfileWriterExtBinary(OS));
566   else if (Format == SPF_Compact_Binary)
567     Writer.reset(new SampleProfileWriterCompactBinary(OS));
568   else if (Format == SPF_Text)
569     Writer.reset(new SampleProfileWriterText(OS));
570   else if (Format == SPF_GCC)
571     EC = sampleprof_error::unsupported_writing_format;
572   else
573     EC = sampleprof_error::unrecognized_format;
574 
575   if (EC)
576     return EC;
577 
578   Writer->Format = Format;
579   return std::move(Writer);
580 }
581 
582 void SampleProfileWriter::computeSummary(
583     const StringMap<FunctionSamples> &ProfileMap) {
584   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
585   for (const auto &I : ProfileMap) {
586     const FunctionSamples &Profile = I.second;
587     Builder.addRecord(Profile);
588   }
589   Summary = Builder.getSummary();
590 }
591