//===- bolt/Profile/DataReader.cpp - Perf data reader ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This family of functions reads profile data written by the perf2bolt
// utility and stores it in memory for llvm-bolt consumption.
//
//===----------------------------------------------------------------------===//

#include "bolt/Profile/DataReader.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Utils/Utils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include <map>

#undef  DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

using namespace llvm;

namespace opts {

extern cl::OptionCategory BoltCategory;
extern llvm::cl::opt<unsigned> Verbosity;

static cl::opt<bool>
DumpData("dump-data",
  cl::desc("dump parsed bolt data for debugging"),
  cl::Hidden,
  cl::cat(BoltCategory));

} // namespace opts

namespace llvm {
namespace bolt {

Optional<StringRef> getLTOCommonName(const StringRef Name) {
  size_t LTOSuffixPos = Name.find(".lto_priv.");
  if (LTOSuffixPos != StringRef::npos)
    return Name.substr(0, LTOSuffixPos + 10);
  if ((LTOSuffixPos = Name.find(".constprop.")) != StringRef::npos)
    return Name.substr(0, LTOSuffixPos + 11);
  if ((LTOSuffixPos = Name.find(".llvm.")) != StringRef::npos)
    return Name.substr(0, LTOSuffixPos + 6);
  return NoneType();
}

namespace {

/// Return true if the function name can change across compilations.
bool hasVolatileName(const BinaryFunction &BF) {
  for (const StringRef &Name : BF.getNames())
    if (getLTOCommonName(Name))
      return true;

  return false;
}

/// Return standard escaped name of the function possibly renamed by BOLT.
std::string normalizeName(StringRef NameRef) {
  // Strip "PG." prefix used for globalized locals.
  NameRef = NameRef.startswith("PG.") ? NameRef.substr(2) : NameRef;
  return getEscapedName(NameRef);
}

} // anonymous namespace

raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) {
  if (Loc.IsSymbol) {
    OS << Loc.Name;
    if (Loc.Offset)
      OS << "+" << Twine::utohexstr(Loc.Offset);
  } else {
    OS << Twine::utohexstr(Loc.Offset);
  }
  return OS;
}

void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
  Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end());
  for (auto I = Data.begin(), E = Data.end(); I != E; ++I) {
    if (I->From.Name == FBD.Name) {
      I->From.Name = this->Name;
      I->From.Offset += Offset;
    }
    if (I->To.Name == FBD.Name) {
      I->To.Name = this->Name;
      I->To.Offset += Offset;
    }
  }
  llvm::stable_sort(Data);
  ExecutionCount += FBD.ExecutionCount;
  for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
    assert(I->To.Name == FBD.Name);
    auto NewElmt = EntryData.insert(EntryData.end(), *I);
    NewElmt->To.Name = this->Name;
    NewElmt->To.Offset += Offset;
  }
}

uint64_t FuncBranchData::getNumExecutedBranches() const {
  uint64_t ExecutedBranches = 0;
  for (const BranchInfo &BI : Data) {
    int64_t BranchCount = BI.Branches;
    assert(BranchCount >= 0 && "branch execution count should not be negative");
    ExecutedBranches += BranchCount;
  }
  return ExecutedBranches;
}

void SampleInfo::mergeWith(const SampleInfo &SI) { Hits += SI.Hits; }

void SampleInfo::print(raw_ostream &OS) const {
  OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Loc.Offset)
     << " " << Hits << "\n";
}

uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
  assert(llvm::is_sorted(Data));
  struct Compare {
    bool operator()(const SampleInfo &SI, const uint64_t Val) const {
      return SI.Loc.Offset < Val;
    }
    bool operator()(const uint64_t Val, const SampleInfo &SI) const {
      return Val < SI.Loc.Offset;
    }
  };
  uint64_t Result = 0;
  for (auto I = llvm::lower_bound(Data, Start, Compare()),
            E = llvm::lower_bound(Data, End, Compare());
       I != E; ++I)
    Result += I->Hits;
  return Result;
}

void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
  auto Iter = Index.find(Offset);
  if (Iter == Index.end()) {
    Data.emplace_back(Location(true, Name, Offset), Count);
    Index[Offset] = Data.size() - 1;
    return;
  }
  SampleInfo &SI = Data[Iter->second];
  SI.Hits += Count;
}

void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo,
                                     uint64_t Count, uint64_t Mispreds) {
  auto Iter = IntraIndex[OffsetFrom].find(OffsetTo);
  if (Iter == IntraIndex[OffsetFrom].end()) {
    Data.emplace_back(Location(true, Name, OffsetFrom),
                      Location(true, Name, OffsetTo), Mispreds, Count);
    IntraIndex[OffsetFrom][OffsetTo] = Data.size() - 1;
    return;
  }
  BranchInfo &BI = Data[Iter->second];
  BI.Branches += Count;
  BI.Mispreds += Mispreds;
}

void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To,
                                   uint64_t Count, uint64_t Mispreds) {
  auto Iter = InterIndex[OffsetFrom].find(To);
  if (Iter == InterIndex[OffsetFrom].end()) {
    Data.emplace_back(Location(true, Name, OffsetFrom), To, Mispreds, Count);
    InterIndex[OffsetFrom][To] = Data.size() - 1;
    return;
  }
  BranchInfo &BI = Data[Iter->second];
  BI.Branches += Count;
  BI.Mispreds += Mispreds;
}

void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo,
                                    uint64_t Count, uint64_t Mispreds) {
  auto Iter = EntryIndex[OffsetTo].find(From);
  if (Iter == EntryIndex[OffsetTo].end()) {
    EntryData.emplace_back(From, Location(true, Name, OffsetTo), Mispreds,
                           Count);
    EntryIndex[OffsetTo][From] = EntryData.size() - 1;
    return;
  }
  BranchInfo &BI = EntryData[Iter->second];
  BI.Branches += Count;
  BI.Mispreds += Mispreds;
}

void BranchInfo::mergeWith(const BranchInfo &BI) {
  Branches += BI.Branches;
  Mispreds += BI.Mispreds;
}

void BranchInfo::print(raw_ostream &OS) const {
  OS << From.IsSymbol << " " << From.Name << " "
     << Twine::utohexstr(From.Offset) << " " << To.IsSymbol << " " << To.Name
     << " " << Twine::utohexstr(To.Offset) << " " << Mispreds << " " << Branches
     << '\n';
}

ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
                                                      uint64_t To) const {
  for (const BranchInfo &I : Data)
    if (I.From.Offset == From && I.To.Offset == To && I.From.Name == I.To.Name)
      return I;

  return make_error_code(llvm::errc::invalid_argument);
}

ErrorOr<const BranchInfo &>
FuncBranchData::getDirectCallBranch(uint64_t From) const {
  // Commented out because it can be expensive.
  // assert(std::is_sorted(Data.begin(), Data.end()));
  struct Compare {
    bool operator()(const BranchInfo &BI, const uint64_t Val) const {
      return BI.From.Offset < Val;
    }
    bool operator()(const uint64_t Val, const BranchInfo &BI) const {
      return Val < BI.From.Offset;
    }
  };
  auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare());
  for (auto I = Range.first; I != Range.second; ++I)
    if (I->From.Name != I->To.Name)
      return *I;

  return make_error_code(llvm::errc::invalid_argument);
}

void MemInfo::print(raw_ostream &OS) const {
  OS << (Offset.IsSymbol + 3) << " " << Offset.Name << " "
     << Twine::utohexstr(Offset.Offset) << " " << (Addr.IsSymbol + 3) << " "
     << Addr.Name << " " << Twine::utohexstr(Addr.Offset) << " " << Count
     << "\n";
}

void MemInfo::prettyPrint(raw_ostream &OS) const {
  OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")";
}

void FuncMemData::update(const Location &Offset, const Location &Addr) {
  auto Iter = EventIndex[Offset.Offset].find(Addr);
  if (Iter == EventIndex[Offset.Offset].end()) {
    Data.emplace_back(MemInfo(Offset, Addr, 1));
    EventIndex[Offset.Offset][Addr] = Data.size() - 1;
    return;
  }
  ++Data[Iter->second].Count;
}

Error DataReader::preprocessProfile(BinaryContext &BC) {
  if (std::error_code EC = parseInput())
    return errorCodeToError(EC);

  if (opts::DumpData)
    dump();

  if (collectedInBoltedBinary())
    outs() << "BOLT-INFO: profile collection done on a binary already "
              "processed by BOLT\n";

  for (auto &BFI : BC.getBinaryFunctions()) {
    BinaryFunction &Function = BFI.second;
    if (FuncMemData *MemData = getMemDataForNames(Function.getNames())) {
      setMemData(Function, MemData);
      MemData->Used = true;
    }
    if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
      setBranchData(Function, FuncData);
      Function.ExecutionCount = FuncData->ExecutionCount;
      FuncData->Used = true;
    }
  }

  for (auto &BFI : BC.getBinaryFunctions()) {
    BinaryFunction &Function = BFI.second;
    matchProfileMemData(Function);
  }

  return Error::success();
}

Error DataReader::readProfilePreCFG(BinaryContext &BC) {
  for (auto &BFI : BC.getBinaryFunctions()) {
    BinaryFunction &Function = BFI.second;
    FuncMemData *MemoryData = getMemData(Function);
    if (!MemoryData)
      continue;

    for (MemInfo &MI : MemoryData->Data) {
      const uint64_t Offset = MI.Offset.Offset;
      auto II = Function.Instructions.find(Offset);
      if (II == Function.Instructions.end()) {
        // Ignore bad instruction address.
        continue;
      }

      auto &MemAccessProfile =
          BC.MIB->getOrCreateAnnotationAs<MemoryAccessProfile>(
              II->second, "MemoryAccessProfile");
      BinaryData *BD = nullptr;
      if (MI.Addr.IsSymbol)
        BD = BC.getBinaryDataByName(MI.Addr.Name);
      MemAccessProfile.AddressAccessInfo.push_back(
          {BD, MI.Addr.Offset, MI.Count});
      auto NextII = std::next(II);
      if (NextII == Function.Instructions.end())
        MemAccessProfile.NextInstrOffset = Function.getSize();
      else
        MemAccessProfile.NextInstrOffset = II->first;
    }
    Function.HasMemoryProfile = true;
  }

  return Error::success();
}

Error DataReader::readProfile(BinaryContext &BC) {
  for (auto &BFI : BC.getBinaryFunctions()) {
    BinaryFunction &Function = BFI.second;
    readProfile(Function);
  }

  uint64_t NumUnused = 0;
  for (const StringMapEntry<FuncBranchData> &FuncData : NamesToBranches)
    if (!FuncData.getValue().Used)
      ++NumUnused;
  BC.setNumUnusedProfiledObjects(NumUnused);

  return Error::success();
}

std::error_code DataReader::parseInput() {
  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
      MemoryBuffer::getFileOrSTDIN(Filename);
  if (std::error_code EC = MB.getError()) {
    Diag << "cannot open " << Filename << ": " << EC.message() << "\n";
    return EC;
  }
  FileBuf = std::move(MB.get());
  ParsingBuf = FileBuf->getBuffer();
  if (std::error_code EC = parse())
    return EC;
  if (!ParsingBuf.empty())
    Diag << "WARNING: invalid profile data detected at line " << Line
         << ". Possibly corrupted profile.\n";

  buildLTONameMaps();

  return std::error_code();
}

void DataReader::readProfile(BinaryFunction &BF) {
  if (BF.empty())
    return;

  if (!hasLBR()) {
    BF.ProfileFlags = BinaryFunction::PF_SAMPLE;
    readSampleData(BF);
    return;
  }

  BF.ProfileFlags = BinaryFunction::PF_LBR;

  // Possibly assign/re-assign branch profile data.
  matchProfileData(BF);

  FuncBranchData *FBD = getBranchData(BF);
  if (!FBD)
    return;

  // Assign basic block counts to function entry points. These only include
  // counts for outside entries.
  //
  // There is a slight skew introduced here as branches originated from RETs
  // may be accounted for in the execution count of an entry block if the last
  // instruction in a predecessor fall-through block is a call. This situation
  // should rarely happen because there are few multiple-entry functions.
  for (const BranchInfo &BI : FBD->EntryData) {
    BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(BI.To.Offset);
    if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
      uint64_t Count = BB->getExecutionCount();
      if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
        Count = 0;
      BB->setExecutionCount(Count + BI.Branches);
    }
  }

  for (const BranchInfo &BI : FBD->Data) {
    if (BI.From.Name != BI.To.Name)
      continue;

    if (!recordBranch(BF, BI.From.Offset, BI.To.Offset, BI.Branches,
                      BI.Mispreds)) {
      LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
                        << BI.To.Offset << '\n');
    }
  }

  // Convert branch data into annotations.
  convertBranchData(BF);
}

void DataReader::matchProfileData(BinaryFunction &BF) {
  // This functionality is available for LBR-mode only
  // TODO: Implement evaluateProfileData() for samples, checking whether
  // sample addresses match instruction addresses in the function
  if (!hasLBR())
    return;

  FuncBranchData *FBD = getBranchData(BF);
  if (FBD) {
    BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
    BF.RawBranchCount = FBD->getNumExecutedBranches();
    if (BF.ProfileMatchRatio == 1.0f) {
      if (fetchProfileForOtherEntryPoints(BF)) {
        BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
        BF.ExecutionCount = FBD->ExecutionCount;
        BF.RawBranchCount = FBD->getNumExecutedBranches();
      }
      return;
    }
  }

  // Check if the function name can fluctuate between several compilations
  // possibly triggered by minor unrelated code changes in the source code
  // of the input binary.
  if (!hasVolatileName(BF))
    return;

  // Check for a profile that matches with 100% confidence.
  const std::vector<FuncBranchData *> AllBranchData =
      getBranchDataForNamesRegex(BF.getNames());
  for (FuncBranchData *NewBranchData : AllBranchData) {
    // Prevent functions from sharing the same profile.
    if (NewBranchData->Used)
      continue;

    if (evaluateProfileData(BF, *NewBranchData) != 1.0f)
      continue;

    if (FBD)
      FBD->Used = false;

    // Update function profile data with the new set.
    setBranchData(BF, NewBranchData);
    NewBranchData->Used = true;
    BF.ExecutionCount = NewBranchData->ExecutionCount;
    BF.ProfileMatchRatio = 1.0f;
    break;
  }
}

void DataReader::matchProfileMemData(BinaryFunction &BF) {
  const std::vector<FuncMemData *> AllMemData =
      getMemDataForNamesRegex(BF.getNames());
  for (FuncMemData *NewMemData : AllMemData) {
    // Prevent functions from sharing the same profile.
    if (NewMemData->Used)
      continue;

    if (FuncMemData *MD = getMemData(BF))
      MD->Used = false;

    // Update function profile data with the new set.
    setMemData(BF, NewMemData);
    NewMemData->Used = true;
    break;
  }
}

bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) {
  BinaryContext &BC = BF.getBinaryContext();

  FuncBranchData *FBD = getBranchData(BF);
  if (!FBD)
    return false;

  // Check if we are missing profiling data for secondary entry points
  bool First = true;
  bool Updated = false;
  for (BinaryBasicBlock *BB : BF.BasicBlocks) {
    if (First) {
      First = false;
      continue;
    }
    if (BB->isEntryPoint()) {
      uint64_t EntryAddress = BB->getOffset() + BF.getAddress();
      // Look for branch data associated with this entry point
      if (BinaryData *BD = BC.getBinaryDataAtAddress(EntryAddress)) {
        if (FuncBranchData *Data = getBranchDataForSymbols(BD->getSymbols())) {
          FBD->appendFrom(*Data, BB->getOffset());
          Data->Used = true;
          Updated = true;
        }
      }
    }
  }

  return Updated;
}

float DataReader::evaluateProfileData(BinaryFunction &BF,
                                      const FuncBranchData &BranchData) const {
  BinaryContext &BC = BF.getBinaryContext();

  // Until we define a minimal profile, we consider an empty branch data to be
  // a valid profile. It could happen to a function without branches when we
  // still have an EntryData for the execution count.
  if (BranchData.Data.empty())
    return 1.0f;

  uint64_t NumMatchedBranches = 0;
  for (const BranchInfo &BI : BranchData.Data) {
    bool IsValid = false;
    if (BI.From.Name == BI.To.Name) {
      // Try to record information with 0 count.
      IsValid = recordBranch(BF, BI.From.Offset, BI.To.Offset, 0);
    } else if (collectedInBoltedBinary()) {
      // We can't check branch source for collections in bolted binaries because
      // the source of the branch may be mapped to the first instruction in a BB
      // instead of the original branch (which may not exist in the source bin).
      IsValid = true;
    } else {
      // The branch has to originate from this function.
      // Check for calls, tail calls, rets and indirect branches.
      // When matching profiling info, we did not reach the stage
      // when we identify tail calls, so they are still represented
      // by regular branch instructions and we need isBranch() here.
      MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
      // If it's a prefix - skip it.
      if (Instr && BC.MIB->isPrefix(*Instr))
        Instr = BF.getInstructionAtOffset(BI.From.Offset + 1);
      if (Instr && (BC.MIB->isCall(*Instr) || BC.MIB->isBranch(*Instr) ||
                    BC.MIB->isReturn(*Instr)))
        IsValid = true;
    }

    if (IsValid) {
      ++NumMatchedBranches;
      continue;
    }

    LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x"
                      << Twine::utohexstr(BI.From.Offset) << " -> ";
               if (BI.From.Name == BI.To.Name) dbgs()
               << "0x" << Twine::utohexstr(BI.To.Offset) << '\n';
               else dbgs() << "<outbounds>\n";);
  }

  const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size();
  if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size())
    errs() << "BOLT-WARNING: profile branches match only "
           << format("%.1f%%", MatchRatio * 100.0f) << " ("
           << NumMatchedBranches << '/' << BranchData.Data.size()
           << ") for function " << BF << '\n';

  return MatchRatio;
}

void DataReader::readSampleData(BinaryFunction &BF) {
  FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames());
  if (!SampleDataOrErr)
    return;

  // Basic samples mode territory (without LBR info)
  // First step is to assign BB execution count based on samples from perf
  BF.ProfileMatchRatio = 1.0f;
  BF.removeTagsFromProfile();
  bool NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
  bool NormalizeByCalls = usesEvent("branches");
  static bool NagUser = true;
  if (NagUser) {
    outs()
        << "BOLT-INFO: operating with basic samples profiling data (no LBR).\n";
    if (NormalizeByInsnCount)
      outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
    else if (NormalizeByCalls)
      outs() << "BOLT-INFO: normalizing samples by branches.\n";

    NagUser = false;
  }
  uint64_t LastOffset = BF.getSize();
  uint64_t TotalEntryCount = 0;
  for (auto I = BF.BasicBlockOffsets.rbegin(), E = BF.BasicBlockOffsets.rend();
       I != E; ++I) {
    uint64_t CurOffset = I->first;
    // Always work with samples multiplied by 1000 to avoid losing them if we
    // later need to normalize numbers
    uint64_t NumSamples =
        SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000;
    if (NormalizeByInsnCount && I->second->getNumNonPseudos()) {
      NumSamples /= I->second->getNumNonPseudos();
    } else if (NormalizeByCalls) {
      uint32_t NumCalls = I->second->getNumCalls();
      NumSamples /= NumCalls + 1;
    }
    I->second->setExecutionCount(NumSamples);
    if (I->second->isEntryPoint())
      TotalEntryCount += NumSamples;
    LastOffset = CurOffset;
  }

  BF.ExecutionCount = TotalEntryCount;

  estimateEdgeCounts(BF);
}

void DataReader::convertBranchData(BinaryFunction &BF) const {
  BinaryContext &BC = BF.getBinaryContext();

  if (BF.empty())
    return;

  FuncBranchData *FBD = getBranchData(BF);
  if (!FBD)
    return;

  // Profile information for calls.
  //
  // There are 3 cases that we annotate differently:
  //   1) Conditional tail calls that could be mispredicted.
  //   2) Indirect calls to multiple destinations with mispredictions.
  //      Before we validate CFG we have to handle indirect branches here too.
  //   3) Regular direct calls. The count could be different from containing
  //      basic block count. Keep this data in case we find it useful.
  //
  for (BranchInfo &BI : FBD->Data) {
    // Ignore internal branches.
    if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0)
      continue;

    MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset);
    if (!Instr ||
        (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)))
      continue;

    auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) {
      if (opts::Verbosity >= 1 && BC.MIB->hasAnnotation(*Instr, Name))
        errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x"
               << Twine::utohexstr(BI.From.Offset) << " in function " << BF
               << '\n';
      auto &Value = BC.MIB->getOrCreateAnnotationAs<uint64_t>(*Instr, Name);
      Value += Count;
    };

    if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) {
      IndirectCallSiteProfile &CSP =
          BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
              *Instr, "CallProfile");
      MCSymbol *CalleeSymbol = nullptr;
      if (BI.To.IsSymbol) {
        if (BinaryData *BD = BC.getBinaryDataByName(BI.To.Name))
          CalleeSymbol = BD->getSymbol();
      }
      CSP.emplace_back(CalleeSymbol, BI.Branches, BI.Mispreds);
    } else if (BC.MIB->getConditionalTailCall(*Instr)) {
      setOrUpdateAnnotation("CTCTakenCount", BI.Branches);
      setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds);
    } else {
      setOrUpdateAnnotation("Count", BI.Branches);
    }
  }
}

bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To,
                              uint64_t Count, uint64_t Mispreds) const {
  BinaryContext &BC = BF.getBinaryContext();

  BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From);
  BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To);

  if (!FromBB || !ToBB) {
    LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n");
    return false;
  }

  // Could be bad LBR data; ignore the branch. In the case of data collected
  // in binaries optimized by BOLT, a source BB may be mapped to two output
  // BBs as a result of optimizations. In that case, a branch between these
  // two will be recorded as a branch from A going to A in the source address
  // space. Keep processing.
  if (From == To)
    return true;

  // Return from a tail call.
  if (FromBB->succ_size() == 0)
    return true;

  // Very rarely we will see ignored branches. Do a linear check.
  for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches)
    if (Branch ==
        std::make_pair(static_cast<uint32_t>(From), static_cast<uint32_t>(To)))
      return true;

  bool OffsetMatches = !!(To == ToBB->getOffset());
  if (!OffsetMatches) {
    // Skip the nops to support old .fdata
    uint64_t Offset = ToBB->getOffset();
    for (MCInst &Instr : *ToBB) {
      if (!BC.MIB->isNoop(Instr))
        break;

      Offset += BC.MIB->getAnnotationWithDefault<uint32_t>(Instr, "Size");
    }

    if (To == Offset)
      OffsetMatches = true;
  }

  if (!OffsetMatches) {
    // "To" could be referring to nop instructions in between 2 basic blocks.
    // While building the CFG we make sure these nops are attributed to the
    // previous basic block, thus we check if the destination belongs to the
    // gap past the last instruction.
    const MCInst *LastInstr = ToBB->getLastNonPseudoInstr();
    if (LastInstr) {
      const uint32_t LastInstrOffset =
          BC.MIB->getOffsetWithDefault(*LastInstr, 0);

      // With old .fdata we are getting FT branches for "jcc,jmp" sequences.
      if (To == LastInstrOffset && BC.MIB->isUnconditionalBranch(*LastInstr))
        return true;

      if (To <= LastInstrOffset) {
        LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block"
                          << " in " << BF << " : " << From << " -> " << To
                          << '\n');
        return false;
      }
    }

    // The real destination is the layout successor of the detected ToBB.
    if (ToBB == BF.BasicBlocksLayout.back())
      return false;
    BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[ToBB->getIndex() + 1];
    assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout");
    ToBB = NextBB;
  }

  // If there's no corresponding instruction for 'From', we have probably
  // discarded it as a FT from __builtin_unreachable.
  MCInst *FromInstruction = BF.getInstructionAtOffset(From);
  if (!FromInstruction) {
    // If the data was collected in a bolted binary, the From addresses may be
    // translated to the first instruction of the source BB if BOLT inserted
    // a new branch that did not exist in the source (we can't map it to the
    // source instruction, so we map it to the first instr of source BB).
    // We do not keep offsets for random instructions. So the check above will
    // evaluate to true if the first instr is not a branch (call/jmp/ret/etc)
    if (collectedInBoltedBinary()) {
      if (FromBB->getInputOffset() != From) {
        LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in "
                          << BF << '\n');
        return false;
      }
      FromInstruction = nullptr;
    } else {
      LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF
                        << '\n');
      return false;
    }
  }

  if (!FromBB->getSuccessor(ToBB->getLabel())) {
    // Check if this is a recursive call or a return from a recursive call.
    if (FromInstruction && ToBB->isEntryPoint() &&
        (BC.MIB->isCall(*FromInstruction) ||
         BC.MIB->isIndirectBranch(*FromInstruction))) {
      // Execution count is already accounted for.
      return true;
    }
    // For data collected in a bolted binary, we may have created two output BBs
    // that map to one original block. Branches between these two blocks will
    // appear here as one BB jumping to itself, even though it has no loop
    // edges. Ignore these.
    if (collectedInBoltedBinary() && FromBB == ToBB)
      return true;

    BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false);
    if (FTSuccessor && FTSuccessor->succ_size() == 1 &&
        FTSuccessor->getSuccessor(ToBB->getLabel())) {
      BinaryBasicBlock::BinaryBranchInfo &FTBI =
          FTSuccessor->getBranchInfo(*ToBB);
      FTBI.Count += Count;
      if (Count)
        FTBI.MispredictedCount += Mispreds;
      ToBB = FTSuccessor;
    } else {
      LLVM_DEBUG(dbgs() << "invalid branch in " << BF << '\n'
                        << Twine::utohexstr(From) << " -> "
                        << Twine::utohexstr(To) << '\n');
      return false;
    }
  }

  BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB);
  BI.Count += Count;
  // Only update mispredicted count if it the count was real.
  if (Count) {
    BI.MispredictedCount += Mispreds;
  }

  return true;
}

void DataReader::reportError(StringRef ErrorMsg) {
  Diag << "Error reading BOLT data input file: line " << Line << ", column "
       << Col << ": " << ErrorMsg << '\n';
}

bool DataReader::expectAndConsumeFS() {
  if (ParsingBuf[0] != FieldSeparator) {
    reportError("expected field separator");
    return false;
  }
  ParsingBuf = ParsingBuf.drop_front(1);
  Col += 1;
  return true;
}

void DataReader::consumeAllRemainingFS() {
  while (ParsingBuf[0] == FieldSeparator) {
    ParsingBuf = ParsingBuf.drop_front(1);
    Col += 1;
  }
}

bool DataReader::checkAndConsumeNewLine() {
  if (ParsingBuf[0] != '\n')
    return false;

  ParsingBuf = ParsingBuf.drop_front(1);
  Col = 0;
  Line += 1;
  return true;
}

ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) {
  if (EndChar == '\\') {
    reportError("EndChar could not be backslash");
    return make_error_code(llvm::errc::io_error);
  }

  std::string EndChars(1, EndChar);
  EndChars.push_back('\\');
  if (EndNl)
    EndChars.push_back('\n');

  size_t StringEnd = 0;
  do {
    StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd);
    if (StringEnd == StringRef::npos ||
        (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) {
      reportError("malformed field");
      return make_error_code(llvm::errc::io_error);
    }

    if (ParsingBuf[StringEnd] != '\\')
      break;

    StringEnd += 2;
  } while (1);

  StringRef Str = ParsingBuf.substr(0, StringEnd);

  // If EndNl was set and nl was found instead of EndChar, do not consume the
  // new line.
  bool EndNlInsteadOfEndChar = ParsingBuf[StringEnd] == '\n' && EndChar != '\n';
  unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + 1;

  ParsingBuf = ParsingBuf.drop_front(End);
  if (EndChar == '\n') {
    Col = 0;
    Line += 1;
  } else {
    Col += End;
  }
  return Str;
}

ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) {
  ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
  if (std::error_code EC = NumStrRes.getError())
    return EC;
  StringRef NumStr = NumStrRes.get();
  int64_t Num;
  if (NumStr.getAsInteger(10, Num)) {
    reportError("expected decimal number");
    Diag << "Found: " << NumStr << "\n";
    return make_error_code(llvm::errc::io_error);
  }
  return Num;
}

ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) {
  ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl);
  if (std::error_code EC = NumStrRes.getError())
    return EC;
  StringRef NumStr = NumStrRes.get();
  uint64_t Num;
  if (NumStr.getAsInteger(16, Num)) {
    reportError("expected hexidecimal number");
    Diag << "Found: " << NumStr << "\n";
    return make_error_code(llvm::errc::io_error);
  }
  return Num;
}

ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl,
                                            bool ExpectMemLoc) {
  // Read whether the location of the branch should be DSO or a symbol
  // 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local
  // symbol.
  // The symbol flag is also used to tag memory load events by adding 3 to the
  // base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol.
  if (!ExpectMemLoc && ParsingBuf[0] != '0' && ParsingBuf[0] != '1' &&
      ParsingBuf[0] != '2') {
    reportError("expected 0, 1 or 2");
    return make_error_code(llvm::errc::io_error);
  }

  if (ExpectMemLoc && ParsingBuf[0] != '3' && ParsingBuf[0] != '4' &&
      ParsingBuf[0] != '5') {
    reportError("expected 3, 4 or 5");
    return make_error_code(llvm::errc::io_error);
  }

  bool IsSymbol =
      (!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) ||
      (ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5'));
  ParsingBuf = ParsingBuf.drop_front(1);
  Col += 1;

  if (!expectAndConsumeFS())
    return make_error_code(llvm::errc::io_error);
  consumeAllRemainingFS();

  // Read the string containing the symbol or the DSO name
  ErrorOr<StringRef> NameRes = parseString(FieldSeparator);
  if (std::error_code EC = NameRes.getError())
    return EC;
  StringRef Name = NameRes.get();
  consumeAllRemainingFS();

  // Read the offset
  ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl);
  if (std::error_code EC = Offset.getError())
    return EC;

  return Location(IsSymbol, Name, Offset.get());
}

ErrorOr<BranchInfo> DataReader::parseBranchInfo() {
  ErrorOr<Location> Res = parseLocation(FieldSeparator);
  if (std::error_code EC = Res.getError())
    return EC;
  Location From = Res.get();

  consumeAllRemainingFS();
  Res = parseLocation(FieldSeparator);
  if (std::error_code EC = Res.getError())
    return EC;
  Location To = Res.get();

  consumeAllRemainingFS();
  ErrorOr<int64_t> MRes = parseNumberField(FieldSeparator);
  if (std::error_code EC = MRes.getError())
    return EC;
  int64_t NumMispreds = MRes.get();

  consumeAllRemainingFS();
  ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
  if (std::error_code EC = BRes.getError())
    return EC;
  int64_t NumBranches = BRes.get();

  consumeAllRemainingFS();
  if (!checkAndConsumeNewLine()) {
    reportError("expected end of line");
    return make_error_code(llvm::errc::io_error);
  }

  return BranchInfo(std::move(From), std::move(To), NumMispreds, NumBranches);
}

ErrorOr<MemInfo> DataReader::parseMemInfo() {
  ErrorOr<Location> Res = parseMemLocation(FieldSeparator);
  if (std::error_code EC = Res.getError())
    return EC;
  Location Offset = Res.get();

  consumeAllRemainingFS();
  Res = parseMemLocation(FieldSeparator);
  if (std::error_code EC = Res.getError())
    return EC;
  Location Addr = Res.get();

  consumeAllRemainingFS();
  ErrorOr<int64_t> CountRes = parseNumberField(FieldSeparator, true);
  if (std::error_code EC = CountRes.getError())
    return EC;

  consumeAllRemainingFS();
  if (!checkAndConsumeNewLine()) {
    reportError("expected end of line");
    return make_error_code(llvm::errc::io_error);
  }

  return MemInfo(Offset, Addr, CountRes.get());
}

ErrorOr<SampleInfo> DataReader::parseSampleInfo() {
  ErrorOr<Location> Res = parseLocation(FieldSeparator);
  if (std::error_code EC = Res.getError())
    return EC;
  Location Address = Res.get();

  consumeAllRemainingFS();
  ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true);
  if (std::error_code EC = BRes.getError())
    return EC;
  int64_t Occurrences = BRes.get();

  consumeAllRemainingFS();
  if (!checkAndConsumeNewLine()) {
    reportError("expected end of line");
    return make_error_code(llvm::errc::io_error);
  }

  return SampleInfo(std::move(Address), Occurrences);
}

ErrorOr<bool> DataReader::maybeParseNoLBRFlag() {
  if (ParsingBuf.size() < 6 || ParsingBuf.substr(0, 6) != "no_lbr")
    return false;
  ParsingBuf = ParsingBuf.drop_front(6);
  Col += 6;

  if (ParsingBuf.size() > 0 && ParsingBuf[0] == ' ')
    ParsingBuf = ParsingBuf.drop_front(1);

  while (ParsingBuf.size() > 0 && ParsingBuf[0] != '\n') {
    ErrorOr<StringRef> EventName = parseString(' ', true);
    if (!EventName)
      return make_error_code(llvm::errc::io_error);
    EventNames.insert(EventName.get());
  }

  if (!checkAndConsumeNewLine()) {
    reportError("malformed no_lbr line");
    return make_error_code(llvm::errc::io_error);
  }
  return true;
}

ErrorOr<bool> DataReader::maybeParseBATFlag() {
  if (ParsingBuf.size() < 16 || ParsingBuf.substr(0, 16) != "boltedcollection")
    return false;
  ParsingBuf = ParsingBuf.drop_front(16);
  Col += 16;

  if (!checkAndConsumeNewLine()) {
    reportError("malformed boltedcollection line");
    return make_error_code(llvm::errc::io_error);
  }
  return true;
}

bool DataReader::hasBranchData() {
  if (ParsingBuf.size() == 0)
    return false;

  if (ParsingBuf[0] == '0' || ParsingBuf[0] == '1' || ParsingBuf[0] == '2')
    return true;
  return false;
}

bool DataReader::hasMemData() {
  if (ParsingBuf.size() == 0)
    return false;

  if (ParsingBuf[0] == '3' || ParsingBuf[0] == '4' || ParsingBuf[0] == '5')
    return true;
  return false;
}

std::error_code DataReader::parseInNoLBRMode() {
  auto GetOrCreateFuncEntry = [&](StringRef Name) {
    auto I = NamesToSamples.find(Name);
    if (I == NamesToSamples.end()) {
      bool Success;
      std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
          Name, FuncSampleData(Name, FuncSampleData::ContainerTy())));

      assert(Success && "unexpected result of insert");
    }
    return I;
  };

  auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
    auto I = NamesToMemEvents.find(Name);
    if (I == NamesToMemEvents.end()) {
      bool Success;
      std::tie(I, Success) = NamesToMemEvents.insert(
          std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
      assert(Success && "unexpected result of insert");
    }
    return I;
  };

  while (hasBranchData()) {
    ErrorOr<SampleInfo> Res = parseSampleInfo();
    if (std::error_code EC = Res.getError())
      return EC;

    SampleInfo SI = Res.get();

    // Ignore samples not involving known locations
    if (!SI.Loc.IsSymbol)
      continue;

    StringMapIterator<FuncSampleData> I = GetOrCreateFuncEntry(SI.Loc.Name);
    I->getValue().Data.emplace_back(std::move(SI));
  }

  while (hasMemData()) {
    ErrorOr<MemInfo> Res = parseMemInfo();
    if (std::error_code EC = Res.getError())
      return EC;

    MemInfo MI = Res.get();

    // Ignore memory events not involving known pc.
    if (!MI.Offset.IsSymbol)
      continue;

    StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
    I->getValue().Data.emplace_back(std::move(MI));
  }

  for (StringMapEntry<FuncSampleData> &FuncSamples : NamesToSamples)
    llvm::stable_sort(FuncSamples.second.Data);

  for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents)
    llvm::stable_sort(MemEvents.second.Data);

  return std::error_code();
}

std::error_code DataReader::parse() {
  auto GetOrCreateFuncEntry = [&](StringRef Name) {
    auto I = NamesToBranches.find(Name);
    if (I == NamesToBranches.end()) {
      bool Success;
      std::tie(I, Success) = NamesToBranches.insert(std::make_pair(
          Name, FuncBranchData(Name, FuncBranchData::ContainerTy(),
                               FuncBranchData::ContainerTy())));
      assert(Success && "unexpected result of insert");
    }
    return I;
  };

  auto GetOrCreateFuncMemEntry = [&](StringRef Name) {
    auto I = NamesToMemEvents.find(Name);
    if (I == NamesToMemEvents.end()) {
      bool Success;
      std::tie(I, Success) = NamesToMemEvents.insert(
          std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy())));
      assert(Success && "unexpected result of insert");
    }
    return I;
  };

  Col = 0;
  Line = 1;
  ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag();
  if (!FlagOrErr)
    return FlagOrErr.getError();
  NoLBRMode = *FlagOrErr;

  ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag();
  if (!BATFlagOrErr)
    return BATFlagOrErr.getError();
  BATMode = *BATFlagOrErr;

  if (!hasBranchData() && !hasMemData()) {
    Diag << "ERROR: no valid profile data found\n";
    return make_error_code(llvm::errc::io_error);
  }

  if (NoLBRMode)
    return parseInNoLBRMode();

  while (hasBranchData()) {
    ErrorOr<BranchInfo> Res = parseBranchInfo();
    if (std::error_code EC = Res.getError())
      return EC;

    BranchInfo BI = Res.get();

    // Ignore branches not involving known location.
    if (!BI.From.IsSymbol && !BI.To.IsSymbol)
      continue;

    StringMapIterator<FuncBranchData> I = GetOrCreateFuncEntry(BI.From.Name);
    I->getValue().Data.emplace_back(std::move(BI));

    // Add entry data for branches to another function or branches
    // to entry points (including recursive calls)
    if (BI.To.IsSymbol &&
        (!BI.From.Name.equals(BI.To.Name) || BI.To.Offset == 0)) {
      I = GetOrCreateFuncEntry(BI.To.Name);
      I->getValue().EntryData.emplace_back(std::move(BI));
    }

    // If destination is the function start - update execution count.
    // NB: the data is skewed since we cannot tell tail recursion from
    //     branches to the function start.
    if (BI.To.IsSymbol && BI.To.Offset == 0) {
      I = GetOrCreateFuncEntry(BI.To.Name);
      I->getValue().ExecutionCount += BI.Branches;
    }
  }

  while (hasMemData()) {
    ErrorOr<MemInfo> Res = parseMemInfo();
    if (std::error_code EC = Res.getError())
      return EC;

    MemInfo MI = Res.get();

    // Ignore memory events not involving known pc.
    if (!MI.Offset.IsSymbol)
      continue;

    StringMapIterator<FuncMemData> I = GetOrCreateFuncMemEntry(MI.Offset.Name);
    I->getValue().Data.emplace_back(std::move(MI));
  }

  for (StringMapEntry<FuncBranchData> &FuncBranches : NamesToBranches)
    llvm::stable_sort(FuncBranches.second.Data);

  for (StringMapEntry<FuncMemData> &MemEvents : NamesToMemEvents)
    llvm::stable_sort(MemEvents.second.Data);

  return std::error_code();
}

void DataReader::buildLTONameMaps() {
  for (StringMapEntry<FuncBranchData> &FuncData : NamesToBranches) {
    const StringRef FuncName = FuncData.getKey();
    const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
    if (CommonName)
      LTOCommonNameMap[*CommonName].push_back(&FuncData.getValue());
  }

  for (StringMapEntry<FuncMemData> &FuncData : NamesToMemEvents) {
    const StringRef FuncName = FuncData.getKey();
    const Optional<StringRef> CommonName = getLTOCommonName(FuncName);
    if (CommonName)
      LTOCommonNameMemMap[*CommonName].push_back(&FuncData.getValue());
  }
}

namespace {
template <typename MapTy>
decltype(MapTy::MapEntryTy::second) *
fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) {
  // Do a reverse order iteration since the name in profile has a higher chance
  // of matching a name at the end of the list.
  for (auto SI = Symbols.rbegin(), SE = Symbols.rend(); SI != SE; ++SI) {
    auto I = Map.find(normalizeName((*SI)->getName()));
    if (I != Map.end())
      return &I->getValue();
  }
  return nullptr;
}

template <typename MapTy>
decltype(MapTy::MapEntryTy::second) *
fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) {
  // Do a reverse order iteration since the name in profile has a higher chance
  // of matching a name at the end of the list.
  for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
    auto I = Map.find(normalizeName(*FI));
    if (I != Map.end())
      return &I->getValue();
  }
  return nullptr;
}

template <typename MapTy>
std::vector<decltype(MapTy::MapEntryTy::second) *> fetchMapEntriesRegex(
    MapTy &Map,
    const StringMap<std::vector<decltype(MapTy::MapEntryTy::second) *>>
        &LTOCommonNameMap,
    const std::vector<StringRef> &FuncNames) {
  std::vector<decltype(MapTy::MapEntryTy::second) *> AllData;
  // Do a reverse order iteration since the name in profile has a higher chance
  // of matching a name at the end of the list.
  for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
    std::string Name = normalizeName(*FI);
    const Optional<StringRef> LTOCommonName = getLTOCommonName(Name);
    if (LTOCommonName) {
      auto I = LTOCommonNameMap.find(*LTOCommonName);
      if (I != LTOCommonNameMap.end()) {
        const std::vector<decltype(MapTy::MapEntryTy::second) *> &CommonData =
            I->getValue();
        AllData.insert(AllData.end(), CommonData.begin(), CommonData.end());
      }
    } else {
      auto I = Map.find(Name);
      if (I != Map.end())
        return {&I->getValue()};
    }
  }
  return AllData;
}

}

bool DataReader::mayHaveProfileData(const BinaryFunction &Function) {
  if (getBranchData(Function) || getMemData(Function))
    return true;

  if (getBranchDataForNames(Function.getNames()) ||
      getMemDataForNames(Function.getNames()))
    return true;

  if (!hasVolatileName(Function))
    return false;

  const std::vector<FuncBranchData *> AllBranchData =
      getBranchDataForNamesRegex(Function.getNames());
  if (!AllBranchData.empty())
    return true;

  const std::vector<FuncMemData *> AllMemData =
      getMemDataForNamesRegex(Function.getNames());
  if (!AllMemData.empty())
    return true;

  return false;
}

FuncBranchData *
DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) {
  return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, FuncNames);
}

FuncBranchData *
DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) {
  return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, Symbols);
}

FuncMemData *
DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) {
  return fetchMapEntry<NamesToMemEventsMapTy>(NamesToMemEvents, FuncNames);
}

FuncSampleData *
DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) {
  return fetchMapEntry<NamesToSamplesMapTy>(NamesToSamples, FuncNames);
}

std::vector<FuncBranchData *> DataReader::getBranchDataForNamesRegex(
    const std::vector<StringRef> &FuncNames) {
  return fetchMapEntriesRegex(NamesToBranches, LTOCommonNameMap, FuncNames);
}

std::vector<FuncMemData *>
DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) {
  return fetchMapEntriesRegex(NamesToMemEvents, LTOCommonNameMemMap, FuncNames);
}

bool DataReader::hasLocalsWithFileName() const {
  for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
    const StringRef &FuncName = Func.getKey();
    if (FuncName.count('/') == 2 && FuncName[0] != '/')
      return true;
  }
  return false;
}

void DataReader::dump() const {
  for (const StringMapEntry<FuncBranchData> &Func : NamesToBranches) {
    Diag << Func.getKey() << " branches:\n";
    for (const BranchInfo &BI : Func.getValue().Data)
      Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
           << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
    Diag << Func.getKey() << " entry points:\n";
    for (const BranchInfo &BI : Func.getValue().EntryData)
      Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
           << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
  }

  for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) {
    StringRef Event = I->getKey();
    Diag << "Data was collected with event: " << Event << "\n";
  }
  for (const StringMapEntry<FuncSampleData> &Func : NamesToSamples) {
    Diag << Func.getKey() << " samples:\n";
    for (const SampleInfo &SI : Func.getValue().Data)
      Diag << SI.Loc.Name << " " << SI.Loc.Offset << " " << SI.Hits << "\n";
  }

  for (const StringMapEntry<FuncMemData> &Func : NamesToMemEvents) {
    Diag << "Memory events for " << Func.getValue().Name;
    Location LastOffset(0);
    for (const MemInfo &MI : Func.getValue().Data) {
      if (MI.Offset == LastOffset)
        Diag << ", " << MI.Addr << "/" << MI.Count;
      else
        Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count;
      LastOffset = MI.Offset;
    }
    Diag << "\n";
  }
}

} // namespace bolt
} // namespace llvm
