1 //===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SourceMgr class. This class is used as a simple 10 // substrate for diagnostics, #include handling, and other low level things for 11 // simple parsers. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Support/SourceMgr.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/Support/ErrorOr.h" 22 #include "llvm/Support/Locale.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/Support/Path.h" 25 #include "llvm/Support/SMLoc.h" 26 #include "llvm/Support/WithColor.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <algorithm> 29 #include <cassert> 30 #include <cstddef> 31 #include <limits> 32 #include <memory> 33 #include <string> 34 #include <utility> 35 36 using namespace llvm; 37 38 static const size_t TabStop = 8; 39 40 unsigned SourceMgr::AddIncludeFile(const std::string &Filename, 41 SMLoc IncludeLoc, 42 std::string &IncludedFile) { 43 IncludedFile = Filename; 44 ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr = 45 MemoryBuffer::getFile(IncludedFile); 46 47 // If the file didn't exist directly, see if it's in an include path. 48 for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr; 49 ++i) { 50 IncludedFile = 51 IncludeDirectories[i] + sys::path::get_separator().data() + Filename; 52 NewBufOrErr = MemoryBuffer::getFile(IncludedFile); 53 } 54 55 if (!NewBufOrErr) 56 return 0; 57 58 return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc); 59 } 60 61 unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { 62 for (unsigned i = 0, e = Buffers.size(); i != e; ++i) 63 if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() && 64 // Use <= here so that a pointer to the null at the end of the buffer 65 // is included as part of the buffer. 66 Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd()) 67 return i + 1; 68 return 0; 69 } 70 71 template <typename T> 72 static std::vector<T> &GetOrCreateOffsetCache( 73 PointerUnion<std::vector<uint8_t> *, std::vector<uint16_t> *, 74 std::vector<uint32_t> *, std::vector<uint64_t> *> &OffsetCache, 75 MemoryBuffer *Buffer) { 76 if (!OffsetCache.isNull()) 77 return *OffsetCache.get<std::vector<T> *>(); 78 79 // Lazily fill in the offset cache. 80 auto *Offsets = new std::vector<T>(); 81 OffsetCache = Offsets; 82 size_t Sz = Buffer->getBufferSize(); 83 assert(Sz <= std::numeric_limits<T>::max()); 84 StringRef S = Buffer->getBuffer(); 85 for (size_t N = 0; N < Sz; ++N) { 86 if (S[N] == '\n') 87 Offsets->push_back(static_cast<T>(N)); 88 } 89 90 return *Offsets; 91 } 92 93 template <typename T> 94 unsigned SourceMgr::SrcBuffer::getLineNumberSpecialized(const char *Ptr) const { 95 std::vector<T> &Offsets = 96 GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get()); 97 98 const char *BufStart = Buffer->getBufferStart(); 99 assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd()); 100 ptrdiff_t PtrDiff = Ptr - BufStart; 101 assert(PtrDiff >= 0 && 102 static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max()); 103 T PtrOffset = static_cast<T>(PtrDiff); 104 105 // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get 106 // the line number. 107 return llvm::lower_bound(Offsets, PtrOffset) - Offsets.begin() + 1; 108 } 109 110 /// Look up a given \p Ptr in in the buffer, determining which line it came 111 /// from. 112 unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const { 113 size_t Sz = Buffer->getBufferSize(); 114 if (Sz <= std::numeric_limits<uint8_t>::max()) 115 return getLineNumberSpecialized<uint8_t>(Ptr); 116 else if (Sz <= std::numeric_limits<uint16_t>::max()) 117 return getLineNumberSpecialized<uint16_t>(Ptr); 118 else if (Sz <= std::numeric_limits<uint32_t>::max()) 119 return getLineNumberSpecialized<uint32_t>(Ptr); 120 else 121 return getLineNumberSpecialized<uint64_t>(Ptr); 122 } 123 124 template <typename T> 125 const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized( 126 unsigned LineNo) const { 127 std::vector<T> &Offsets = 128 GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get()); 129 130 // We start counting line and column numbers from 1. 131 if (LineNo != 0) 132 --LineNo; 133 134 const char *BufStart = Buffer->getBufferStart(); 135 136 // The offset cache contains the location of the \n for the specified line, 137 // we want the start of the line. As such, we look for the previous entry. 138 if (LineNo == 0) 139 return BufStart; 140 if (LineNo > Offsets.size()) 141 return nullptr; 142 return BufStart + Offsets[LineNo - 1] + 1; 143 } 144 145 /// Return a pointer to the first character of the specified line number or 146 /// null if the line number is invalid. 147 const char * 148 SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned LineNo) const { 149 size_t Sz = Buffer->getBufferSize(); 150 if (Sz <= std::numeric_limits<uint8_t>::max()) 151 return getPointerForLineNumberSpecialized<uint8_t>(LineNo); 152 else if (Sz <= std::numeric_limits<uint16_t>::max()) 153 return getPointerForLineNumberSpecialized<uint16_t>(LineNo); 154 else if (Sz <= std::numeric_limits<uint32_t>::max()) 155 return getPointerForLineNumberSpecialized<uint32_t>(LineNo); 156 else 157 return getPointerForLineNumberSpecialized<uint64_t>(LineNo); 158 } 159 160 SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other) 161 : Buffer(std::move(Other.Buffer)), OffsetCache(Other.OffsetCache), 162 IncludeLoc(Other.IncludeLoc) { 163 Other.OffsetCache = nullptr; 164 } 165 166 SourceMgr::SrcBuffer::~SrcBuffer() { 167 if (!OffsetCache.isNull()) { 168 if (OffsetCache.is<std::vector<uint8_t> *>()) 169 delete OffsetCache.get<std::vector<uint8_t> *>(); 170 else if (OffsetCache.is<std::vector<uint16_t> *>()) 171 delete OffsetCache.get<std::vector<uint16_t> *>(); 172 else if (OffsetCache.is<std::vector<uint32_t> *>()) 173 delete OffsetCache.get<std::vector<uint32_t> *>(); 174 else 175 delete OffsetCache.get<std::vector<uint64_t> *>(); 176 OffsetCache = nullptr; 177 } 178 } 179 180 std::pair<unsigned, unsigned> 181 SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const { 182 if (!BufferID) 183 BufferID = FindBufferContainingLoc(Loc); 184 assert(BufferID && "Invalid Location!"); 185 186 auto &SB = getBufferInfo(BufferID); 187 const char *Ptr = Loc.getPointer(); 188 189 unsigned LineNo = SB.getLineNumber(Ptr); 190 const char *BufStart = SB.Buffer->getBufferStart(); 191 size_t NewlineOffs = StringRef(BufStart, Ptr - BufStart).find_last_of("\n\r"); 192 if (NewlineOffs == StringRef::npos) 193 NewlineOffs = ~(size_t)0; 194 return std::make_pair(LineNo, Ptr - BufStart - NewlineOffs); 195 } 196 197 /// Given a line and column number in a mapped buffer, turn it into an SMLoc. 198 /// This will return a null SMLoc if the line/column location is invalid. 199 SMLoc SourceMgr::FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo, 200 unsigned ColNo) { 201 auto &SB = getBufferInfo(BufferID); 202 const char *Ptr = SB.getPointerForLineNumber(LineNo); 203 if (!Ptr) 204 return SMLoc(); 205 206 // We start counting line and column numbers from 1. 207 if (ColNo != 0) 208 --ColNo; 209 210 // If we have a column number, validate it. 211 if (ColNo) { 212 // Make sure the location is within the current line. 213 if (Ptr + ColNo > SB.Buffer->getBufferEnd()) 214 return SMLoc(); 215 216 // Make sure there is no newline in the way. 217 if (StringRef(Ptr, ColNo).find_first_of("\n\r") != StringRef::npos) 218 return SMLoc(); 219 220 Ptr += ColNo; 221 } 222 223 return SMLoc::getFromPointer(Ptr); 224 } 225 226 void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { 227 if (IncludeLoc == SMLoc()) 228 return; // Top of stack. 229 230 unsigned CurBuf = FindBufferContainingLoc(IncludeLoc); 231 assert(CurBuf && "Invalid or unspecified location!"); 232 233 PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); 234 235 OS << "Included from " << getBufferInfo(CurBuf).Buffer->getBufferIdentifier() 236 << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n"; 237 } 238 239 SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, 240 const Twine &Msg, ArrayRef<SMRange> Ranges, 241 ArrayRef<SMFixIt> FixIts) const { 242 // First thing to do: find the current buffer containing the specified 243 // location to pull out the source line. 244 SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges; 245 std::pair<unsigned, unsigned> LineAndCol; 246 StringRef BufferID = "<unknown>"; 247 std::string LineStr; 248 249 if (Loc.isValid()) { 250 unsigned CurBuf = FindBufferContainingLoc(Loc); 251 assert(CurBuf && "Invalid or unspecified location!"); 252 253 const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf); 254 BufferID = CurMB->getBufferIdentifier(); 255 256 // Scan backward to find the start of the line. 257 const char *LineStart = Loc.getPointer(); 258 const char *BufStart = CurMB->getBufferStart(); 259 while (LineStart != BufStart && LineStart[-1] != '\n' && 260 LineStart[-1] != '\r') 261 --LineStart; 262 263 // Get the end of the line. 264 const char *LineEnd = Loc.getPointer(); 265 const char *BufEnd = CurMB->getBufferEnd(); 266 while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r') 267 ++LineEnd; 268 LineStr = std::string(LineStart, LineEnd); 269 270 // Convert any ranges to column ranges that only intersect the line of the 271 // location. 272 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 273 SMRange R = Ranges[i]; 274 if (!R.isValid()) 275 continue; 276 277 // If the line doesn't contain any part of the range, then ignore it. 278 if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) 279 continue; 280 281 // Ignore pieces of the range that go onto other lines. 282 if (R.Start.getPointer() < LineStart) 283 R.Start = SMLoc::getFromPointer(LineStart); 284 if (R.End.getPointer() > LineEnd) 285 R.End = SMLoc::getFromPointer(LineEnd); 286 287 // Translate from SMLoc ranges to column ranges. 288 // FIXME: Handle multibyte characters. 289 ColRanges.push_back(std::make_pair(R.Start.getPointer() - LineStart, 290 R.End.getPointer() - LineStart)); 291 } 292 293 LineAndCol = getLineAndColumn(Loc, CurBuf); 294 } 295 296 return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first, 297 LineAndCol.second - 1, Kind, Msg.str(), LineStr, 298 ColRanges, FixIts); 299 } 300 301 void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic, 302 bool ShowColors) const { 303 // Report the message with the diagnostic handler if present. 304 if (DiagHandler) { 305 DiagHandler(Diagnostic, DiagContext); 306 return; 307 } 308 309 if (Diagnostic.getLoc().isValid()) { 310 unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc()); 311 assert(CurBuf && "Invalid or unspecified location!"); 312 PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); 313 } 314 315 Diagnostic.print(nullptr, OS, ShowColors); 316 } 317 318 void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc, 319 SourceMgr::DiagKind Kind, const Twine &Msg, 320 ArrayRef<SMRange> Ranges, ArrayRef<SMFixIt> FixIts, 321 bool ShowColors) const { 322 PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors); 323 } 324 325 void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, 326 const Twine &Msg, ArrayRef<SMRange> Ranges, 327 ArrayRef<SMFixIt> FixIts, bool ShowColors) const { 328 PrintMessage(errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors); 329 } 330 331 //===----------------------------------------------------------------------===// 332 // SMDiagnostic Implementation 333 //===----------------------------------------------------------------------===// 334 335 SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line, 336 int Col, SourceMgr::DiagKind Kind, StringRef Msg, 337 StringRef LineStr, 338 ArrayRef<std::pair<unsigned, unsigned>> Ranges, 339 ArrayRef<SMFixIt> Hints) 340 : SM(&sm), Loc(L), Filename(std::string(FN)), LineNo(Line), ColumnNo(Col), 341 Kind(Kind), Message(std::string(Msg)), LineContents(std::string(LineStr)), 342 Ranges(Ranges.vec()), FixIts(Hints.begin(), Hints.end()) { 343 llvm::sort(FixIts); 344 } 345 346 static void buildFixItLine(std::string &CaretLine, std::string &FixItLine, 347 ArrayRef<SMFixIt> FixIts, 348 ArrayRef<char> SourceLine) { 349 if (FixIts.empty()) 350 return; 351 352 const char *LineStart = SourceLine.begin(); 353 const char *LineEnd = SourceLine.end(); 354 355 size_t PrevHintEndCol = 0; 356 357 for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end(); I != E; 358 ++I) { 359 // If the fixit contains a newline or tab, ignore it. 360 if (I->getText().find_first_of("\n\r\t") != StringRef::npos) 361 continue; 362 363 SMRange R = I->getRange(); 364 365 // If the line doesn't contain any part of the range, then ignore it. 366 if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) 367 continue; 368 369 // Translate from SMLoc to column. 370 // Ignore pieces of the range that go onto other lines. 371 // FIXME: Handle multibyte characters in the source line. 372 unsigned FirstCol; 373 if (R.Start.getPointer() < LineStart) 374 FirstCol = 0; 375 else 376 FirstCol = R.Start.getPointer() - LineStart; 377 378 // If we inserted a long previous hint, push this one forwards, and add 379 // an extra space to show that this is not part of the previous 380 // completion. This is sort of the best we can do when two hints appear 381 // to overlap. 382 // 383 // Note that if this hint is located immediately after the previous 384 // hint, no space will be added, since the location is more important. 385 unsigned HintCol = FirstCol; 386 if (HintCol < PrevHintEndCol) 387 HintCol = PrevHintEndCol + 1; 388 389 // FIXME: This assertion is intended to catch unintended use of multibyte 390 // characters in fixits. If we decide to do this, we'll have to track 391 // separate byte widths for the source and fixit lines. 392 assert((size_t)sys::locale::columnWidth(I->getText()) == 393 I->getText().size()); 394 395 // This relies on one byte per column in our fixit hints. 396 unsigned LastColumnModified = HintCol + I->getText().size(); 397 if (LastColumnModified > FixItLine.size()) 398 FixItLine.resize(LastColumnModified, ' '); 399 400 std::copy(I->getText().begin(), I->getText().end(), 401 FixItLine.begin() + HintCol); 402 403 PrevHintEndCol = LastColumnModified; 404 405 // For replacements, mark the removal range with '~'. 406 // FIXME: Handle multibyte characters in the source line. 407 unsigned LastCol; 408 if (R.End.getPointer() >= LineEnd) 409 LastCol = LineEnd - LineStart; 410 else 411 LastCol = R.End.getPointer() - LineStart; 412 413 std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~'); 414 } 415 } 416 417 static void printSourceLine(raw_ostream &S, StringRef LineContents) { 418 // Print out the source line one character at a time, so we can expand tabs. 419 for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) { 420 size_t NextTab = LineContents.find('\t', i); 421 // If there were no tabs left, print the rest, we are done. 422 if (NextTab == StringRef::npos) { 423 S << LineContents.drop_front(i); 424 break; 425 } 426 427 // Otherwise, print from i to NextTab. 428 S << LineContents.slice(i, NextTab); 429 OutCol += NextTab - i; 430 i = NextTab; 431 432 // If we have a tab, emit at least one space, then round up to 8 columns. 433 do { 434 S << ' '; 435 ++OutCol; 436 } while ((OutCol % TabStop) != 0); 437 } 438 S << '\n'; 439 } 440 441 static bool isNonASCII(char c) { return c & 0x80; } 442 443 void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, bool ShowColors, 444 bool ShowKindLabel) const { 445 { 446 WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors); 447 448 if (ProgName && ProgName[0]) 449 S << ProgName << ": "; 450 451 if (!Filename.empty()) { 452 if (Filename == "-") 453 S << "<stdin>"; 454 else 455 S << Filename; 456 457 if (LineNo != -1) { 458 S << ':' << LineNo; 459 if (ColumnNo != -1) 460 S << ':' << (ColumnNo + 1); 461 } 462 S << ": "; 463 } 464 } 465 466 if (ShowKindLabel) { 467 switch (Kind) { 468 case SourceMgr::DK_Error: 469 WithColor::error(OS, "", !ShowColors); 470 break; 471 case SourceMgr::DK_Warning: 472 WithColor::warning(OS, "", !ShowColors); 473 break; 474 case SourceMgr::DK_Note: 475 WithColor::note(OS, "", !ShowColors); 476 break; 477 case SourceMgr::DK_Remark: 478 WithColor::remark(OS, "", !ShowColors); 479 break; 480 } 481 } 482 483 WithColor(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors) 484 << Message << '\n'; 485 486 if (LineNo == -1 || ColumnNo == -1) 487 return; 488 489 // FIXME: If there are multibyte or multi-column characters in the source, all 490 // our ranges will be wrong. To do this properly, we'll need a byte-to-column 491 // map like Clang's TextDiagnostic. For now, we'll just handle tabs by 492 // expanding them later, and bail out rather than show incorrect ranges and 493 // misaligned fixits for any other odd characters. 494 if (find_if(LineContents, isNonASCII) != LineContents.end()) { 495 printSourceLine(OS, LineContents); 496 return; 497 } 498 size_t NumColumns = LineContents.size(); 499 500 // Build the line with the caret and ranges. 501 std::string CaretLine(NumColumns + 1, ' '); 502 503 // Expand any ranges. 504 for (unsigned r = 0, e = Ranges.size(); r != e; ++r) { 505 std::pair<unsigned, unsigned> R = Ranges[r]; 506 std::fill(&CaretLine[R.first], 507 &CaretLine[std::min((size_t)R.second, CaretLine.size())], '~'); 508 } 509 510 // Add any fix-its. 511 // FIXME: Find the beginning of the line properly for multibyte characters. 512 std::string FixItInsertionLine; 513 buildFixItLine( 514 CaretLine, FixItInsertionLine, FixIts, 515 makeArrayRef(Loc.getPointer() - ColumnNo, LineContents.size())); 516 517 // Finally, plop on the caret. 518 if (unsigned(ColumnNo) <= NumColumns) 519 CaretLine[ColumnNo] = '^'; 520 else 521 CaretLine[NumColumns] = '^'; 522 523 // ... and remove trailing whitespace so the output doesn't wrap for it. We 524 // know that the line isn't completely empty because it has the caret in it at 525 // least. 526 CaretLine.erase(CaretLine.find_last_not_of(' ') + 1); 527 528 printSourceLine(OS, LineContents); 529 530 { 531 WithColor S(OS, raw_ostream::GREEN, true, false, !ShowColors); 532 533 // Print out the caret line, matching tabs in the source line. 534 for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) { 535 if (i >= LineContents.size() || LineContents[i] != '\t') { 536 S << CaretLine[i]; 537 ++OutCol; 538 continue; 539 } 540 541 // Okay, we have a tab. Insert the appropriate number of characters. 542 do { 543 S << CaretLine[i]; 544 ++OutCol; 545 } while ((OutCol % TabStop) != 0); 546 } 547 S << '\n'; 548 } 549 550 // Print out the replacement line, matching tabs in the source line. 551 if (FixItInsertionLine.empty()) 552 return; 553 554 for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) { 555 if (i >= LineContents.size() || LineContents[i] != '\t') { 556 OS << FixItInsertionLine[i]; 557 ++OutCol; 558 continue; 559 } 560 561 // Okay, we have a tab. Insert the appropriate number of characters. 562 do { 563 OS << FixItInsertionLine[i]; 564 // FIXME: This is trying not to break up replacements, but then to re-sync 565 // with the tabs between replacements. This will fail, though, if two 566 // fix-it replacements are exactly adjacent, or if a fix-it contains a 567 // space. Really we should be precomputing column widths, which we'll 568 // need anyway for multibyte chars. 569 if (FixItInsertionLine[i] != ' ') 570 ++i; 571 ++OutCol; 572 } while (((OutCol % TabStop) != 0) && i != e); 573 } 574 OS << '\n'; 575 } 576