1 //===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SourceMgr class. This class is used as a simple 10 // substrate for diagnostics, #include handling, and other low level things for 11 // simple parsers. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Support/SourceMgr.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/Support/ErrorOr.h" 22 #include "llvm/Support/Locale.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/Support/Path.h" 25 #include "llvm/Support/SMLoc.h" 26 #include "llvm/Support/WithColor.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <algorithm> 29 #include <cassert> 30 #include <cstddef> 31 #include <limits> 32 #include <memory> 33 #include <string> 34 #include <utility> 35 36 using namespace llvm; 37 38 static const size_t TabStop = 8; 39 40 unsigned SourceMgr::AddIncludeFile(const std::string &Filename, 41 SMLoc IncludeLoc, 42 std::string &IncludedFile) { 43 IncludedFile = Filename; 44 ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr = 45 MemoryBuffer::getFile(IncludedFile); 46 47 // If the file didn't exist directly, see if it's in an include path. 48 for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr; 49 ++i) { 50 IncludedFile = 51 IncludeDirectories[i] + sys::path::get_separator().data() + Filename; 52 NewBufOrErr = MemoryBuffer::getFile(IncludedFile); 53 } 54 55 if (!NewBufOrErr) 56 return 0; 57 58 return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc); 59 } 60 61 unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { 62 for (unsigned i = 0, e = Buffers.size(); i != e; ++i) 63 if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() && 64 // Use <= here so that a pointer to the null at the end of the buffer 65 // is included as part of the buffer. 66 Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd()) 67 return i + 1; 68 return 0; 69 } 70 71 template <typename T> 72 static std::vector<T> &GetOrCreateOffsetCache(void *&OffsetCache, 73 MemoryBuffer *Buffer) { 74 if (OffsetCache) 75 return *static_cast<std::vector<T> *>(OffsetCache); 76 77 // Lazily fill in the offset cache. 78 auto *Offsets = new std::vector<T>(); 79 size_t Sz = Buffer->getBufferSize(); 80 assert(Sz <= std::numeric_limits<T>::max()); 81 StringRef S = Buffer->getBuffer(); 82 for (size_t N = 0; N < Sz; ++N) { 83 if (S[N] == '\n') 84 Offsets->push_back(static_cast<T>(N)); 85 } 86 87 OffsetCache = Offsets; 88 return *Offsets; 89 } 90 91 template <typename T> 92 unsigned SourceMgr::SrcBuffer::getLineNumberSpecialized(const char *Ptr) const { 93 std::vector<T> &Offsets = 94 GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get()); 95 96 const char *BufStart = Buffer->getBufferStart(); 97 assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd()); 98 ptrdiff_t PtrDiff = Ptr - BufStart; 99 assert(PtrDiff >= 0 && 100 static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max()); 101 T PtrOffset = static_cast<T>(PtrDiff); 102 103 // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get 104 // the line number. 105 return llvm::lower_bound(Offsets, PtrOffset) - Offsets.begin() + 1; 106 } 107 108 /// Look up a given \p Ptr in in the buffer, determining which line it came 109 /// from. 110 unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const { 111 size_t Sz = Buffer->getBufferSize(); 112 if (Sz <= std::numeric_limits<uint8_t>::max()) 113 return getLineNumberSpecialized<uint8_t>(Ptr); 114 else if (Sz <= std::numeric_limits<uint16_t>::max()) 115 return getLineNumberSpecialized<uint16_t>(Ptr); 116 else if (Sz <= std::numeric_limits<uint32_t>::max()) 117 return getLineNumberSpecialized<uint32_t>(Ptr); 118 else 119 return getLineNumberSpecialized<uint64_t>(Ptr); 120 } 121 122 template <typename T> 123 const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized( 124 unsigned LineNo) const { 125 std::vector<T> &Offsets = 126 GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get()); 127 128 // We start counting line and column numbers from 1. 129 if (LineNo != 0) 130 --LineNo; 131 132 const char *BufStart = Buffer->getBufferStart(); 133 134 // The offset cache contains the location of the \n for the specified line, 135 // we want the start of the line. As such, we look for the previous entry. 136 if (LineNo == 0) 137 return BufStart; 138 if (LineNo > Offsets.size()) 139 return nullptr; 140 return BufStart + Offsets[LineNo - 1] + 1; 141 } 142 143 /// Return a pointer to the first character of the specified line number or 144 /// null if the line number is invalid. 145 const char * 146 SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned LineNo) const { 147 size_t Sz = Buffer->getBufferSize(); 148 if (Sz <= std::numeric_limits<uint8_t>::max()) 149 return getPointerForLineNumberSpecialized<uint8_t>(LineNo); 150 else if (Sz <= std::numeric_limits<uint16_t>::max()) 151 return getPointerForLineNumberSpecialized<uint16_t>(LineNo); 152 else if (Sz <= std::numeric_limits<uint32_t>::max()) 153 return getPointerForLineNumberSpecialized<uint32_t>(LineNo); 154 else 155 return getPointerForLineNumberSpecialized<uint64_t>(LineNo); 156 } 157 158 SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other) 159 : Buffer(std::move(Other.Buffer)), OffsetCache(Other.OffsetCache), 160 IncludeLoc(Other.IncludeLoc) { 161 Other.OffsetCache = nullptr; 162 } 163 164 SourceMgr::SrcBuffer::~SrcBuffer() { 165 if (OffsetCache) { 166 size_t Sz = Buffer->getBufferSize(); 167 if (Sz <= std::numeric_limits<uint8_t>::max()) 168 delete static_cast<std::vector<uint8_t> *>(OffsetCache); 169 else if (Sz <= std::numeric_limits<uint16_t>::max()) 170 delete static_cast<std::vector<uint16_t> *>(OffsetCache); 171 else if (Sz <= std::numeric_limits<uint32_t>::max()) 172 delete static_cast<std::vector<uint32_t> *>(OffsetCache); 173 else 174 delete static_cast<std::vector<uint64_t> *>(OffsetCache); 175 OffsetCache = nullptr; 176 } 177 } 178 179 std::pair<unsigned, unsigned> 180 SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const { 181 if (!BufferID) 182 BufferID = FindBufferContainingLoc(Loc); 183 assert(BufferID && "Invalid Location!"); 184 185 auto &SB = getBufferInfo(BufferID); 186 const char *Ptr = Loc.getPointer(); 187 188 unsigned LineNo = SB.getLineNumber(Ptr); 189 const char *BufStart = SB.Buffer->getBufferStart(); 190 size_t NewlineOffs = StringRef(BufStart, Ptr - BufStart).find_last_of("\n\r"); 191 if (NewlineOffs == StringRef::npos) 192 NewlineOffs = ~(size_t)0; 193 return std::make_pair(LineNo, Ptr - BufStart - NewlineOffs); 194 } 195 196 /// Given a line and column number in a mapped buffer, turn it into an SMLoc. 197 /// This will return a null SMLoc if the line/column location is invalid. 198 SMLoc SourceMgr::FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo, 199 unsigned ColNo) { 200 auto &SB = getBufferInfo(BufferID); 201 const char *Ptr = SB.getPointerForLineNumber(LineNo); 202 if (!Ptr) 203 return SMLoc(); 204 205 // We start counting line and column numbers from 1. 206 if (ColNo != 0) 207 --ColNo; 208 209 // If we have a column number, validate it. 210 if (ColNo) { 211 // Make sure the location is within the current line. 212 if (Ptr + ColNo > SB.Buffer->getBufferEnd()) 213 return SMLoc(); 214 215 // Make sure there is no newline in the way. 216 if (StringRef(Ptr, ColNo).find_first_of("\n\r") != StringRef::npos) 217 return SMLoc(); 218 219 Ptr += ColNo; 220 } 221 222 return SMLoc::getFromPointer(Ptr); 223 } 224 225 void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { 226 if (IncludeLoc == SMLoc()) 227 return; // Top of stack. 228 229 unsigned CurBuf = FindBufferContainingLoc(IncludeLoc); 230 assert(CurBuf && "Invalid or unspecified location!"); 231 232 PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); 233 234 OS << "Included from " << getBufferInfo(CurBuf).Buffer->getBufferIdentifier() 235 << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n"; 236 } 237 238 SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, 239 const Twine &Msg, ArrayRef<SMRange> Ranges, 240 ArrayRef<SMFixIt> FixIts) const { 241 // First thing to do: find the current buffer containing the specified 242 // location to pull out the source line. 243 SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges; 244 std::pair<unsigned, unsigned> LineAndCol; 245 StringRef BufferID = "<unknown>"; 246 std::string LineStr; 247 248 if (Loc.isValid()) { 249 unsigned CurBuf = FindBufferContainingLoc(Loc); 250 assert(CurBuf && "Invalid or unspecified location!"); 251 252 const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf); 253 BufferID = CurMB->getBufferIdentifier(); 254 255 // Scan backward to find the start of the line. 256 const char *LineStart = Loc.getPointer(); 257 const char *BufStart = CurMB->getBufferStart(); 258 while (LineStart != BufStart && LineStart[-1] != '\n' && 259 LineStart[-1] != '\r') 260 --LineStart; 261 262 // Get the end of the line. 263 const char *LineEnd = Loc.getPointer(); 264 const char *BufEnd = CurMB->getBufferEnd(); 265 while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r') 266 ++LineEnd; 267 LineStr = std::string(LineStart, LineEnd); 268 269 // Convert any ranges to column ranges that only intersect the line of the 270 // location. 271 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 272 SMRange R = Ranges[i]; 273 if (!R.isValid()) 274 continue; 275 276 // If the line doesn't contain any part of the range, then ignore it. 277 if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) 278 continue; 279 280 // Ignore pieces of the range that go onto other lines. 281 if (R.Start.getPointer() < LineStart) 282 R.Start = SMLoc::getFromPointer(LineStart); 283 if (R.End.getPointer() > LineEnd) 284 R.End = SMLoc::getFromPointer(LineEnd); 285 286 // Translate from SMLoc ranges to column ranges. 287 // FIXME: Handle multibyte characters. 288 ColRanges.push_back(std::make_pair(R.Start.getPointer() - LineStart, 289 R.End.getPointer() - LineStart)); 290 } 291 292 LineAndCol = getLineAndColumn(Loc, CurBuf); 293 } 294 295 return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first, 296 LineAndCol.second - 1, Kind, Msg.str(), LineStr, 297 ColRanges, FixIts); 298 } 299 300 void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic, 301 bool ShowColors) const { 302 // Report the message with the diagnostic handler if present. 303 if (DiagHandler) { 304 DiagHandler(Diagnostic, DiagContext); 305 return; 306 } 307 308 if (Diagnostic.getLoc().isValid()) { 309 unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc()); 310 assert(CurBuf && "Invalid or unspecified location!"); 311 PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); 312 } 313 314 Diagnostic.print(nullptr, OS, ShowColors); 315 } 316 317 void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc, 318 SourceMgr::DiagKind Kind, const Twine &Msg, 319 ArrayRef<SMRange> Ranges, ArrayRef<SMFixIt> FixIts, 320 bool ShowColors) const { 321 PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors); 322 } 323 324 void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, 325 const Twine &Msg, ArrayRef<SMRange> Ranges, 326 ArrayRef<SMFixIt> FixIts, bool ShowColors) const { 327 PrintMessage(errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors); 328 } 329 330 //===----------------------------------------------------------------------===// 331 // SMFixIt Implementation 332 //===----------------------------------------------------------------------===// 333 334 SMFixIt::SMFixIt(SMRange R, const Twine &Replacement) 335 : Range(R), Text(Replacement.str()) { 336 assert(R.isValid()); 337 } 338 339 //===----------------------------------------------------------------------===// 340 // SMDiagnostic Implementation 341 //===----------------------------------------------------------------------===// 342 343 SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line, 344 int Col, SourceMgr::DiagKind Kind, StringRef Msg, 345 StringRef LineStr, 346 ArrayRef<std::pair<unsigned, unsigned>> Ranges, 347 ArrayRef<SMFixIt> Hints) 348 : SM(&sm), Loc(L), Filename(std::string(FN)), LineNo(Line), ColumnNo(Col), 349 Kind(Kind), Message(std::string(Msg)), LineContents(std::string(LineStr)), 350 Ranges(Ranges.vec()), FixIts(Hints.begin(), Hints.end()) { 351 llvm::sort(FixIts); 352 } 353 354 static void buildFixItLine(std::string &CaretLine, std::string &FixItLine, 355 ArrayRef<SMFixIt> FixIts, 356 ArrayRef<char> SourceLine) { 357 if (FixIts.empty()) 358 return; 359 360 const char *LineStart = SourceLine.begin(); 361 const char *LineEnd = SourceLine.end(); 362 363 size_t PrevHintEndCol = 0; 364 365 for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end(); I != E; 366 ++I) { 367 // If the fixit contains a newline or tab, ignore it. 368 if (I->getText().find_first_of("\n\r\t") != StringRef::npos) 369 continue; 370 371 SMRange R = I->getRange(); 372 373 // If the line doesn't contain any part of the range, then ignore it. 374 if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) 375 continue; 376 377 // Translate from SMLoc to column. 378 // Ignore pieces of the range that go onto other lines. 379 // FIXME: Handle multibyte characters in the source line. 380 unsigned FirstCol; 381 if (R.Start.getPointer() < LineStart) 382 FirstCol = 0; 383 else 384 FirstCol = R.Start.getPointer() - LineStart; 385 386 // If we inserted a long previous hint, push this one forwards, and add 387 // an extra space to show that this is not part of the previous 388 // completion. This is sort of the best we can do when two hints appear 389 // to overlap. 390 // 391 // Note that if this hint is located immediately after the previous 392 // hint, no space will be added, since the location is more important. 393 unsigned HintCol = FirstCol; 394 if (HintCol < PrevHintEndCol) 395 HintCol = PrevHintEndCol + 1; 396 397 // FIXME: This assertion is intended to catch unintended use of multibyte 398 // characters in fixits. If we decide to do this, we'll have to track 399 // separate byte widths for the source and fixit lines. 400 assert((size_t)sys::locale::columnWidth(I->getText()) == 401 I->getText().size()); 402 403 // This relies on one byte per column in our fixit hints. 404 unsigned LastColumnModified = HintCol + I->getText().size(); 405 if (LastColumnModified > FixItLine.size()) 406 FixItLine.resize(LastColumnModified, ' '); 407 408 std::copy(I->getText().begin(), I->getText().end(), 409 FixItLine.begin() + HintCol); 410 411 PrevHintEndCol = LastColumnModified; 412 413 // For replacements, mark the removal range with '~'. 414 // FIXME: Handle multibyte characters in the source line. 415 unsigned LastCol; 416 if (R.End.getPointer() >= LineEnd) 417 LastCol = LineEnd - LineStart; 418 else 419 LastCol = R.End.getPointer() - LineStart; 420 421 std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~'); 422 } 423 } 424 425 static void printSourceLine(raw_ostream &S, StringRef LineContents) { 426 // Print out the source line one character at a time, so we can expand tabs. 427 for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) { 428 size_t NextTab = LineContents.find('\t', i); 429 // If there were no tabs left, print the rest, we are done. 430 if (NextTab == StringRef::npos) { 431 S << LineContents.drop_front(i); 432 break; 433 } 434 435 // Otherwise, print from i to NextTab. 436 S << LineContents.slice(i, NextTab); 437 OutCol += NextTab - i; 438 i = NextTab; 439 440 // If we have a tab, emit at least one space, then round up to 8 columns. 441 do { 442 S << ' '; 443 ++OutCol; 444 } while ((OutCol % TabStop) != 0); 445 } 446 S << '\n'; 447 } 448 449 static bool isNonASCII(char c) { return c & 0x80; } 450 451 void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, bool ShowColors, 452 bool ShowKindLabel) const { 453 { 454 WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors); 455 456 if (ProgName && ProgName[0]) 457 S << ProgName << ": "; 458 459 if (!Filename.empty()) { 460 if (Filename == "-") 461 S << "<stdin>"; 462 else 463 S << Filename; 464 465 if (LineNo != -1) { 466 S << ':' << LineNo; 467 if (ColumnNo != -1) 468 S << ':' << (ColumnNo + 1); 469 } 470 S << ": "; 471 } 472 } 473 474 if (ShowKindLabel) { 475 switch (Kind) { 476 case SourceMgr::DK_Error: 477 WithColor::error(OS, "", !ShowColors); 478 break; 479 case SourceMgr::DK_Warning: 480 WithColor::warning(OS, "", !ShowColors); 481 break; 482 case SourceMgr::DK_Note: 483 WithColor::note(OS, "", !ShowColors); 484 break; 485 case SourceMgr::DK_Remark: 486 WithColor::remark(OS, "", !ShowColors); 487 break; 488 } 489 } 490 491 WithColor(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors) 492 << Message << '\n'; 493 494 if (LineNo == -1 || ColumnNo == -1) 495 return; 496 497 // FIXME: If there are multibyte or multi-column characters in the source, all 498 // our ranges will be wrong. To do this properly, we'll need a byte-to-column 499 // map like Clang's TextDiagnostic. For now, we'll just handle tabs by 500 // expanding them later, and bail out rather than show incorrect ranges and 501 // misaligned fixits for any other odd characters. 502 if (find_if(LineContents, isNonASCII) != LineContents.end()) { 503 printSourceLine(OS, LineContents); 504 return; 505 } 506 size_t NumColumns = LineContents.size(); 507 508 // Build the line with the caret and ranges. 509 std::string CaretLine(NumColumns + 1, ' '); 510 511 // Expand any ranges. 512 for (unsigned r = 0, e = Ranges.size(); r != e; ++r) { 513 std::pair<unsigned, unsigned> R = Ranges[r]; 514 std::fill(&CaretLine[R.first], 515 &CaretLine[std::min((size_t)R.second, CaretLine.size())], '~'); 516 } 517 518 // Add any fix-its. 519 // FIXME: Find the beginning of the line properly for multibyte characters. 520 std::string FixItInsertionLine; 521 buildFixItLine( 522 CaretLine, FixItInsertionLine, FixIts, 523 makeArrayRef(Loc.getPointer() - ColumnNo, LineContents.size())); 524 525 // Finally, plop on the caret. 526 if (unsigned(ColumnNo) <= NumColumns) 527 CaretLine[ColumnNo] = '^'; 528 else 529 CaretLine[NumColumns] = '^'; 530 531 // ... and remove trailing whitespace so the output doesn't wrap for it. We 532 // know that the line isn't completely empty because it has the caret in it at 533 // least. 534 CaretLine.erase(CaretLine.find_last_not_of(' ') + 1); 535 536 printSourceLine(OS, LineContents); 537 538 { 539 WithColor S(OS, raw_ostream::GREEN, true, false, !ShowColors); 540 541 // Print out the caret line, matching tabs in the source line. 542 for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) { 543 if (i >= LineContents.size() || LineContents[i] != '\t') { 544 S << CaretLine[i]; 545 ++OutCol; 546 continue; 547 } 548 549 // Okay, we have a tab. Insert the appropriate number of characters. 550 do { 551 S << CaretLine[i]; 552 ++OutCol; 553 } while ((OutCol % TabStop) != 0); 554 } 555 S << '\n'; 556 } 557 558 // Print out the replacement line, matching tabs in the source line. 559 if (FixItInsertionLine.empty()) 560 return; 561 562 for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) { 563 if (i >= LineContents.size() || LineContents[i] != '\t') { 564 OS << FixItInsertionLine[i]; 565 ++OutCol; 566 continue; 567 } 568 569 // Okay, we have a tab. Insert the appropriate number of characters. 570 do { 571 OS << FixItInsertionLine[i]; 572 // FIXME: This is trying not to break up replacements, but then to re-sync 573 // with the tabs between replacements. This will fail, though, if two 574 // fix-it replacements are exactly adjacent, or if a fix-it contains a 575 // space. Really we should be precomputing column widths, which we'll 576 // need anyway for multibyte chars. 577 if (FixItInsertionLine[i] != ' ') 578 ++i; 579 ++OutCol; 580 } while (((OutCol % TabStop) != 0) && i != e); 581 } 582 OS << '\n'; 583 } 584