1 //===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SourceMgr class.  This class is used as a simple
10 // substrate for diagnostics, #include handling, and other low level things for
11 // simple parsers.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Support/SourceMgr.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/Support/ErrorOr.h"
22 #include "llvm/Support/Locale.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/SMLoc.h"
26 #include "llvm/Support/WithColor.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <limits>
32 #include <memory>
33 #include <string>
34 #include <utility>
35 
36 using namespace llvm;
37 
/// Column width of a tab stop; used when expanding tabs in the printed source,
/// caret and fix-it lines.
static constexpr size_t TabStop = 8;
39 
40 unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
41                                    SMLoc IncludeLoc,
42                                    std::string &IncludedFile) {
43   IncludedFile = Filename;
44   ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
45       MemoryBuffer::getFile(IncludedFile);
46 
47   // If the file didn't exist directly, see if it's in an include path.
48   for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr;
49        ++i) {
50     IncludedFile =
51         IncludeDirectories[i] + sys::path::get_separator().data() + Filename;
52     NewBufOrErr = MemoryBuffer::getFile(IncludedFile);
53   }
54 
55   if (!NewBufOrErr)
56     return 0;
57 
58   return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc);
59 }
60 
61 unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
62   for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
63     if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
64         // Use <= here so that a pointer to the null at the end of the buffer
65         // is included as part of the buffer.
66         Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd())
67       return i + 1;
68   return 0;
69 }
70 
71 template <typename T>
72 static std::vector<T> &GetOrCreateOffsetCache(
73     PointerUnion<std::vector<uint8_t> *, std::vector<uint16_t> *,
74                  std::vector<uint32_t> *, std::vector<uint64_t> *> &OffsetCache,
75     MemoryBuffer *Buffer) {
76   if (!OffsetCache.isNull())
77     return *OffsetCache.get<std::vector<T> *>();
78 
79   // Lazily fill in the offset cache.
80   auto *Offsets = new std::vector<T>();
81   OffsetCache = Offsets;
82   size_t Sz = Buffer->getBufferSize();
83   assert(Sz <= std::numeric_limits<T>::max());
84   StringRef S = Buffer->getBuffer();
85   for (size_t N = 0; N < Sz; ++N) {
86     if (S[N] == '\n')
87       Offsets->push_back(static_cast<T>(N));
88   }
89 
90   return *Offsets;
91 }
92 
93 template <typename T>
94 unsigned SourceMgr::SrcBuffer::getLineNumberSpecialized(const char *Ptr) const {
95   std::vector<T> &Offsets =
96       GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get());
97 
98   const char *BufStart = Buffer->getBufferStart();
99   assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd());
100   ptrdiff_t PtrDiff = Ptr - BufStart;
101   assert(PtrDiff >= 0 &&
102          static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
103   T PtrOffset = static_cast<T>(PtrDiff);
104 
105   // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get
106   // the line number.
107   return llvm::lower_bound(Offsets, PtrOffset) - Offsets.begin() + 1;
108 }
109 
110 /// Look up a given \p Ptr in in the buffer, determining which line it came
111 /// from.
112 unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
113   size_t Sz = Buffer->getBufferSize();
114   if (Sz <= std::numeric_limits<uint8_t>::max())
115     return getLineNumberSpecialized<uint8_t>(Ptr);
116   else if (Sz <= std::numeric_limits<uint16_t>::max())
117     return getLineNumberSpecialized<uint16_t>(Ptr);
118   else if (Sz <= std::numeric_limits<uint32_t>::max())
119     return getLineNumberSpecialized<uint32_t>(Ptr);
120   else
121     return getLineNumberSpecialized<uint64_t>(Ptr);
122 }
123 
124 template <typename T>
125 const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized(
126     unsigned LineNo) const {
127   std::vector<T> &Offsets =
128       GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get());
129 
130   // We start counting line and column numbers from 1.
131   if (LineNo != 0)
132     --LineNo;
133 
134   const char *BufStart = Buffer->getBufferStart();
135 
136   // The offset cache contains the location of the \n for the specified line,
137   // we want the start of the line.  As such, we look for the previous entry.
138   if (LineNo == 0)
139     return BufStart;
140   if (LineNo > Offsets.size())
141     return nullptr;
142   return BufStart + Offsets[LineNo - 1] + 1;
143 }
144 
145 /// Return a pointer to the first character of the specified line number or
146 /// null if the line number is invalid.
147 const char *
148 SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned LineNo) const {
149   size_t Sz = Buffer->getBufferSize();
150   if (Sz <= std::numeric_limits<uint8_t>::max())
151     return getPointerForLineNumberSpecialized<uint8_t>(LineNo);
152   else if (Sz <= std::numeric_limits<uint16_t>::max())
153     return getPointerForLineNumberSpecialized<uint16_t>(LineNo);
154   else if (Sz <= std::numeric_limits<uint32_t>::max())
155     return getPointerForLineNumberSpecialized<uint32_t>(LineNo);
156   else
157     return getPointerForLineNumberSpecialized<uint64_t>(LineNo);
158 }
159 
160 SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
161     : Buffer(std::move(Other.Buffer)), OffsetCache(Other.OffsetCache),
162       IncludeLoc(Other.IncludeLoc) {
163   Other.OffsetCache = nullptr;
164 }
165 
166 SourceMgr::SrcBuffer::~SrcBuffer() {
167   if (!OffsetCache.isNull()) {
168     if (OffsetCache.is<std::vector<uint8_t> *>())
169       delete OffsetCache.get<std::vector<uint8_t> *>();
170     else if (OffsetCache.is<std::vector<uint16_t> *>())
171       delete OffsetCache.get<std::vector<uint16_t> *>();
172     else if (OffsetCache.is<std::vector<uint32_t> *>())
173       delete OffsetCache.get<std::vector<uint32_t> *>();
174     else
175       delete OffsetCache.get<std::vector<uint64_t> *>();
176     OffsetCache = nullptr;
177   }
178 }
179 
180 std::pair<unsigned, unsigned>
181 SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
182   if (!BufferID)
183     BufferID = FindBufferContainingLoc(Loc);
184   assert(BufferID && "Invalid Location!");
185 
186   auto &SB = getBufferInfo(BufferID);
187   const char *Ptr = Loc.getPointer();
188 
189   unsigned LineNo = SB.getLineNumber(Ptr);
190   const char *BufStart = SB.Buffer->getBufferStart();
191   size_t NewlineOffs = StringRef(BufStart, Ptr - BufStart).find_last_of("\n\r");
192   if (NewlineOffs == StringRef::npos)
193     NewlineOffs = ~(size_t)0;
194   return std::make_pair(LineNo, Ptr - BufStart - NewlineOffs);
195 }
196 
197 /// Given a line and column number in a mapped buffer, turn it into an SMLoc.
198 /// This will return a null SMLoc if the line/column location is invalid.
199 SMLoc SourceMgr::FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo,
200                                          unsigned ColNo) {
201   auto &SB = getBufferInfo(BufferID);
202   const char *Ptr = SB.getPointerForLineNumber(LineNo);
203   if (!Ptr)
204     return SMLoc();
205 
206   // We start counting line and column numbers from 1.
207   if (ColNo != 0)
208     --ColNo;
209 
210   // If we have a column number, validate it.
211   if (ColNo) {
212     // Make sure the location is within the current line.
213     if (Ptr + ColNo > SB.Buffer->getBufferEnd())
214       return SMLoc();
215 
216     // Make sure there is no newline in the way.
217     if (StringRef(Ptr, ColNo).find_first_of("\n\r") != StringRef::npos)
218       return SMLoc();
219 
220     Ptr += ColNo;
221   }
222 
223   return SMLoc::getFromPointer(Ptr);
224 }
225 
226 void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
227   if (IncludeLoc == SMLoc())
228     return; // Top of stack.
229 
230   unsigned CurBuf = FindBufferContainingLoc(IncludeLoc);
231   assert(CurBuf && "Invalid or unspecified location!");
232 
233   PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
234 
235   OS << "Included from " << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
236      << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
237 }
238 
239 SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
240                                    const Twine &Msg, ArrayRef<SMRange> Ranges,
241                                    ArrayRef<SMFixIt> FixIts) const {
242   // First thing to do: find the current buffer containing the specified
243   // location to pull out the source line.
244   SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
245   std::pair<unsigned, unsigned> LineAndCol;
246   StringRef BufferID = "<unknown>";
247   std::string LineStr;
248 
249   if (Loc.isValid()) {
250     unsigned CurBuf = FindBufferContainingLoc(Loc);
251     assert(CurBuf && "Invalid or unspecified location!");
252 
253     const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf);
254     BufferID = CurMB->getBufferIdentifier();
255 
256     // Scan backward to find the start of the line.
257     const char *LineStart = Loc.getPointer();
258     const char *BufStart = CurMB->getBufferStart();
259     while (LineStart != BufStart && LineStart[-1] != '\n' &&
260            LineStart[-1] != '\r')
261       --LineStart;
262 
263     // Get the end of the line.
264     const char *LineEnd = Loc.getPointer();
265     const char *BufEnd = CurMB->getBufferEnd();
266     while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r')
267       ++LineEnd;
268     LineStr = std::string(LineStart, LineEnd);
269 
270     // Convert any ranges to column ranges that only intersect the line of the
271     // location.
272     for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
273       SMRange R = Ranges[i];
274       if (!R.isValid())
275         continue;
276 
277       // If the line doesn't contain any part of the range, then ignore it.
278       if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
279         continue;
280 
281       // Ignore pieces of the range that go onto other lines.
282       if (R.Start.getPointer() < LineStart)
283         R.Start = SMLoc::getFromPointer(LineStart);
284       if (R.End.getPointer() > LineEnd)
285         R.End = SMLoc::getFromPointer(LineEnd);
286 
287       // Translate from SMLoc ranges to column ranges.
288       // FIXME: Handle multibyte characters.
289       ColRanges.push_back(std::make_pair(R.Start.getPointer() - LineStart,
290                                          R.End.getPointer() - LineStart));
291     }
292 
293     LineAndCol = getLineAndColumn(Loc, CurBuf);
294   }
295 
296   return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first,
297                       LineAndCol.second - 1, Kind, Msg.str(), LineStr,
298                       ColRanges, FixIts);
299 }
300 
301 void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic,
302                              bool ShowColors) const {
303   // Report the message with the diagnostic handler if present.
304   if (DiagHandler) {
305     DiagHandler(Diagnostic, DiagContext);
306     return;
307   }
308 
309   if (Diagnostic.getLoc().isValid()) {
310     unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc());
311     assert(CurBuf && "Invalid or unspecified location!");
312     PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
313   }
314 
315   Diagnostic.print(nullptr, OS, ShowColors);
316 }
317 
318 void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
319                              SourceMgr::DiagKind Kind, const Twine &Msg,
320                              ArrayRef<SMRange> Ranges, ArrayRef<SMFixIt> FixIts,
321                              bool ShowColors) const {
322   PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors);
323 }
324 
325 void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
326                              const Twine &Msg, ArrayRef<SMRange> Ranges,
327                              ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
328   PrintMessage(errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors);
329 }
330 
331 //===----------------------------------------------------------------------===//
332 // SMDiagnostic Implementation
333 //===----------------------------------------------------------------------===//
334 
335 SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line,
336                            int Col, SourceMgr::DiagKind Kind, StringRef Msg,
337                            StringRef LineStr,
338                            ArrayRef<std::pair<unsigned, unsigned>> Ranges,
339                            ArrayRef<SMFixIt> Hints)
340     : SM(&sm), Loc(L), Filename(std::string(FN)), LineNo(Line), ColumnNo(Col),
341       Kind(Kind), Message(std::string(Msg)), LineContents(std::string(LineStr)),
342       Ranges(Ranges.vec()), FixIts(Hints.begin(), Hints.end()) {
343   llvm::sort(FixIts);
344 }
345 
346 static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
347                            ArrayRef<SMFixIt> FixIts,
348                            ArrayRef<char> SourceLine) {
349   if (FixIts.empty())
350     return;
351 
352   const char *LineStart = SourceLine.begin();
353   const char *LineEnd = SourceLine.end();
354 
355   size_t PrevHintEndCol = 0;
356 
357   for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end(); I != E;
358        ++I) {
359     // If the fixit contains a newline or tab, ignore it.
360     if (I->getText().find_first_of("\n\r\t") != StringRef::npos)
361       continue;
362 
363     SMRange R = I->getRange();
364 
365     // If the line doesn't contain any part of the range, then ignore it.
366     if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
367       continue;
368 
369     // Translate from SMLoc to column.
370     // Ignore pieces of the range that go onto other lines.
371     // FIXME: Handle multibyte characters in the source line.
372     unsigned FirstCol;
373     if (R.Start.getPointer() < LineStart)
374       FirstCol = 0;
375     else
376       FirstCol = R.Start.getPointer() - LineStart;
377 
378     // If we inserted a long previous hint, push this one forwards, and add
379     // an extra space to show that this is not part of the previous
380     // completion. This is sort of the best we can do when two hints appear
381     // to overlap.
382     //
383     // Note that if this hint is located immediately after the previous
384     // hint, no space will be added, since the location is more important.
385     unsigned HintCol = FirstCol;
386     if (HintCol < PrevHintEndCol)
387       HintCol = PrevHintEndCol + 1;
388 
389     // FIXME: This assertion is intended to catch unintended use of multibyte
390     // characters in fixits. If we decide to do this, we'll have to track
391     // separate byte widths for the source and fixit lines.
392     assert((size_t)sys::locale::columnWidth(I->getText()) ==
393            I->getText().size());
394 
395     // This relies on one byte per column in our fixit hints.
396     unsigned LastColumnModified = HintCol + I->getText().size();
397     if (LastColumnModified > FixItLine.size())
398       FixItLine.resize(LastColumnModified, ' ');
399 
400     std::copy(I->getText().begin(), I->getText().end(),
401               FixItLine.begin() + HintCol);
402 
403     PrevHintEndCol = LastColumnModified;
404 
405     // For replacements, mark the removal range with '~'.
406     // FIXME: Handle multibyte characters in the source line.
407     unsigned LastCol;
408     if (R.End.getPointer() >= LineEnd)
409       LastCol = LineEnd - LineStart;
410     else
411       LastCol = R.End.getPointer() - LineStart;
412 
413     std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~');
414   }
415 }
416 
417 static void printSourceLine(raw_ostream &S, StringRef LineContents) {
418   // Print out the source line one character at a time, so we can expand tabs.
419   for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
420     size_t NextTab = LineContents.find('\t', i);
421     // If there were no tabs left, print the rest, we are done.
422     if (NextTab == StringRef::npos) {
423       S << LineContents.drop_front(i);
424       break;
425     }
426 
427     // Otherwise, print from i to NextTab.
428     S << LineContents.slice(i, NextTab);
429     OutCol += NextTab - i;
430     i = NextTab;
431 
432     // If we have a tab, emit at least one space, then round up to 8 columns.
433     do {
434       S << ' ';
435       ++OutCol;
436     } while ((OutCol % TabStop) != 0);
437   }
438   S << '\n';
439 }
440 
/// Return true if the high bit of \p c is set, i.e. the byte is outside the
/// 7-bit ASCII range.
static bool isNonASCII(char c) {
  return static_cast<unsigned char>(c) >= 0x80;
}
442 
443 void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, bool ShowColors,
444                          bool ShowKindLabel) const {
445   {
446     WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors);
447 
448     if (ProgName && ProgName[0])
449       S << ProgName << ": ";
450 
451     if (!Filename.empty()) {
452       if (Filename == "-")
453         S << "<stdin>";
454       else
455         S << Filename;
456 
457       if (LineNo != -1) {
458         S << ':' << LineNo;
459         if (ColumnNo != -1)
460           S << ':' << (ColumnNo + 1);
461       }
462       S << ": ";
463     }
464   }
465 
466   if (ShowKindLabel) {
467     switch (Kind) {
468     case SourceMgr::DK_Error:
469       WithColor::error(OS, "", !ShowColors);
470       break;
471     case SourceMgr::DK_Warning:
472       WithColor::warning(OS, "", !ShowColors);
473       break;
474     case SourceMgr::DK_Note:
475       WithColor::note(OS, "", !ShowColors);
476       break;
477     case SourceMgr::DK_Remark:
478       WithColor::remark(OS, "", !ShowColors);
479       break;
480     }
481   }
482 
483   WithColor(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors)
484       << Message << '\n';
485 
486   if (LineNo == -1 || ColumnNo == -1)
487     return;
488 
489   // FIXME: If there are multibyte or multi-column characters in the source, all
490   // our ranges will be wrong. To do this properly, we'll need a byte-to-column
491   // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
492   // expanding them later, and bail out rather than show incorrect ranges and
493   // misaligned fixits for any other odd characters.
494   if (find_if(LineContents, isNonASCII) != LineContents.end()) {
495     printSourceLine(OS, LineContents);
496     return;
497   }
498   size_t NumColumns = LineContents.size();
499 
500   // Build the line with the caret and ranges.
501   std::string CaretLine(NumColumns + 1, ' ');
502 
503   // Expand any ranges.
504   for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
505     std::pair<unsigned, unsigned> R = Ranges[r];
506     std::fill(&CaretLine[R.first],
507               &CaretLine[std::min((size_t)R.second, CaretLine.size())], '~');
508   }
509 
510   // Add any fix-its.
511   // FIXME: Find the beginning of the line properly for multibyte characters.
512   std::string FixItInsertionLine;
513   buildFixItLine(
514       CaretLine, FixItInsertionLine, FixIts,
515       makeArrayRef(Loc.getPointer() - ColumnNo, LineContents.size()));
516 
517   // Finally, plop on the caret.
518   if (unsigned(ColumnNo) <= NumColumns)
519     CaretLine[ColumnNo] = '^';
520   else
521     CaretLine[NumColumns] = '^';
522 
523   // ... and remove trailing whitespace so the output doesn't wrap for it.  We
524   // know that the line isn't completely empty because it has the caret in it at
525   // least.
526   CaretLine.erase(CaretLine.find_last_not_of(' ') + 1);
527 
528   printSourceLine(OS, LineContents);
529 
530   {
531     WithColor S(OS, raw_ostream::GREEN, true, false, !ShowColors);
532 
533     // Print out the caret line, matching tabs in the source line.
534     for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
535       if (i >= LineContents.size() || LineContents[i] != '\t') {
536         S << CaretLine[i];
537         ++OutCol;
538         continue;
539       }
540 
541       // Okay, we have a tab.  Insert the appropriate number of characters.
542       do {
543         S << CaretLine[i];
544         ++OutCol;
545       } while ((OutCol % TabStop) != 0);
546     }
547     S << '\n';
548   }
549 
550   // Print out the replacement line, matching tabs in the source line.
551   if (FixItInsertionLine.empty())
552     return;
553 
554   for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) {
555     if (i >= LineContents.size() || LineContents[i] != '\t') {
556       OS << FixItInsertionLine[i];
557       ++OutCol;
558       continue;
559     }
560 
561     // Okay, we have a tab.  Insert the appropriate number of characters.
562     do {
563       OS << FixItInsertionLine[i];
564       // FIXME: This is trying not to break up replacements, but then to re-sync
565       // with the tabs between replacements. This will fail, though, if two
566       // fix-it replacements are exactly adjacent, or if a fix-it contains a
567       // space. Really we should be precomputing column widths, which we'll
568       // need anyway for multibyte chars.
569       if (FixItInsertionLine[i] != ' ')
570         ++i;
571       ++OutCol;
572     } while (((OutCol % TabStop) != 0) && i != e);
573   }
574   OS << '\n';
575 }
576