//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of formatted_raw_ostream.
//
//===----------------------------------------------------------------------===//
12c97b778bSDavid Greene 
13c97b778bSDavid Greene #include "llvm/Support/FormattedStream.h"
14e80b81d1SOliver Stannard #include "llvm/Support/ConvertUTF.h"
156bda14b3SChandler Carruth #include "llvm/Support/Debug.h"
16e80b81d1SOliver Stannard #include "llvm/Support/Unicode.h"
17799003bfSBenjamin Kramer #include "llvm/Support/raw_ostream.h"
180de20af7SNick Lewycky #include <algorithm>
198e621f01SDavid Greene 
2020f6ac07SDavid Greene using namespace llvm;
2120f6ac07SDavid Greene 
22f83beab4SDaniel Malea /// UpdatePosition - Examine the given char sequence and figure out which
23f83beab4SDaniel Malea /// column we end up in after output, and how many line breaks are contained.
24e80b81d1SOliver Stannard /// This assumes that the input string is well-formed UTF-8, and takes into
25e80b81d1SOliver Stannard /// account Unicode characters which render as multiple columns wide.
UpdatePosition(const char * Ptr,size_t Size)26e80b81d1SOliver Stannard void formatted_raw_ostream::UpdatePosition(const char *Ptr, size_t Size) {
27f83beab4SDaniel Malea   unsigned &Column = Position.first;
28f83beab4SDaniel Malea   unsigned &Line = Position.second;
29c97b778bSDavid Greene 
30e80b81d1SOliver Stannard   auto ProcessUTF8CodePoint = [&Line, &Column](StringRef CP) {
31e80b81d1SOliver Stannard     int Width = sys::unicode::columnWidthUTF8(CP);
32e80b81d1SOliver Stannard     if (Width != sys::unicode::ErrorNonPrintableCharacter)
33e80b81d1SOliver Stannard       Column += Width;
34e80b81d1SOliver Stannard 
35e80b81d1SOliver Stannard     // The only special whitespace characters we care about are single-byte.
36e80b81d1SOliver Stannard     if (CP.size() > 1)
37e80b81d1SOliver Stannard       return;
38e80b81d1SOliver Stannard 
39e80b81d1SOliver Stannard     switch (CP[0]) {
40f83beab4SDaniel Malea     case '\n':
41f83beab4SDaniel Malea       Line += 1;
4278706a3dSGalina Kistanova       LLVM_FALLTHROUGH;
43f83beab4SDaniel Malea     case '\r':
44186b85dbSDan Gohman       Column = 0;
45f83beab4SDaniel Malea       break;
46f83beab4SDaniel Malea     case '\t':
47186b85dbSDan Gohman       // Assumes tab stop = 8 characters.
48186b85dbSDan Gohman       Column += (8 - (Column & 0x7)) & 0x7;
49f83beab4SDaniel Malea       break;
50f83beab4SDaniel Malea     }
51e80b81d1SOliver Stannard   };
52e80b81d1SOliver Stannard 
53e80b81d1SOliver Stannard   // If we have a partial UTF-8 sequence from the previous buffer, check that
54e80b81d1SOliver Stannard   // first.
55e80b81d1SOliver Stannard   if (PartialUTF8Char.size()) {
56e80b81d1SOliver Stannard     size_t BytesFromBuffer =
57e80b81d1SOliver Stannard         getNumBytesForUTF8(PartialUTF8Char[0]) - PartialUTF8Char.size();
58e80b81d1SOliver Stannard     if (Size < BytesFromBuffer) {
59e80b81d1SOliver Stannard       // If we still don't have enough bytes for a complete code point, just
60e80b81d1SOliver Stannard       // append what we have.
61e80b81d1SOliver Stannard       PartialUTF8Char.append(StringRef(Ptr, Size));
62e80b81d1SOliver Stannard       return;
63e80b81d1SOliver Stannard     } else {
64e80b81d1SOliver Stannard       // The first few bytes from the buffer will complete the code point.
65e80b81d1SOliver Stannard       // Concatenate them and process their effect on the line and column
66e80b81d1SOliver Stannard       // numbers.
67e80b81d1SOliver Stannard       PartialUTF8Char.append(StringRef(Ptr, BytesFromBuffer));
68e80b81d1SOliver Stannard       ProcessUTF8CodePoint(PartialUTF8Char);
69e80b81d1SOliver Stannard       PartialUTF8Char.clear();
70e80b81d1SOliver Stannard       Ptr += BytesFromBuffer;
71e80b81d1SOliver Stannard       Size -= BytesFromBuffer;
72e80b81d1SOliver Stannard     }
73e80b81d1SOliver Stannard   }
74e80b81d1SOliver Stannard 
75e80b81d1SOliver Stannard   // Now scan the rest of the buffer.
76e80b81d1SOliver Stannard   unsigned NumBytes;
77e80b81d1SOliver Stannard   for (const char *End = Ptr + Size; Ptr < End; Ptr += NumBytes) {
78e80b81d1SOliver Stannard     NumBytes = getNumBytesForUTF8(*Ptr);
79e80b81d1SOliver Stannard 
80e80b81d1SOliver Stannard     // The buffer might end part way through a UTF-8 code unit sequence for a
81e80b81d1SOliver Stannard     // Unicode scalar value if it got flushed. If this happens, we can't know
82e80b81d1SOliver Stannard     // the display width until we see the rest of the code point. Stash the
83e80b81d1SOliver Stannard     // bytes we do have, so that we can reconstruct the whole code point later,
84e80b81d1SOliver Stannard     // even if the buffer is being flushed.
85*a50c7ebfSOliver Stannard     if ((unsigned)(End - Ptr) < NumBytes) {
86e80b81d1SOliver Stannard       PartialUTF8Char = StringRef(Ptr, End - Ptr);
87e80b81d1SOliver Stannard       return;
88e80b81d1SOliver Stannard     }
89e80b81d1SOliver Stannard 
90e80b81d1SOliver Stannard     ProcessUTF8CodePoint(StringRef(Ptr, NumBytes));
91f83beab4SDaniel Malea   }
92ea2f1cebSDavid Greene }
93ea2f1cebSDavid Greene 
94f83beab4SDaniel Malea /// ComputePosition - Examine the current output and update line and column
95f83beab4SDaniel Malea /// counts.
ComputePosition(const char * Ptr,size_t Size)96f83beab4SDaniel Malea void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) {
9717a6fd22SDaniel Dunbar   // If our previous scan pointer is inside the buffer, assume we already
9817a6fd22SDaniel Dunbar   // scanned those bytes. This depends on raw_ostream to not change our buffer
9917a6fd22SDaniel Dunbar   // in unexpected ways.
100f83beab4SDaniel Malea   if (Ptr <= Scanned && Scanned <= Ptr + Size)
10117a6fd22SDaniel Dunbar     // Scan all characters added since our last scan to determine the new
10217a6fd22SDaniel Dunbar     // column.
103e80b81d1SOliver Stannard     UpdatePosition(Scanned, Size - (Scanned - Ptr));
104f83beab4SDaniel Malea   else
105e80b81d1SOliver Stannard     UpdatePosition(Ptr, Size);
10617a6fd22SDaniel Dunbar 
10717a6fd22SDaniel Dunbar   // Update the scanning pointer.
10817a6fd22SDaniel Dunbar   Scanned = Ptr + Size;
109c97b778bSDavid Greene }
110c97b778bSDavid Greene 
11120f6ac07SDavid Greene /// PadToColumn - Align the output to some column number.
112c97b778bSDavid Greene ///
11320f6ac07SDavid Greene /// \param NewCol - The column to move to.
114c97b778bSDavid Greene ///
PadToColumn(unsigned NewCol)115f733d758SChris Lattner formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
116ea2f1cebSDavid Greene   // Figure out what's in the buffer and add it to the column count.
117f83beab4SDaniel Malea   ComputePosition(getBufferStart(), GetNumBytesInBuffer());
118c97b778bSDavid Greene 
11920f6ac07SDavid Greene   // Output spaces until we reach the desired column.
120f83beab4SDaniel Malea   indent(std::max(int(NewCol - getColumn()), 1));
121f733d758SChris Lattner   return *this;
122c97b778bSDavid Greene }
12320f6ac07SDavid Greene 
write_impl(const char * Ptr,size_t Size)124250635e3SDan Gohman void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
125186b85dbSDan Gohman   // Figure out what's in the buffer and add it to the column count.
126f83beab4SDaniel Malea   ComputePosition(Ptr, Size);
127186b85dbSDan Gohman 
128186b85dbSDan Gohman   // Write the data to the underlying stream (which is unbuffered, so
129186b85dbSDan Gohman   // the data will be immediately written out).
130250635e3SDan Gohman   TheStream->write(Ptr, Size);
131186b85dbSDan Gohman 
13217a6fd22SDaniel Dunbar   // Reset the scanning pointer.
133c10719f5SCraig Topper   Scanned = nullptr;
134250635e3SDan Gohman }
135250635e3SDan Gohman 
136a31f96cfSDavid Greene /// fouts() - This returns a reference to a formatted_raw_ostream for
137a31f96cfSDavid Greene /// standard output.  Use it like: fouts() << "foo" << "bar";
fouts()138a31f96cfSDavid Greene formatted_raw_ostream &llvm::fouts() {
139a31f96cfSDavid Greene   static formatted_raw_ostream S(outs());
140a31f96cfSDavid Greene   return S;
141a31f96cfSDavid Greene }
142a31f96cfSDavid Greene 
143a31f96cfSDavid Greene /// ferrs() - This returns a reference to a formatted_raw_ostream for
144a31f96cfSDavid Greene /// standard error.  Use it like: ferrs() << "foo" << "bar";
ferrs()145a31f96cfSDavid Greene formatted_raw_ostream &llvm::ferrs() {
146a31f96cfSDavid Greene   static formatted_raw_ostream S(errs());
147a31f96cfSDavid Greene   return S;
148a31f96cfSDavid Greene }
14993a522baSDavid Greene 
15093a522baSDavid Greene /// fdbgs() - This returns a reference to a formatted_raw_ostream for
15193a522baSDavid Greene /// the debug stream.  Use it like: fdbgs() << "foo" << "bar";
fdbgs()15293a522baSDavid Greene formatted_raw_ostream &llvm::fdbgs() {
15393a522baSDavid Greene   static formatted_raw_ostream S(dbgs());
15493a522baSDavid Greene   return S;
15593a522baSDavid Greene }
156