//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of formatted_raw_ostream.
//
//===----------------------------------------------------------------------===//
12c97b778bSDavid Greene
13c97b778bSDavid Greene #include "llvm/Support/FormattedStream.h"
14e80b81d1SOliver Stannard #include "llvm/Support/ConvertUTF.h"
156bda14b3SChandler Carruth #include "llvm/Support/Debug.h"
16e80b81d1SOliver Stannard #include "llvm/Support/Unicode.h"
17799003bfSBenjamin Kramer #include "llvm/Support/raw_ostream.h"
180de20af7SNick Lewycky #include <algorithm>
198e621f01SDavid Greene
2020f6ac07SDavid Greene using namespace llvm;
2120f6ac07SDavid Greene
22f83beab4SDaniel Malea /// UpdatePosition - Examine the given char sequence and figure out which
23f83beab4SDaniel Malea /// column we end up in after output, and how many line breaks are contained.
24e80b81d1SOliver Stannard /// This assumes that the input string is well-formed UTF-8, and takes into
25e80b81d1SOliver Stannard /// account Unicode characters which render as multiple columns wide.
UpdatePosition(const char * Ptr,size_t Size)26e80b81d1SOliver Stannard void formatted_raw_ostream::UpdatePosition(const char *Ptr, size_t Size) {
27f83beab4SDaniel Malea unsigned &Column = Position.first;
28f83beab4SDaniel Malea unsigned &Line = Position.second;
29c97b778bSDavid Greene
30e80b81d1SOliver Stannard auto ProcessUTF8CodePoint = [&Line, &Column](StringRef CP) {
31e80b81d1SOliver Stannard int Width = sys::unicode::columnWidthUTF8(CP);
32e80b81d1SOliver Stannard if (Width != sys::unicode::ErrorNonPrintableCharacter)
33e80b81d1SOliver Stannard Column += Width;
34e80b81d1SOliver Stannard
35e80b81d1SOliver Stannard // The only special whitespace characters we care about are single-byte.
36e80b81d1SOliver Stannard if (CP.size() > 1)
37e80b81d1SOliver Stannard return;
38e80b81d1SOliver Stannard
39e80b81d1SOliver Stannard switch (CP[0]) {
40f83beab4SDaniel Malea case '\n':
41f83beab4SDaniel Malea Line += 1;
4278706a3dSGalina Kistanova LLVM_FALLTHROUGH;
43f83beab4SDaniel Malea case '\r':
44186b85dbSDan Gohman Column = 0;
45f83beab4SDaniel Malea break;
46f83beab4SDaniel Malea case '\t':
47186b85dbSDan Gohman // Assumes tab stop = 8 characters.
48186b85dbSDan Gohman Column += (8 - (Column & 0x7)) & 0x7;
49f83beab4SDaniel Malea break;
50f83beab4SDaniel Malea }
51e80b81d1SOliver Stannard };
52e80b81d1SOliver Stannard
53e80b81d1SOliver Stannard // If we have a partial UTF-8 sequence from the previous buffer, check that
54e80b81d1SOliver Stannard // first.
55e80b81d1SOliver Stannard if (PartialUTF8Char.size()) {
56e80b81d1SOliver Stannard size_t BytesFromBuffer =
57e80b81d1SOliver Stannard getNumBytesForUTF8(PartialUTF8Char[0]) - PartialUTF8Char.size();
58e80b81d1SOliver Stannard if (Size < BytesFromBuffer) {
59e80b81d1SOliver Stannard // If we still don't have enough bytes for a complete code point, just
60e80b81d1SOliver Stannard // append what we have.
61e80b81d1SOliver Stannard PartialUTF8Char.append(StringRef(Ptr, Size));
62e80b81d1SOliver Stannard return;
63e80b81d1SOliver Stannard } else {
64e80b81d1SOliver Stannard // The first few bytes from the buffer will complete the code point.
65e80b81d1SOliver Stannard // Concatenate them and process their effect on the line and column
66e80b81d1SOliver Stannard // numbers.
67e80b81d1SOliver Stannard PartialUTF8Char.append(StringRef(Ptr, BytesFromBuffer));
68e80b81d1SOliver Stannard ProcessUTF8CodePoint(PartialUTF8Char);
69e80b81d1SOliver Stannard PartialUTF8Char.clear();
70e80b81d1SOliver Stannard Ptr += BytesFromBuffer;
71e80b81d1SOliver Stannard Size -= BytesFromBuffer;
72e80b81d1SOliver Stannard }
73e80b81d1SOliver Stannard }
74e80b81d1SOliver Stannard
75e80b81d1SOliver Stannard // Now scan the rest of the buffer.
76e80b81d1SOliver Stannard unsigned NumBytes;
77e80b81d1SOliver Stannard for (const char *End = Ptr + Size; Ptr < End; Ptr += NumBytes) {
78e80b81d1SOliver Stannard NumBytes = getNumBytesForUTF8(*Ptr);
79e80b81d1SOliver Stannard
80e80b81d1SOliver Stannard // The buffer might end part way through a UTF-8 code unit sequence for a
81e80b81d1SOliver Stannard // Unicode scalar value if it got flushed. If this happens, we can't know
82e80b81d1SOliver Stannard // the display width until we see the rest of the code point. Stash the
83e80b81d1SOliver Stannard // bytes we do have, so that we can reconstruct the whole code point later,
84e80b81d1SOliver Stannard // even if the buffer is being flushed.
85*a50c7ebfSOliver Stannard if ((unsigned)(End - Ptr) < NumBytes) {
86e80b81d1SOliver Stannard PartialUTF8Char = StringRef(Ptr, End - Ptr);
87e80b81d1SOliver Stannard return;
88e80b81d1SOliver Stannard }
89e80b81d1SOliver Stannard
90e80b81d1SOliver Stannard ProcessUTF8CodePoint(StringRef(Ptr, NumBytes));
91f83beab4SDaniel Malea }
92ea2f1cebSDavid Greene }
93ea2f1cebSDavid Greene
94f83beab4SDaniel Malea /// ComputePosition - Examine the current output and update line and column
95f83beab4SDaniel Malea /// counts.
ComputePosition(const char * Ptr,size_t Size)96f83beab4SDaniel Malea void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) {
9717a6fd22SDaniel Dunbar // If our previous scan pointer is inside the buffer, assume we already
9817a6fd22SDaniel Dunbar // scanned those bytes. This depends on raw_ostream to not change our buffer
9917a6fd22SDaniel Dunbar // in unexpected ways.
100f83beab4SDaniel Malea if (Ptr <= Scanned && Scanned <= Ptr + Size)
10117a6fd22SDaniel Dunbar // Scan all characters added since our last scan to determine the new
10217a6fd22SDaniel Dunbar // column.
103e80b81d1SOliver Stannard UpdatePosition(Scanned, Size - (Scanned - Ptr));
104f83beab4SDaniel Malea else
105e80b81d1SOliver Stannard UpdatePosition(Ptr, Size);
10617a6fd22SDaniel Dunbar
10717a6fd22SDaniel Dunbar // Update the scanning pointer.
10817a6fd22SDaniel Dunbar Scanned = Ptr + Size;
109c97b778bSDavid Greene }
110c97b778bSDavid Greene
11120f6ac07SDavid Greene /// PadToColumn - Align the output to some column number.
112c97b778bSDavid Greene ///
11320f6ac07SDavid Greene /// \param NewCol - The column to move to.
114c97b778bSDavid Greene ///
PadToColumn(unsigned NewCol)115f733d758SChris Lattner formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
116ea2f1cebSDavid Greene // Figure out what's in the buffer and add it to the column count.
117f83beab4SDaniel Malea ComputePosition(getBufferStart(), GetNumBytesInBuffer());
118c97b778bSDavid Greene
11920f6ac07SDavid Greene // Output spaces until we reach the desired column.
120f83beab4SDaniel Malea indent(std::max(int(NewCol - getColumn()), 1));
121f733d758SChris Lattner return *this;
122c97b778bSDavid Greene }
12320f6ac07SDavid Greene
write_impl(const char * Ptr,size_t Size)124250635e3SDan Gohman void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
125186b85dbSDan Gohman // Figure out what's in the buffer and add it to the column count.
126f83beab4SDaniel Malea ComputePosition(Ptr, Size);
127186b85dbSDan Gohman
128186b85dbSDan Gohman // Write the data to the underlying stream (which is unbuffered, so
129186b85dbSDan Gohman // the data will be immediately written out).
130250635e3SDan Gohman TheStream->write(Ptr, Size);
131186b85dbSDan Gohman
13217a6fd22SDaniel Dunbar // Reset the scanning pointer.
133c10719f5SCraig Topper Scanned = nullptr;
134250635e3SDan Gohman }
135250635e3SDan Gohman
136a31f96cfSDavid Greene /// fouts() - This returns a reference to a formatted_raw_ostream for
137a31f96cfSDavid Greene /// standard output. Use it like: fouts() << "foo" << "bar";
fouts()138a31f96cfSDavid Greene formatted_raw_ostream &llvm::fouts() {
139a31f96cfSDavid Greene static formatted_raw_ostream S(outs());
140a31f96cfSDavid Greene return S;
141a31f96cfSDavid Greene }
142a31f96cfSDavid Greene
143a31f96cfSDavid Greene /// ferrs() - This returns a reference to a formatted_raw_ostream for
144a31f96cfSDavid Greene /// standard error. Use it like: ferrs() << "foo" << "bar";
ferrs()145a31f96cfSDavid Greene formatted_raw_ostream &llvm::ferrs() {
146a31f96cfSDavid Greene static formatted_raw_ostream S(errs());
147a31f96cfSDavid Greene return S;
148a31f96cfSDavid Greene }
14993a522baSDavid Greene
15093a522baSDavid Greene /// fdbgs() - This returns a reference to a formatted_raw_ostream for
15193a522baSDavid Greene /// the debug stream. Use it like: fdbgs() << "foo" << "bar";
fdbgs()15293a522baSDavid Greene formatted_raw_ostream &llvm::fdbgs() {
15393a522baSDavid Greene static formatted_raw_ostream S(dbgs());
15493a522baSDavid Greene return S;
15593a522baSDavid Greene }
156