1 //===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements an abstract TokenAnalyzer and associated helper
12 /// classes. TokenAnalyzer can be extended to generate replacements based on
13 /// an annotated and pre-processed token stream.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #include "TokenAnalyzer.h"
18 #include "AffectedRangeManager.h"
19 #include "Encoding.h"
20 #include "FormatToken.h"
21 #include "FormatTokenLexer.h"
22 #include "TokenAnnotator.h"
23 #include "UnwrappedLineParser.h"
24 #include "clang/Basic/Diagnostic.h"
25 #include "clang/Basic/DiagnosticOptions.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/Format/Format.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/Support/Debug.h"
31 
32 #define DEBUG_TYPE "format-formatter"
33 
34 namespace clang {
35 namespace format {
36 
37 // This sets up an virtual file system with file \p FileName containing \p
38 // Code.
39 std::unique_ptr<Environment>
40 Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName,
41                                       ArrayRef<tooling::Range> Ranges) {
42   // This is referenced by `FileMgr` and will be released by `FileMgr` when it
43   // is deleted.
44   IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
45       new vfs::InMemoryFileSystem);
46   // This is passed to `SM` as reference, so the pointer has to be referenced
47   // in `Environment` so that `FileMgr` can out-live this function scope.
48   std::unique_ptr<FileManager> FileMgr(
49       new FileManager(FileSystemOptions(), InMemoryFileSystem));
50   // This is passed to `SM` as reference, so the pointer has to be referenced
51   // by `Environment` due to the same reason above.
52   std::unique_ptr<DiagnosticsEngine> Diagnostics(new DiagnosticsEngine(
53       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
54       new DiagnosticOptions));
55   // This will be stored as reference, so the pointer has to be stored in
56   // due to the same reason above.
57   std::unique_ptr<SourceManager> VirtualSM(
58       new SourceManager(*Diagnostics, *FileMgr));
59   InMemoryFileSystem->addFile(
60       FileName, 0,
61       llvm::MemoryBuffer::getMemBuffer(Code, FileName,
62                                        /*RequiresNullTerminator=*/false));
63   FileID ID = VirtualSM->createFileID(FileMgr->getFile(FileName),
64                                       SourceLocation(), clang::SrcMgr::C_User);
65   assert(ID.isValid());
66   SourceLocation StartOfFile = VirtualSM->getLocForStartOfFile(ID);
67   std::vector<CharSourceRange> CharRanges;
68   for (const tooling::Range &Range : Ranges) {
69     SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
70     SourceLocation End = Start.getLocWithOffset(Range.getLength());
71     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
72   }
73   return llvm::make_unique<Environment>(ID, std::move(FileMgr),
74                                         std::move(VirtualSM),
75                                         std::move(Diagnostics), CharRanges);
76 }
77 
78 TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
79     : Style(Style), Env(Env),
80       AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
81       UnwrappedLines(1),
82       Encoding(encoding::detectEncoding(
83           Env.getSourceManager().getBufferData(Env.getFileID()))) {
84   DEBUG(
85       llvm::dbgs() << "File encoding: "
86                    << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown")
87                    << "\n");
88   DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
89                      << "\n");
90 }
91 
92 tooling::Replacements TokenAnalyzer::process() {
93   tooling::Replacements Result;
94   FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style,
95                           Encoding);
96 
97   UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), *this);
98   Parser.parse();
99   assert(UnwrappedLines.rbegin()->empty());
100   for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) {
101     DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
102     SmallVector<AnnotatedLine *, 16> AnnotatedLines;
103 
104     TokenAnnotator Annotator(Style, Tokens.getKeywords());
105     for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
106       AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
107       Annotator.annotate(*AnnotatedLines.back());
108     }
109 
110     tooling::Replacements RunResult =
111         analyze(Annotator, AnnotatedLines, Tokens);
112 
113     DEBUG({
114       llvm::dbgs() << "Replacements for run " << Run << ":\n";
115       for (tooling::Replacements::const_iterator I = RunResult.begin(),
116                                                  E = RunResult.end();
117            I != E; ++I) {
118         llvm::dbgs() << I->toString() << "\n";
119       }
120     });
121     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
122       delete AnnotatedLines[i];
123     }
124     for (const auto &R : RunResult) {
125       auto Err = Result.add(R);
126       // FIXME: better error handling here. For now, simply return an empty
127       // Replacements to indicate failure.
128       if (Err) {
129         llvm::errs() << llvm::toString(std::move(Err)) << "\n";
130         return tooling::Replacements();
131       }
132     }
133   }
134   return Result;
135 }
136 
137 void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) {
138   assert(!UnwrappedLines.empty());
139   UnwrappedLines.back().push_back(TheLine);
140 }
141 
142 void TokenAnalyzer::finishRun() {
143   UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
144 }
145 
146 } // end namespace format
147 } // end namespace clang
148