1 //===--- DirectiveTree.cpp - Find and strip preprocessor directives -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang-pseudo/DirectiveTree.h" 10 #include "clang/Basic/IdentifierTable.h" 11 #include "clang/Basic/TokenKinds.h" 12 #include "llvm/Support/FormatVariadic.h" 13 14 namespace clang { 15 namespace pseudo { 16 namespace { 17 18 class DirectiveParser { 19 public: 20 explicit DirectiveParser(const TokenStream &Code) 21 : Code(Code), Tok(&Code.front()) {} 22 void parse(DirectiveTree *Result) { parse(Result, /*TopLevel=*/true); } 23 24 private: 25 // Roles that a directive might take within a conditional block. 26 enum class Cond { None, If, Else, End }; 27 static Cond classifyDirective(tok::PPKeywordKind K) { 28 switch (K) { 29 case clang::tok::pp_if: 30 case clang::tok::pp_ifdef: 31 case clang::tok::pp_ifndef: 32 return Cond::If; 33 case clang::tok::pp_elif: 34 case clang::tok::pp_elifdef: 35 case clang::tok::pp_elifndef: 36 case clang::tok::pp_else: 37 return Cond::Else; 38 case clang::tok::pp_endif: 39 return Cond::End; 40 default: 41 return Cond::None; 42 } 43 } 44 45 // Parses tokens starting at Tok into Tree. 46 // If we reach an End or Else directive that ends Tree, returns it. 47 // If TopLevel is true, then we do not expect End and always return None. 48 llvm::Optional<DirectiveTree::Directive> parse(DirectiveTree *Tree, 49 bool TopLevel) { 50 auto StartsDirective = 51 [&, AllowDirectiveAt((const Token *)nullptr)]() mutable { 52 if (Tok->flag(LexFlags::StartsPPLine)) { 53 // If we considered a comment at the start of a PP-line, it doesn't 54 // start a directive but the directive can still start after it. 55 if (Tok->Kind == tok::comment) 56 AllowDirectiveAt = Tok + 1; 57 return Tok->Kind == tok::hash; 58 } 59 return Tok->Kind == tok::hash && AllowDirectiveAt == Tok; 60 }; 61 // Each iteration adds one chunk (or returns, if we see #endif). 62 while (Tok->Kind != tok::eof) { 63 // If there's no directive here, we have a code chunk. 64 if (!StartsDirective()) { 65 const Token *Start = Tok; 66 do 67 ++Tok; 68 while (Tok->Kind != tok::eof && !StartsDirective()); 69 Tree->Chunks.push_back(DirectiveTree::Code{ 70 Token::Range{Code.index(*Start), Code.index(*Tok)}}); 71 continue; 72 } 73 74 // We have some kind of directive. 75 DirectiveTree::Directive Directive; 76 parseDirective(&Directive); 77 Cond Kind = classifyDirective(Directive.Kind); 78 if (Kind == Cond::If) { 79 // #if or similar, starting a nested conditional block. 80 DirectiveTree::Conditional Conditional; 81 Conditional.Branches.emplace_back(); 82 Conditional.Branches.back().first = std::move(Directive); 83 parseConditional(&Conditional); 84 Tree->Chunks.push_back(std::move(Conditional)); 85 } else if ((Kind == Cond::Else || Kind == Cond::End) && !TopLevel) { 86 // #endif or similar, ending this PStructure scope. 87 // (#endif is unexpected at the top level, treat as simple directive). 88 return std::move(Directive); 89 } else { 90 // #define or similar, a simple directive at the current scope. 91 Tree->Chunks.push_back(std::move(Directive)); 92 } 93 } 94 return None; 95 } 96 97 // Parse the rest of a conditional section, after seeing the If directive. 98 // Returns after consuming the End directive. 99 void parseConditional(DirectiveTree::Conditional *C) { 100 assert(C->Branches.size() == 1 && 101 C->Branches.front().second.Chunks.empty() && 102 "Should be ready to parse first branch body"); 103 while (Tok->Kind != tok::eof) { 104 auto Terminator = parse(&C->Branches.back().second, /*TopLevel=*/false); 105 if (!Terminator) { 106 assert(Tok->Kind == tok::eof && "gave up parsing before eof?"); 107 C->End.Tokens = Token::Range::emptyAt(Code.index(*Tok)); 108 return; 109 } 110 if (classifyDirective(Terminator->Kind) == Cond::End) { 111 C->End = std::move(*Terminator); 112 return; 113 } 114 assert(classifyDirective(Terminator->Kind) == Cond::Else && 115 "ended branch unexpectedly"); 116 C->Branches.emplace_back(); 117 C->Branches.back().first = std::move(*Terminator); 118 } 119 } 120 121 // Parse a directive. Tok is the hash. 122 void parseDirective(DirectiveTree::Directive *D) { 123 assert(Tok->Kind == tok::hash); 124 125 // Directive spans from the hash until the end of line or file. 126 const Token *Begin = Tok++; 127 while (Tok->Kind != tok::eof && !Tok->flag(LexFlags::StartsPPLine)) 128 ++Tok; 129 ArrayRef<Token> Tokens{Begin, Tok}; 130 D->Tokens = {Code.index(*Tokens.begin()), Code.index(*Tokens.end())}; 131 132 // Directive name is the first non-comment token after the hash. 133 Tokens = Tokens.drop_front().drop_while( 134 [](const Token &T) { return T.Kind == tok::comment; }); 135 if (!Tokens.empty()) 136 D->Kind = PPKeywords.get(Tokens.front().text()).getPPKeywordID(); 137 } 138 139 const TokenStream &Code; 140 const Token *Tok; 141 clang::IdentifierTable PPKeywords; 142 }; 143 144 } // namespace 145 146 DirectiveTree DirectiveTree::parse(const TokenStream &Code) { 147 DirectiveTree Result; 148 DirectiveParser(Code).parse(&Result); 149 return Result; 150 } 151 152 static void dump(llvm::raw_ostream &OS, const DirectiveTree &, unsigned Indent); 153 static void dump(llvm::raw_ostream &OS, 154 const DirectiveTree::Directive &Directive, unsigned Indent, 155 bool Taken = false) { 156 OS.indent(Indent) << llvm::formatv( 157 "#{0} ({1} tokens){2}\n", tok::getPPKeywordSpelling(Directive.Kind), 158 Directive.Tokens.size(), Taken ? " TAKEN" : ""); 159 } 160 static void dump(llvm::raw_ostream &OS, const DirectiveTree::Code &Code, 161 unsigned Indent) { 162 OS.indent(Indent) << llvm::formatv("code ({0} tokens)\n", Code.Tokens.size()); 163 } 164 static void dump(llvm::raw_ostream &OS, 165 const DirectiveTree::Conditional &Conditional, 166 unsigned Indent) { 167 for (unsigned I = 0; I < Conditional.Branches.size(); ++I) { 168 const auto &Branch = Conditional.Branches[I]; 169 dump(OS, Branch.first, Indent, Conditional.Taken == I); 170 dump(OS, Branch.second, Indent + 2); 171 } 172 dump(OS, Conditional.End, Indent); 173 } 174 175 static void dump(llvm::raw_ostream &OS, const DirectiveTree::Chunk &Chunk, 176 unsigned Indent) { 177 switch (Chunk.kind()) { 178 case DirectiveTree::Chunk::K_Empty: 179 llvm_unreachable("invalid chunk"); 180 case DirectiveTree::Chunk::K_Code: 181 return dump(OS, (const DirectiveTree::Code &)Chunk, Indent); 182 case DirectiveTree::Chunk::K_Directive: 183 return dump(OS, (const DirectiveTree::Directive &)Chunk, Indent); 184 case DirectiveTree::Chunk::K_Conditional: 185 return dump(OS, (const DirectiveTree::Conditional &)Chunk, Indent); 186 } 187 } 188 189 static void dump(llvm::raw_ostream &OS, const DirectiveTree &Tree, 190 unsigned Indent) { 191 for (const auto &Chunk : Tree.Chunks) 192 dump(OS, Chunk, Indent); 193 } 194 195 // Define operator<< in terms of dump() functions above. 196 #define OSTREAM_DUMP(Type) \ 197 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Type &T) { \ 198 dump(OS, T, 0); \ 199 return OS; \ 200 } 201 OSTREAM_DUMP(DirectiveTree) 202 OSTREAM_DUMP(DirectiveTree::Chunk) 203 OSTREAM_DUMP(DirectiveTree::Directive) 204 OSTREAM_DUMP(DirectiveTree::Conditional) 205 OSTREAM_DUMP(DirectiveTree::Code) 206 #undef OSTREAM_DUMP 207 208 namespace { 209 // Makes choices about conditional branches. 210 // 211 // Generally it tries to maximize the amount of useful code we see. 212 // 213 // Caveat: each conditional is evaluated independently. Consider this code: 214 // #ifdef WINDOWS 215 // bool isWindows = true; 216 // #endif 217 // #ifndef WINDOWS 218 // bool isWindows = false; 219 // #endif 220 // We take both branches and define isWindows twice. We could track more state 221 // in order to produce a consistent view, but this is complex. 222 class BranchChooser { 223 public: 224 BranchChooser(const TokenStream &Code) : Code(Code) {} 225 226 void choose(DirectiveTree &M) { walk(M); } 227 228 private: 229 // Describes code seen by making particular branch choices. Higher is better. 230 struct Score { 231 int Tokens = 0; // excluding comments and directives 232 int Directives = 0; 233 int Errors = 0; // #error directives 234 235 bool operator>(const Score &Other) const { 236 // Seeing errors is bad, other things are good. 237 return std::make_tuple(-Errors, Tokens, Directives) > 238 std::make_tuple(-Other.Errors, Other.Tokens, Other.Directives); 239 } 240 241 Score &operator+=(const Score &Other) { 242 Tokens += Other.Tokens; 243 Directives += Other.Directives; 244 Errors += Other.Errors; 245 return *this; 246 } 247 }; 248 249 Score walk(DirectiveTree::Code &C) { 250 Score S; 251 for (const Token &T : Code.tokens(C.Tokens)) 252 if (T.Kind != tok::comment) 253 ++S.Tokens; 254 return S; 255 } 256 257 Score walk(DirectiveTree::Directive &D) { 258 Score S; 259 S.Directives = 1; 260 S.Errors = D.Kind == tok::pp_error; 261 return S; 262 } 263 264 Score walk(DirectiveTree::Chunk &C) { 265 switch (C.kind()) { 266 case DirectiveTree::Chunk::K_Code: 267 return walk((DirectiveTree::Code &)C); 268 case DirectiveTree::Chunk::K_Directive: 269 return walk((DirectiveTree::Directive &)C); 270 case DirectiveTree::Chunk::K_Conditional: 271 return walk((DirectiveTree::Conditional &)C); 272 case DirectiveTree::Chunk::K_Empty: 273 break; 274 } 275 llvm_unreachable("bad chunk kind"); 276 } 277 278 Score walk(DirectiveTree &M) { 279 Score S; 280 for (DirectiveTree::Chunk &C : M.Chunks) 281 S += walk(C); 282 return S; 283 } 284 285 Score walk(DirectiveTree::Conditional &C) { 286 Score Best; 287 bool MayTakeTrivial = true; 288 bool TookTrivial = false; 289 290 for (unsigned I = 0; I < C.Branches.size(); ++I) { 291 // Walk the branch to make its nested choices in any case. 292 Score BranchScore = walk(C.Branches[I].second); 293 // If we already took an #if 1, don't consider any other branches. 294 if (TookTrivial) 295 continue; 296 // Is this a trivial #if 0 or #if 1? 297 if (auto TriviallyTaken = isTakenWhenReached(C.Branches[I].first)) { 298 if (!*TriviallyTaken) 299 continue; // Don't consider #if 0 even if it scores well. 300 if (MayTakeTrivial) 301 TookTrivial = true; 302 } else { 303 // After a nontrivial condition, #elif 1 isn't guaranteed taken. 304 MayTakeTrivial = false; 305 } 306 // Is this the best branch so far? (Including if it's #if 1). 307 if (TookTrivial || !C.Taken.hasValue() || BranchScore > Best) { 308 Best = BranchScore; 309 C.Taken = I; 310 } 311 } 312 return Best; 313 } 314 315 // Return true if the directive starts an always-taken conditional branch, 316 // false if the branch is never taken, and None otherwise. 317 llvm::Optional<bool> isTakenWhenReached(const DirectiveTree::Directive &Dir) { 318 switch (Dir.Kind) { 319 case clang::tok::pp_if: 320 case clang::tok::pp_elif: 321 break; // handled below 322 case clang::tok::pp_else: 323 return true; 324 default: // #ifdef etc 325 return llvm::None; 326 } 327 328 const auto &Tokens = Code.tokens(Dir.Tokens); 329 assert(!Tokens.empty() && Tokens.front().Kind == tok::hash); 330 const Token &Name = Tokens.front().nextNC(); 331 const Token &Value = Name.nextNC(); 332 // Does the condition consist of exactly one token? 333 if (&Value >= Tokens.end() || &Value.nextNC() < Tokens.end()) 334 return llvm::None; 335 return llvm::StringSwitch<llvm::Optional<bool>>(Value.text()) 336 .Cases("true", "1", true) 337 .Cases("false", "0", false) 338 .Default(llvm::None); 339 } 340 341 const TokenStream &Code; 342 }; 343 344 } // namespace 345 346 void chooseConditionalBranches(DirectiveTree &Tree, const TokenStream &Code) { 347 BranchChooser{Code}.choose(Tree); 348 } 349 350 namespace { 351 class Preprocessor { 352 const TokenStream &In; 353 TokenStream &Out; 354 355 public: 356 Preprocessor(const TokenStream &In, TokenStream &Out) : In(In), Out(Out) {} 357 ~Preprocessor() { Out.finalize(); } 358 359 void walk(const DirectiveTree &T) { 360 for (const auto &C : T.Chunks) 361 walk(C); 362 } 363 364 void walk(const DirectiveTree::Chunk &C) { 365 switch (C.kind()) { 366 case DirectiveTree::Chunk::K_Code: 367 return walk((const DirectiveTree::Code &)C); 368 case DirectiveTree::Chunk::K_Directive: 369 return walk((const DirectiveTree::Directive &)C); 370 case DirectiveTree::Chunk::K_Conditional: 371 return walk((const DirectiveTree::Conditional &)C); 372 case DirectiveTree::Chunk::K_Empty: 373 break; 374 } 375 llvm_unreachable("bad chunk kind"); 376 } 377 378 void walk(const DirectiveTree::Code &C) { 379 for (const auto &Tok : In.tokens(C.Tokens)) 380 Out.push(Tok); 381 } 382 383 void walk(const DirectiveTree::Directive &) {} 384 385 void walk(const DirectiveTree::Conditional &C) { 386 if (C.Taken) 387 walk(C.Branches[*C.Taken].second); 388 } 389 }; 390 } // namespace 391 392 TokenStream DirectiveTree::stripDirectives(const TokenStream &In) const { 393 TokenStream Out; 394 Preprocessor(In, Out).walk(*this); 395 return Out; 396 } 397 398 } // namespace pseudo 399 } // namespace clang 400