1 #include <cctype> 2 #include <cstdio> 3 #include <cstdlib> 4 #include <map> 5 #include <memory> 6 #include <string> 7 #include <utility> 8 #include <vector> 9 10 //===----------------------------------------------------------------------===// 11 // Lexer 12 //===----------------------------------------------------------------------===// 13 14 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 15 // of these for known things. 16 enum Token { 17 tok_eof = -1, 18 19 // commands 20 tok_def = -2, 21 tok_extern = -3, 22 23 // primary 24 tok_identifier = -4, 25 tok_number = -5 26 }; 27 28 static std::string IdentifierStr; // Filled in if tok_identifier 29 static double NumVal; // Filled in if tok_number 30 31 /// gettok - Return the next token from standard input. 32 static int gettok() { 33 static int LastChar = ' '; 34 35 // Skip any whitespace. 36 while (isspace(LastChar)) 37 LastChar = getchar(); 38 39 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 40 IdentifierStr = LastChar; 41 while (isalnum((LastChar = getchar()))) 42 IdentifierStr += LastChar; 43 44 if (IdentifierStr == "def") 45 return tok_def; 46 if (IdentifierStr == "extern") 47 return tok_extern; 48 return tok_identifier; 49 } 50 51 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 52 std::string NumStr; 53 do { 54 NumStr += LastChar; 55 LastChar = getchar(); 56 } while (isdigit(LastChar) || LastChar == '.'); 57 58 NumVal = strtod(NumStr.c_str(), nullptr); 59 return tok_number; 60 } 61 62 if (LastChar == '#') { 63 // Comment until end of line. 64 do 65 LastChar = getchar(); 66 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 67 68 if (LastChar != EOF) 69 return gettok(); 70 } 71 72 // Check for end of file. Don't eat the EOF. 73 if (LastChar == EOF) 74 return tok_eof; 75 76 // Otherwise, just return the character as its ascii value. 77 int ThisChar = LastChar; 78 LastChar = getchar(); 79 return ThisChar; 80 } 81 82 //===----------------------------------------------------------------------===// 83 // Abstract Syntax Tree (aka Parse Tree) 84 //===----------------------------------------------------------------------===// 85 86 namespace { 87 88 /// ExprAST - Base class for all expression nodes. 89 class ExprAST { 90 public: 91 virtual ~ExprAST() = default; 92 }; 93 94 /// NumberExprAST - Expression class for numeric literals like "1.0". 95 class NumberExprAST : public ExprAST { 96 double Val; 97 98 public: 99 NumberExprAST(double Val) : Val(Val) {} 100 }; 101 102 /// VariableExprAST - Expression class for referencing a variable, like "a". 103 class VariableExprAST : public ExprAST { 104 std::string Name; 105 106 public: 107 VariableExprAST(const std::string &Name) : Name(Name) {} 108 }; 109 110 /// BinaryExprAST - Expression class for a binary operator. 111 class BinaryExprAST : public ExprAST { 112 char Op; 113 std::unique_ptr<ExprAST> LHS, RHS; 114 115 public: 116 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, 117 std::unique_ptr<ExprAST> RHS) 118 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} 119 }; 120 121 /// CallExprAST - Expression class for function calls. 122 class CallExprAST : public ExprAST { 123 std::string Callee; 124 std::vector<std::unique_ptr<ExprAST>> Args; 125 126 public: 127 CallExprAST(const std::string &Callee, 128 std::vector<std::unique_ptr<ExprAST>> Args) 129 : Callee(Callee), Args(std::move(Args)) {} 130 }; 131 132 /// PrototypeAST - This class represents the "prototype" for a function, 133 /// which captures its name, and its argument names (thus implicitly the number 134 /// of arguments the function takes). 135 class PrototypeAST { 136 std::string Name; 137 std::vector<std::string> Args; 138 139 public: 140 PrototypeAST(const std::string &Name, std::vector<std::string> Args) 141 : Name(Name), Args(std::move(Args)) {} 142 143 const std::string &getName() const { return Name; } 144 }; 145 146 /// FunctionAST - This class represents a function definition itself. 147 class FunctionAST { 148 std::unique_ptr<PrototypeAST> Proto; 149 std::unique_ptr<ExprAST> Body; 150 151 public: 152 FunctionAST(std::unique_ptr<PrototypeAST> Proto, 153 std::unique_ptr<ExprAST> Body) 154 : Proto(std::move(Proto)), Body(std::move(Body)) {} 155 }; 156 157 } // end anonymous namespace 158 159 //===----------------------------------------------------------------------===// 160 // Parser 161 //===----------------------------------------------------------------------===// 162 163 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 164 /// token the parser is looking at. getNextToken reads another token from the 165 /// lexer and updates CurTok with its results. 166 static int CurTok; 167 static int getNextToken() { return CurTok = gettok(); } 168 169 /// BinopPrecedence - This holds the precedence for each binary operator that is 170 /// defined. 171 static std::map<char, int> BinopPrecedence; 172 173 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 174 static int GetTokPrecedence() { 175 if (!isascii(CurTok)) 176 return -1; 177 178 // Make sure it's a declared binop. 179 int TokPrec = BinopPrecedence[CurTok]; 180 if (TokPrec <= 0) 181 return -1; 182 return TokPrec; 183 } 184 185 /// LogError* - These are little helper functions for error handling. 186 std::unique_ptr<ExprAST> LogError(const char *Str) { 187 fprintf(stderr, "Error: %s\n", Str); 188 return nullptr; 189 } 190 std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { 191 LogError(Str); 192 return nullptr; 193 } 194 195 static std::unique_ptr<ExprAST> ParseExpression(); 196 197 /// numberexpr ::= number 198 static std::unique_ptr<ExprAST> ParseNumberExpr() { 199 auto Result = std::make_unique<NumberExprAST>(NumVal); 200 getNextToken(); // consume the number 201 return std::move(Result); 202 } 203 204 /// parenexpr ::= '(' expression ')' 205 static std::unique_ptr<ExprAST> ParseParenExpr() { 206 getNextToken(); // eat (. 207 auto V = ParseExpression(); 208 if (!V) 209 return nullptr; 210 211 if (CurTok != ')') 212 return LogError("expected ')'"); 213 getNextToken(); // eat ). 214 return V; 215 } 216 217 /// identifierexpr 218 /// ::= identifier 219 /// ::= identifier '(' expression* ')' 220 static std::unique_ptr<ExprAST> ParseIdentifierExpr() { 221 std::string IdName = IdentifierStr; 222 223 getNextToken(); // eat identifier. 224 225 if (CurTok != '(') // Simple variable ref. 226 return std::make_unique<VariableExprAST>(IdName); 227 228 // Call. 229 getNextToken(); // eat ( 230 std::vector<std::unique_ptr<ExprAST>> Args; 231 if (CurTok != ')') { 232 while (true) { 233 if (auto Arg = ParseExpression()) 234 Args.push_back(std::move(Arg)); 235 else 236 return nullptr; 237 238 if (CurTok == ')') 239 break; 240 241 if (CurTok != ',') 242 return LogError("Expected ')' or ',' in argument list"); 243 getNextToken(); 244 } 245 } 246 247 // Eat the ')'. 248 getNextToken(); 249 250 return std::make_unique<CallExprAST>(IdName, std::move(Args)); 251 } 252 253 /// primary 254 /// ::= identifierexpr 255 /// ::= numberexpr 256 /// ::= parenexpr 257 static std::unique_ptr<ExprAST> ParsePrimary() { 258 switch (CurTok) { 259 default: 260 return LogError("unknown token when expecting an expression"); 261 case tok_identifier: 262 return ParseIdentifierExpr(); 263 case tok_number: 264 return ParseNumberExpr(); 265 case '(': 266 return ParseParenExpr(); 267 } 268 } 269 270 /// binoprhs 271 /// ::= ('+' primary)* 272 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, 273 std::unique_ptr<ExprAST> LHS) { 274 // If this is a binop, find its precedence. 275 while (true) { 276 int TokPrec = GetTokPrecedence(); 277 278 // If this is a binop that binds at least as tightly as the current binop, 279 // consume it, otherwise we are done. 280 if (TokPrec < ExprPrec) 281 return LHS; 282 283 // Okay, we know this is a binop. 284 int BinOp = CurTok; 285 getNextToken(); // eat binop 286 287 // Parse the primary expression after the binary operator. 288 auto RHS = ParsePrimary(); 289 if (!RHS) 290 return nullptr; 291 292 // If BinOp binds less tightly with RHS than the operator after RHS, let 293 // the pending operator take RHS as its LHS. 294 int NextPrec = GetTokPrecedence(); 295 if (TokPrec < NextPrec) { 296 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); 297 if (!RHS) 298 return nullptr; 299 } 300 301 // Merge LHS/RHS. 302 LHS = 303 std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); 304 } 305 } 306 307 /// expression 308 /// ::= primary binoprhs 309 /// 310 static std::unique_ptr<ExprAST> ParseExpression() { 311 auto LHS = ParsePrimary(); 312 if (!LHS) 313 return nullptr; 314 315 return ParseBinOpRHS(0, std::move(LHS)); 316 } 317 318 /// prototype 319 /// ::= id '(' id* ')' 320 static std::unique_ptr<PrototypeAST> ParsePrototype() { 321 if (CurTok != tok_identifier) 322 return LogErrorP("Expected function name in prototype"); 323 324 std::string FnName = IdentifierStr; 325 getNextToken(); 326 327 if (CurTok != '(') 328 return LogErrorP("Expected '(' in prototype"); 329 330 std::vector<std::string> ArgNames; 331 while (getNextToken() == tok_identifier) 332 ArgNames.push_back(IdentifierStr); 333 if (CurTok != ')') 334 return LogErrorP("Expected ')' in prototype"); 335 336 // success. 337 getNextToken(); // eat ')'. 338 339 return std::make_unique<PrototypeAST>(FnName, std::move(ArgNames)); 340 } 341 342 /// definition ::= 'def' prototype expression 343 static std::unique_ptr<FunctionAST> ParseDefinition() { 344 getNextToken(); // eat def. 345 auto Proto = ParsePrototype(); 346 if (!Proto) 347 return nullptr; 348 349 if (auto E = ParseExpression()) 350 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 351 return nullptr; 352 } 353 354 /// toplevelexpr ::= expression 355 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { 356 if (auto E = ParseExpression()) { 357 // Make an anonymous proto. 358 auto Proto = std::make_unique<PrototypeAST>("__anon_expr", 359 std::vector<std::string>()); 360 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 361 } 362 return nullptr; 363 } 364 365 /// external ::= 'extern' prototype 366 static std::unique_ptr<PrototypeAST> ParseExtern() { 367 getNextToken(); // eat extern. 368 return ParsePrototype(); 369 } 370 371 //===----------------------------------------------------------------------===// 372 // Top-Level parsing 373 //===----------------------------------------------------------------------===// 374 375 static void HandleDefinition() { 376 if (ParseDefinition()) { 377 fprintf(stderr, "Parsed a function definition.\n"); 378 } else { 379 // Skip token for error recovery. 380 getNextToken(); 381 } 382 } 383 384 static void HandleExtern() { 385 if (ParseExtern()) { 386 fprintf(stderr, "Parsed an extern\n"); 387 } else { 388 // Skip token for error recovery. 389 getNextToken(); 390 } 391 } 392 393 static void HandleTopLevelExpression() { 394 // Evaluate a top-level expression into an anonymous function. 395 if (ParseTopLevelExpr()) { 396 fprintf(stderr, "Parsed a top-level expr\n"); 397 } else { 398 // Skip token for error recovery. 399 getNextToken(); 400 } 401 } 402 403 /// top ::= definition | external | expression | ';' 404 static void MainLoop() { 405 while (true) { 406 fprintf(stderr, "ready> "); 407 switch (CurTok) { 408 case tok_eof: 409 return; 410 case ';': // ignore top-level semicolons. 411 getNextToken(); 412 break; 413 case tok_def: 414 HandleDefinition(); 415 break; 416 case tok_extern: 417 HandleExtern(); 418 break; 419 default: 420 HandleTopLevelExpression(); 421 break; 422 } 423 } 424 } 425 426 //===----------------------------------------------------------------------===// 427 // Main driver code. 428 //===----------------------------------------------------------------------===// 429 430 int main() { 431 // Install standard binary operators. 432 // 1 is lowest precedence. 433 BinopPrecedence['<'] = 10; 434 BinopPrecedence['+'] = 20; 435 BinopPrecedence['-'] = 20; 436 BinopPrecedence['*'] = 40; // highest. 437 438 // Prime the first token. 439 fprintf(stderr, "ready> "); 440 getNextToken(); 441 442 // Run the main "interpreter loop" now. 443 MainLoop(); 444 445 return 0; 446 } 447