1 #include "llvm/ADT/STLExtras.h" 2 #include <algorithm> 3 #include <cctype> 4 #include <cstdio> 5 #include <cstdlib> 6 #include <map> 7 #include <memory> 8 #include <string> 9 #include <vector> 10 11 //===----------------------------------------------------------------------===// 12 // Lexer 13 //===----------------------------------------------------------------------===// 14 15 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 16 // of these for known things. 17 enum Token { 18 tok_eof = -1, 19 20 // commands 21 tok_def = -2, 22 tok_extern = -3, 23 24 // primary 25 tok_identifier = -4, 26 tok_number = -5 27 }; 28 29 static std::string IdentifierStr; // Filled in if tok_identifier 30 static double NumVal; // Filled in if tok_number 31 32 /// gettok - Return the next token from standard input. 33 static int gettok() { 34 static int LastChar = ' '; 35 36 // Skip any whitespace. 37 while (isspace(LastChar)) 38 LastChar = getchar(); 39 40 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 41 IdentifierStr = LastChar; 42 while (isalnum((LastChar = getchar()))) 43 IdentifierStr += LastChar; 44 45 if (IdentifierStr == "def") 46 return tok_def; 47 if (IdentifierStr == "extern") 48 return tok_extern; 49 return tok_identifier; 50 } 51 52 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 53 std::string NumStr; 54 do { 55 NumStr += LastChar; 56 LastChar = getchar(); 57 } while (isdigit(LastChar) || LastChar == '.'); 58 59 NumVal = strtod(NumStr.c_str(), nullptr); 60 return tok_number; 61 } 62 63 if (LastChar == '#') { 64 // Comment until end of line. 65 do 66 LastChar = getchar(); 67 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 68 69 if (LastChar != EOF) 70 return gettok(); 71 } 72 73 // Check for end of file. Don't eat the EOF. 74 if (LastChar == EOF) 75 return tok_eof; 76 77 // Otherwise, just return the character as its ascii value. 78 int ThisChar = LastChar; 79 LastChar = getchar(); 80 return ThisChar; 81 } 82 83 //===----------------------------------------------------------------------===// 84 // Abstract Syntax Tree (aka Parse Tree) 85 //===----------------------------------------------------------------------===// 86 87 namespace { 88 89 /// ExprAST - Base class for all expression nodes. 90 class ExprAST { 91 public: 92 virtual ~ExprAST() = default; 93 }; 94 95 /// NumberExprAST - Expression class for numeric literals like "1.0". 96 class NumberExprAST : public ExprAST { 97 double Val; 98 99 public: 100 NumberExprAST(double Val) : Val(Val) {} 101 }; 102 103 /// VariableExprAST - Expression class for referencing a variable, like "a". 104 class VariableExprAST : public ExprAST { 105 std::string Name; 106 107 public: 108 VariableExprAST(const std::string &Name) : Name(Name) {} 109 }; 110 111 /// BinaryExprAST - Expression class for a binary operator. 112 class BinaryExprAST : public ExprAST { 113 char Op; 114 std::unique_ptr<ExprAST> LHS, RHS; 115 116 public: 117 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, 118 std::unique_ptr<ExprAST> RHS) 119 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} 120 }; 121 122 /// CallExprAST - Expression class for function calls. 123 class CallExprAST : public ExprAST { 124 std::string Callee; 125 std::vector<std::unique_ptr<ExprAST>> Args; 126 127 public: 128 CallExprAST(const std::string &Callee, 129 std::vector<std::unique_ptr<ExprAST>> Args) 130 : Callee(Callee), Args(std::move(Args)) {} 131 }; 132 133 /// PrototypeAST - This class represents the "prototype" for a function, 134 /// which captures its name, and its argument names (thus implicitly the number 135 /// of arguments the function takes). 136 class PrototypeAST { 137 std::string Name; 138 std::vector<std::string> Args; 139 140 public: 141 PrototypeAST(const std::string &Name, std::vector<std::string> Args) 142 : Name(Name), Args(std::move(Args)) {} 143 }; 144 145 /// FunctionAST - This class represents a function definition itself. 146 class FunctionAST { 147 std::unique_ptr<PrototypeAST> Proto; 148 std::unique_ptr<ExprAST> Body; 149 150 public: 151 FunctionAST(std::unique_ptr<PrototypeAST> Proto, 152 std::unique_ptr<ExprAST> Body) 153 : Proto(std::move(Proto)), Body(std::move(Body)) {} 154 }; 155 156 } // end anonymous namespace 157 158 //===----------------------------------------------------------------------===// 159 // Parser 160 //===----------------------------------------------------------------------===// 161 162 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 163 /// token the parser is looking at. getNextToken reads another token from the 164 /// lexer and updates CurTok with its results. 165 static int CurTok; 166 static int getNextToken() { return CurTok = gettok(); } 167 168 /// BinopPrecedence - This holds the precedence for each binary operator that is 169 /// defined. 170 static std::map<char, int> BinopPrecedence; 171 172 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 173 static int GetTokPrecedence() { 174 if (!isascii(CurTok)) 175 return -1; 176 177 // Make sure it's a declared binop. 178 int TokPrec = BinopPrecedence[CurTok]; 179 if (TokPrec <= 0) 180 return -1; 181 return TokPrec; 182 } 183 184 /// LogError* - These are little helper functions for error handling. 185 std::unique_ptr<ExprAST> LogError(const char *Str) { 186 fprintf(stderr, "Error: %s\n", Str); 187 return nullptr; 188 } 189 std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { 190 LogError(Str); 191 return nullptr; 192 } 193 194 static std::unique_ptr<ExprAST> ParseExpression(); 195 196 /// numberexpr ::= number 197 static std::unique_ptr<ExprAST> ParseNumberExpr() { 198 auto Result = llvm::make_unique<NumberExprAST>(NumVal); 199 getNextToken(); // consume the number 200 return std::move(Result); 201 } 202 203 /// parenexpr ::= '(' expression ')' 204 static std::unique_ptr<ExprAST> ParseParenExpr() { 205 getNextToken(); // eat (. 206 auto V = ParseExpression(); 207 if (!V) 208 return nullptr; 209 210 if (CurTok != ')') 211 return LogError("expected ')'"); 212 getNextToken(); // eat ). 213 return V; 214 } 215 216 /// identifierexpr 217 /// ::= identifier 218 /// ::= identifier '(' expression* ')' 219 static std::unique_ptr<ExprAST> ParseIdentifierExpr() { 220 std::string IdName = IdentifierStr; 221 222 getNextToken(); // eat identifier. 223 224 if (CurTok != '(') // Simple variable ref. 225 return llvm::make_unique<VariableExprAST>(IdName); 226 227 // Call. 228 getNextToken(); // eat ( 229 std::vector<std::unique_ptr<ExprAST>> Args; 230 if (CurTok != ')') { 231 while (true) { 232 if (auto Arg = ParseExpression()) 233 Args.push_back(std::move(Arg)); 234 else 235 return nullptr; 236 237 if (CurTok == ')') 238 break; 239 240 if (CurTok != ',') 241 return LogError("Expected ')' or ',' in argument list"); 242 getNextToken(); 243 } 244 } 245 246 // Eat the ')'. 247 getNextToken(); 248 249 return llvm::make_unique<CallExprAST>(IdName, std::move(Args)); 250 } 251 252 /// primary 253 /// ::= identifierexpr 254 /// ::= numberexpr 255 /// ::= parenexpr 256 static std::unique_ptr<ExprAST> ParsePrimary() { 257 switch (CurTok) { 258 default: 259 return LogError("unknown token when expecting an expression"); 260 case tok_identifier: 261 return ParseIdentifierExpr(); 262 case tok_number: 263 return ParseNumberExpr(); 264 case '(': 265 return ParseParenExpr(); 266 } 267 } 268 269 /// binoprhs 270 /// ::= ('+' primary)* 271 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, 272 std::unique_ptr<ExprAST> LHS) { 273 // If this is a binop, find its precedence. 274 while (true) { 275 int TokPrec = GetTokPrecedence(); 276 277 // If this is a binop that binds at least as tightly as the current binop, 278 // consume it, otherwise we are done. 279 if (TokPrec < ExprPrec) 280 return LHS; 281 282 // Okay, we know this is a binop. 283 int BinOp = CurTok; 284 getNextToken(); // eat binop 285 286 // Parse the primary expression after the binary operator. 287 auto RHS = ParsePrimary(); 288 if (!RHS) 289 return nullptr; 290 291 // If BinOp binds less tightly with RHS than the operator after RHS, let 292 // the pending operator take RHS as its LHS. 293 int NextPrec = GetTokPrecedence(); 294 if (TokPrec < NextPrec) { 295 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); 296 if (!RHS) 297 return nullptr; 298 } 299 300 // Merge LHS/RHS. 301 LHS = llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), 302 std::move(RHS)); 303 } 304 } 305 306 /// expression 307 /// ::= primary binoprhs 308 /// 309 static std::unique_ptr<ExprAST> ParseExpression() { 310 auto LHS = ParsePrimary(); 311 if (!LHS) 312 return nullptr; 313 314 return ParseBinOpRHS(0, std::move(LHS)); 315 } 316 317 /// prototype 318 /// ::= id '(' id* ')' 319 static std::unique_ptr<PrototypeAST> ParsePrototype() { 320 if (CurTok != tok_identifier) 321 return LogErrorP("Expected function name in prototype"); 322 323 std::string FnName = IdentifierStr; 324 getNextToken(); 325 326 if (CurTok != '(') 327 return LogErrorP("Expected '(' in prototype"); 328 329 std::vector<std::string> ArgNames; 330 while (getNextToken() == tok_identifier) 331 ArgNames.push_back(IdentifierStr); 332 if (CurTok != ')') 333 return LogErrorP("Expected ')' in prototype"); 334 335 // success. 336 getNextToken(); // eat ')'. 337 338 return llvm::make_unique<PrototypeAST>(FnName, std::move(ArgNames)); 339 } 340 341 /// definition ::= 'def' prototype expression 342 static std::unique_ptr<FunctionAST> ParseDefinition() { 343 getNextToken(); // eat def. 344 auto Proto = ParsePrototype(); 345 if (!Proto) 346 return nullptr; 347 348 if (auto E = ParseExpression()) 349 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 350 return nullptr; 351 } 352 353 /// toplevelexpr ::= expression 354 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { 355 if (auto E = ParseExpression()) { 356 // Make an anonymous proto. 357 auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr", 358 std::vector<std::string>()); 359 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 360 } 361 return nullptr; 362 } 363 364 /// external ::= 'extern' prototype 365 static std::unique_ptr<PrototypeAST> ParseExtern() { 366 getNextToken(); // eat extern. 367 return ParsePrototype(); 368 } 369 370 //===----------------------------------------------------------------------===// 371 // Top-Level parsing 372 //===----------------------------------------------------------------------===// 373 374 static void HandleDefinition() { 375 if (ParseDefinition()) { 376 fprintf(stderr, "Parsed a function definition.\n"); 377 } else { 378 // Skip token for error recovery. 379 getNextToken(); 380 } 381 } 382 383 static void HandleExtern() { 384 if (ParseExtern()) { 385 fprintf(stderr, "Parsed an extern\n"); 386 } else { 387 // Skip token for error recovery. 388 getNextToken(); 389 } 390 } 391 392 static void HandleTopLevelExpression() { 393 // Evaluate a top-level expression into an anonymous function. 394 if (ParseTopLevelExpr()) { 395 fprintf(stderr, "Parsed a top-level expr\n"); 396 } else { 397 // Skip token for error recovery. 398 getNextToken(); 399 } 400 } 401 402 /// top ::= definition | external | expression | ';' 403 static void MainLoop() { 404 while (true) { 405 fprintf(stderr, "ready> "); 406 switch (CurTok) { 407 case tok_eof: 408 return; 409 case ';': // ignore top-level semicolons. 410 getNextToken(); 411 break; 412 case tok_def: 413 HandleDefinition(); 414 break; 415 case tok_extern: 416 HandleExtern(); 417 break; 418 default: 419 HandleTopLevelExpression(); 420 break; 421 } 422 } 423 } 424 425 //===----------------------------------------------------------------------===// 426 // Main driver code. 427 //===----------------------------------------------------------------------===// 428 429 int main() { 430 // Install standard binary operators. 431 // 1 is lowest precedence. 432 BinopPrecedence['<'] = 10; 433 BinopPrecedence['+'] = 20; 434 BinopPrecedence['-'] = 20; 435 BinopPrecedence['*'] = 40; // highest. 436 437 // Prime the first token. 438 fprintf(stderr, "ready> "); 439 getNextToken(); 440 441 // Run the main "interpreter loop" now. 442 MainLoop(); 443 444 return 0; 445 } 446