1 #include "llvm/ADT/APFloat.h" 2 #include "llvm/ADT/Optional.h" 3 #include "llvm/ADT/STLExtras.h" 4 #include "llvm/IR/BasicBlock.h" 5 #include "llvm/IR/Constants.h" 6 #include "llvm/IR/DerivedTypes.h" 7 #include "llvm/IR/Function.h" 8 #include "llvm/IR/Instructions.h" 9 #include "llvm/IR/IRBuilder.h" 10 #include "llvm/IR/LLVMContext.h" 11 #include "llvm/IR/LegacyPassManager.h" 12 #include "llvm/IR/Module.h" 13 #include "llvm/IR/Type.h" 14 #include "llvm/IR/Verifier.h" 15 #include "llvm/Support/FileSystem.h" 16 #include "llvm/Support/Host.h" 17 #include "llvm/Support/raw_ostream.h" 18 #include "llvm/Support/TargetRegistry.h" 19 #include "llvm/Support/TargetSelect.h" 20 #include "llvm/Target/TargetMachine.h" 21 #include "llvm/Target/TargetOptions.h" 22 #include <algorithm> 23 #include <cassert> 24 #include <cctype> 25 #include <cstdio> 26 #include <cstdlib> 27 #include <map> 28 #include <memory> 29 #include <string> 30 #include <system_error> 31 #include <utility> 32 #include <vector> 33 34 using namespace llvm; 35 using namespace llvm::sys; 36 37 //===----------------------------------------------------------------------===// 38 // Lexer 39 //===----------------------------------------------------------------------===// 40 41 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 42 // of these for known things. 43 enum Token { 44 tok_eof = -1, 45 46 // commands 47 tok_def = -2, 48 tok_extern = -3, 49 50 // primary 51 tok_identifier = -4, 52 tok_number = -5, 53 54 // control 55 tok_if = -6, 56 tok_then = -7, 57 tok_else = -8, 58 tok_for = -9, 59 tok_in = -10, 60 61 // operators 62 tok_binary = -11, 63 tok_unary = -12, 64 65 // var definition 66 tok_var = -13 67 }; 68 69 static std::string IdentifierStr; // Filled in if tok_identifier 70 static double NumVal; // Filled in if tok_number 71 72 /// gettok - Return the next token from standard input. 73 static int gettok() { 74 static int LastChar = ' '; 75 76 // Skip any whitespace. 77 while (isspace(LastChar)) 78 LastChar = getchar(); 79 80 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 81 IdentifierStr = LastChar; 82 while (isalnum((LastChar = getchar()))) 83 IdentifierStr += LastChar; 84 85 if (IdentifierStr == "def") 86 return tok_def; 87 if (IdentifierStr == "extern") 88 return tok_extern; 89 if (IdentifierStr == "if") 90 return tok_if; 91 if (IdentifierStr == "then") 92 return tok_then; 93 if (IdentifierStr == "else") 94 return tok_else; 95 if (IdentifierStr == "for") 96 return tok_for; 97 if (IdentifierStr == "in") 98 return tok_in; 99 if (IdentifierStr == "binary") 100 return tok_binary; 101 if (IdentifierStr == "unary") 102 return tok_unary; 103 if (IdentifierStr == "var") 104 return tok_var; 105 return tok_identifier; 106 } 107 108 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 109 std::string NumStr; 110 do { 111 NumStr += LastChar; 112 LastChar = getchar(); 113 } while (isdigit(LastChar) || LastChar == '.'); 114 115 NumVal = strtod(NumStr.c_str(), nullptr); 116 return tok_number; 117 } 118 119 if (LastChar == '#') { 120 // Comment until end of line. 121 do 122 LastChar = getchar(); 123 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 124 125 if (LastChar != EOF) 126 return gettok(); 127 } 128 129 // Check for end of file. Don't eat the EOF. 130 if (LastChar == EOF) 131 return tok_eof; 132 133 // Otherwise, just return the character as its ascii value. 134 int ThisChar = LastChar; 135 LastChar = getchar(); 136 return ThisChar; 137 } 138 139 //===----------------------------------------------------------------------===// 140 // Abstract Syntax Tree (aka Parse Tree) 141 //===----------------------------------------------------------------------===// 142 143 namespace { 144 145 /// ExprAST - Base class for all expression nodes. 146 class ExprAST { 147 public: 148 virtual ~ExprAST() = default; 149 150 virtual Value *codegen() = 0; 151 }; 152 153 /// NumberExprAST - Expression class for numeric literals like "1.0". 154 class NumberExprAST : public ExprAST { 155 double Val; 156 157 public: 158 NumberExprAST(double Val) : Val(Val) {} 159 160 Value *codegen() override; 161 }; 162 163 /// VariableExprAST - Expression class for referencing a variable, like "a". 164 class VariableExprAST : public ExprAST { 165 std::string Name; 166 167 public: 168 VariableExprAST(const std::string &Name) : Name(Name) {} 169 170 Value *codegen() override; 171 const std::string &getName() const { return Name; } 172 }; 173 174 /// UnaryExprAST - Expression class for a unary operator. 175 class UnaryExprAST : public ExprAST { 176 char Opcode; 177 std::unique_ptr<ExprAST> Operand; 178 179 public: 180 UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) 181 : Opcode(Opcode), Operand(std::move(Operand)) {} 182 183 Value *codegen() override; 184 }; 185 186 /// BinaryExprAST - Expression class for a binary operator. 187 class BinaryExprAST : public ExprAST { 188 char Op; 189 std::unique_ptr<ExprAST> LHS, RHS; 190 191 public: 192 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, 193 std::unique_ptr<ExprAST> RHS) 194 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} 195 196 Value *codegen() override; 197 }; 198 199 /// CallExprAST - Expression class for function calls. 200 class CallExprAST : public ExprAST { 201 std::string Callee; 202 std::vector<std::unique_ptr<ExprAST>> Args; 203 204 public: 205 CallExprAST(const std::string &Callee, 206 std::vector<std::unique_ptr<ExprAST>> Args) 207 : Callee(Callee), Args(std::move(Args)) {} 208 209 Value *codegen() override; 210 }; 211 212 /// IfExprAST - Expression class for if/then/else. 213 class IfExprAST : public ExprAST { 214 std::unique_ptr<ExprAST> Cond, Then, Else; 215 216 public: 217 IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then, 218 std::unique_ptr<ExprAST> Else) 219 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} 220 221 Value *codegen() override; 222 }; 223 224 /// ForExprAST - Expression class for for/in. 225 class ForExprAST : public ExprAST { 226 std::string VarName; 227 std::unique_ptr<ExprAST> Start, End, Step, Body; 228 229 public: 230 ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, 231 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, 232 std::unique_ptr<ExprAST> Body) 233 : VarName(VarName), Start(std::move(Start)), End(std::move(End)), 234 Step(std::move(Step)), Body(std::move(Body)) {} 235 236 Value *codegen() override; 237 }; 238 239 /// VarExprAST - Expression class for var/in 240 class VarExprAST : public ExprAST { 241 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 242 std::unique_ptr<ExprAST> Body; 243 244 public: 245 VarExprAST( 246 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames, 247 std::unique_ptr<ExprAST> Body) 248 : VarNames(std::move(VarNames)), Body(std::move(Body)) {} 249 250 Value *codegen() override; 251 }; 252 253 /// PrototypeAST - This class represents the "prototype" for a function, 254 /// which captures its name, and its argument names (thus implicitly the number 255 /// of arguments the function takes), as well as if it is an operator. 256 class PrototypeAST { 257 std::string Name; 258 std::vector<std::string> Args; 259 bool IsOperator; 260 unsigned Precedence; // Precedence if a binary op. 261 262 public: 263 PrototypeAST(const std::string &Name, std::vector<std::string> Args, 264 bool IsOperator = false, unsigned Prec = 0) 265 : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), 266 Precedence(Prec) {} 267 268 Function *codegen(); 269 const std::string &getName() const { return Name; } 270 271 bool isUnaryOp() const { return IsOperator && Args.size() == 1; } 272 bool isBinaryOp() const { return IsOperator && Args.size() == 2; } 273 274 char getOperatorName() const { 275 assert(isUnaryOp() || isBinaryOp()); 276 return Name[Name.size() - 1]; 277 } 278 279 unsigned getBinaryPrecedence() const { return Precedence; } 280 }; 281 282 /// FunctionAST - This class represents a function definition itself. 283 class FunctionAST { 284 std::unique_ptr<PrototypeAST> Proto; 285 std::unique_ptr<ExprAST> Body; 286 287 public: 288 FunctionAST(std::unique_ptr<PrototypeAST> Proto, 289 std::unique_ptr<ExprAST> Body) 290 : Proto(std::move(Proto)), Body(std::move(Body)) {} 291 292 Function *codegen(); 293 }; 294 295 } // end anonymous namespace 296 297 //===----------------------------------------------------------------------===// 298 // Parser 299 //===----------------------------------------------------------------------===// 300 301 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 302 /// token the parser is looking at. getNextToken reads another token from the 303 /// lexer and updates CurTok with its results. 304 static int CurTok; 305 static int getNextToken() { return CurTok = gettok(); } 306 307 /// BinopPrecedence - This holds the precedence for each binary operator that is 308 /// defined. 309 static std::map<char, int> BinopPrecedence; 310 311 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 312 static int GetTokPrecedence() { 313 if (!isascii(CurTok)) 314 return -1; 315 316 // Make sure it's a declared binop. 317 int TokPrec = BinopPrecedence[CurTok]; 318 if (TokPrec <= 0) 319 return -1; 320 return TokPrec; 321 } 322 323 /// LogError* - These are little helper functions for error handling. 324 std::unique_ptr<ExprAST> LogError(const char *Str) { 325 fprintf(stderr, "Error: %s\n", Str); 326 return nullptr; 327 } 328 329 std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { 330 LogError(Str); 331 return nullptr; 332 } 333 334 static std::unique_ptr<ExprAST> ParseExpression(); 335 336 /// numberexpr ::= number 337 static std::unique_ptr<ExprAST> ParseNumberExpr() { 338 auto Result = llvm::make_unique<NumberExprAST>(NumVal); 339 getNextToken(); // consume the number 340 return std::move(Result); 341 } 342 343 /// parenexpr ::= '(' expression ')' 344 static std::unique_ptr<ExprAST> ParseParenExpr() { 345 getNextToken(); // eat (. 346 auto V = ParseExpression(); 347 if (!V) 348 return nullptr; 349 350 if (CurTok != ')') 351 return LogError("expected ')'"); 352 getNextToken(); // eat ). 353 return V; 354 } 355 356 /// identifierexpr 357 /// ::= identifier 358 /// ::= identifier '(' expression* ')' 359 static std::unique_ptr<ExprAST> ParseIdentifierExpr() { 360 std::string IdName = IdentifierStr; 361 362 getNextToken(); // eat identifier. 363 364 if (CurTok != '(') // Simple variable ref. 365 return llvm::make_unique<VariableExprAST>(IdName); 366 367 // Call. 368 getNextToken(); // eat ( 369 std::vector<std::unique_ptr<ExprAST>> Args; 370 if (CurTok != ')') { 371 while (true) { 372 if (auto Arg = ParseExpression()) 373 Args.push_back(std::move(Arg)); 374 else 375 return nullptr; 376 377 if (CurTok == ')') 378 break; 379 380 if (CurTok != ',') 381 return LogError("Expected ')' or ',' in argument list"); 382 getNextToken(); 383 } 384 } 385 386 // Eat the ')'. 387 getNextToken(); 388 389 return llvm::make_unique<CallExprAST>(IdName, std::move(Args)); 390 } 391 392 /// ifexpr ::= 'if' expression 'then' expression 'else' expression 393 static std::unique_ptr<ExprAST> ParseIfExpr() { 394 getNextToken(); // eat the if. 395 396 // condition. 397 auto Cond = ParseExpression(); 398 if (!Cond) 399 return nullptr; 400 401 if (CurTok != tok_then) 402 return LogError("expected then"); 403 getNextToken(); // eat the then 404 405 auto Then = ParseExpression(); 406 if (!Then) 407 return nullptr; 408 409 if (CurTok != tok_else) 410 return LogError("expected else"); 411 412 getNextToken(); 413 414 auto Else = ParseExpression(); 415 if (!Else) 416 return nullptr; 417 418 return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then), 419 std::move(Else)); 420 } 421 422 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression 423 static std::unique_ptr<ExprAST> ParseForExpr() { 424 getNextToken(); // eat the for. 425 426 if (CurTok != tok_identifier) 427 return LogError("expected identifier after for"); 428 429 std::string IdName = IdentifierStr; 430 getNextToken(); // eat identifier. 431 432 if (CurTok != '=') 433 return LogError("expected '=' after for"); 434 getNextToken(); // eat '='. 435 436 auto Start = ParseExpression(); 437 if (!Start) 438 return nullptr; 439 if (CurTok != ',') 440 return LogError("expected ',' after for start value"); 441 getNextToken(); 442 443 auto End = ParseExpression(); 444 if (!End) 445 return nullptr; 446 447 // The step value is optional. 448 std::unique_ptr<ExprAST> Step; 449 if (CurTok == ',') { 450 getNextToken(); 451 Step = ParseExpression(); 452 if (!Step) 453 return nullptr; 454 } 455 456 if (CurTok != tok_in) 457 return LogError("expected 'in' after for"); 458 getNextToken(); // eat 'in'. 459 460 auto Body = ParseExpression(); 461 if (!Body) 462 return nullptr; 463 464 return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), 465 std::move(Step), std::move(Body)); 466 } 467 468 /// varexpr ::= 'var' identifier ('=' expression)? 469 // (',' identifier ('=' expression)?)* 'in' expression 470 static std::unique_ptr<ExprAST> ParseVarExpr() { 471 getNextToken(); // eat the var. 472 473 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 474 475 // At least one variable name is required. 476 if (CurTok != tok_identifier) 477 return LogError("expected identifier after var"); 478 479 while (true) { 480 std::string Name = IdentifierStr; 481 getNextToken(); // eat identifier. 482 483 // Read the optional initializer. 484 std::unique_ptr<ExprAST> Init = nullptr; 485 if (CurTok == '=') { 486 getNextToken(); // eat the '='. 487 488 Init = ParseExpression(); 489 if (!Init) 490 return nullptr; 491 } 492 493 VarNames.push_back(std::make_pair(Name, std::move(Init))); 494 495 // End of var list, exit loop. 496 if (CurTok != ',') 497 break; 498 getNextToken(); // eat the ','. 499 500 if (CurTok != tok_identifier) 501 return LogError("expected identifier list after var"); 502 } 503 504 // At this point, we have to have 'in'. 505 if (CurTok != tok_in) 506 return LogError("expected 'in' keyword after 'var'"); 507 getNextToken(); // eat 'in'. 508 509 auto Body = ParseExpression(); 510 if (!Body) 511 return nullptr; 512 513 return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body)); 514 } 515 516 /// primary 517 /// ::= identifierexpr 518 /// ::= numberexpr 519 /// ::= parenexpr 520 /// ::= ifexpr 521 /// ::= forexpr 522 /// ::= varexpr 523 static std::unique_ptr<ExprAST> ParsePrimary() { 524 switch (CurTok) { 525 default: 526 return LogError("unknown token when expecting an expression"); 527 case tok_identifier: 528 return ParseIdentifierExpr(); 529 case tok_number: 530 return ParseNumberExpr(); 531 case '(': 532 return ParseParenExpr(); 533 case tok_if: 534 return ParseIfExpr(); 535 case tok_for: 536 return ParseForExpr(); 537 case tok_var: 538 return ParseVarExpr(); 539 } 540 } 541 542 /// unary 543 /// ::= primary 544 /// ::= '!' unary 545 static std::unique_ptr<ExprAST> ParseUnary() { 546 // If the current token is not an operator, it must be a primary expr. 547 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') 548 return ParsePrimary(); 549 550 // If this is a unary operator, read it. 551 int Opc = CurTok; 552 getNextToken(); 553 if (auto Operand = ParseUnary()) 554 return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand)); 555 return nullptr; 556 } 557 558 /// binoprhs 559 /// ::= ('+' unary)* 560 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, 561 std::unique_ptr<ExprAST> LHS) { 562 // If this is a binop, find its precedence. 563 while (true) { 564 int TokPrec = GetTokPrecedence(); 565 566 // If this is a binop that binds at least as tightly as the current binop, 567 // consume it, otherwise we are done. 568 if (TokPrec < ExprPrec) 569 return LHS; 570 571 // Okay, we know this is a binop. 572 int BinOp = CurTok; 573 getNextToken(); // eat binop 574 575 // Parse the unary expression after the binary operator. 576 auto RHS = ParseUnary(); 577 if (!RHS) 578 return nullptr; 579 580 // If BinOp binds less tightly with RHS than the operator after RHS, let 581 // the pending operator take RHS as its LHS. 582 int NextPrec = GetTokPrecedence(); 583 if (TokPrec < NextPrec) { 584 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); 585 if (!RHS) 586 return nullptr; 587 } 588 589 // Merge LHS/RHS. 590 LHS = 591 llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); 592 } 593 } 594 595 /// expression 596 /// ::= unary binoprhs 597 /// 598 static std::unique_ptr<ExprAST> ParseExpression() { 599 auto LHS = ParseUnary(); 600 if (!LHS) 601 return nullptr; 602 603 return ParseBinOpRHS(0, std::move(LHS)); 604 } 605 606 /// prototype 607 /// ::= id '(' id* ')' 608 /// ::= binary LETTER number? (id, id) 609 /// ::= unary LETTER (id) 610 static std::unique_ptr<PrototypeAST> ParsePrototype() { 611 std::string FnName; 612 613 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. 614 unsigned BinaryPrecedence = 30; 615 616 switch (CurTok) { 617 default: 618 return LogErrorP("Expected function name in prototype"); 619 case tok_identifier: 620 FnName = IdentifierStr; 621 Kind = 0; 622 getNextToken(); 623 break; 624 case tok_unary: 625 getNextToken(); 626 if (!isascii(CurTok)) 627 return LogErrorP("Expected unary operator"); 628 FnName = "unary"; 629 FnName += (char)CurTok; 630 Kind = 1; 631 getNextToken(); 632 break; 633 case tok_binary: 634 getNextToken(); 635 if (!isascii(CurTok)) 636 return LogErrorP("Expected binary operator"); 637 FnName = "binary"; 638 FnName += (char)CurTok; 639 Kind = 2; 640 getNextToken(); 641 642 // Read the precedence if present. 643 if (CurTok == tok_number) { 644 if (NumVal < 1 || NumVal > 100) 645 return LogErrorP("Invalid precedecnce: must be 1..100"); 646 BinaryPrecedence = (unsigned)NumVal; 647 getNextToken(); 648 } 649 break; 650 } 651 652 if (CurTok != '(') 653 return LogErrorP("Expected '(' in prototype"); 654 655 std::vector<std::string> ArgNames; 656 while (getNextToken() == tok_identifier) 657 ArgNames.push_back(IdentifierStr); 658 if (CurTok != ')') 659 return LogErrorP("Expected ')' in prototype"); 660 661 // success. 662 getNextToken(); // eat ')'. 663 664 // Verify right number of names for operator. 665 if (Kind && ArgNames.size() != Kind) 666 return LogErrorP("Invalid number of operands for operator"); 667 668 return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0, 669 BinaryPrecedence); 670 } 671 672 /// definition ::= 'def' prototype expression 673 static std::unique_ptr<FunctionAST> ParseDefinition() { 674 getNextToken(); // eat def. 675 auto Proto = ParsePrototype(); 676 if (!Proto) 677 return nullptr; 678 679 if (auto E = ParseExpression()) 680 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 681 return nullptr; 682 } 683 684 /// toplevelexpr ::= expression 685 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { 686 if (auto E = ParseExpression()) { 687 // Make an anonymous proto. 688 auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr", 689 std::vector<std::string>()); 690 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 691 } 692 return nullptr; 693 } 694 695 /// external ::= 'extern' prototype 696 static std::unique_ptr<PrototypeAST> ParseExtern() { 697 getNextToken(); // eat extern. 698 return ParsePrototype(); 699 } 700 701 //===----------------------------------------------------------------------===// 702 // Code Generation 703 //===----------------------------------------------------------------------===// 704 705 static LLVMContext TheContext; 706 static IRBuilder<> Builder(TheContext); 707 static std::unique_ptr<Module> TheModule; 708 static std::map<std::string, AllocaInst *> NamedValues; 709 static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; 710 711 Value *LogErrorV(const char *Str) { 712 LogError(Str); 713 return nullptr; 714 } 715 716 Function *getFunction(std::string Name) { 717 // First, see if the function has already been added to the current module. 718 if (auto *F = TheModule->getFunction(Name)) 719 return F; 720 721 // If not, check whether we can codegen the declaration from some existing 722 // prototype. 723 auto FI = FunctionProtos.find(Name); 724 if (FI != FunctionProtos.end()) 725 return FI->second->codegen(); 726 727 // If no existing prototype exists, return null. 728 return nullptr; 729 } 730 731 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of 732 /// the function. This is used for mutable variables etc. 733 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, 734 const std::string &VarName) { 735 IRBuilder<> TmpB(&TheFunction->getEntryBlock(), 736 TheFunction->getEntryBlock().begin()); 737 return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), nullptr, VarName); 738 } 739 740 Value *NumberExprAST::codegen() { 741 return ConstantFP::get(TheContext, APFloat(Val)); 742 } 743 744 Value *VariableExprAST::codegen() { 745 // Look this variable up in the function. 746 Value *V = NamedValues[Name]; 747 if (!V) 748 return LogErrorV("Unknown variable name"); 749 750 // Load the value. 751 return Builder.CreateLoad(V, Name.c_str()); 752 } 753 754 Value *UnaryExprAST::codegen() { 755 Value *OperandV = Operand->codegen(); 756 if (!OperandV) 757 return nullptr; 758 759 Function *F = getFunction(std::string("unary") + Opcode); 760 if (!F) 761 return LogErrorV("Unknown unary operator"); 762 763 return Builder.CreateCall(F, OperandV, "unop"); 764 } 765 766 Value *BinaryExprAST::codegen() { 767 // Special case '=' because we don't want to emit the LHS as an expression. 768 if (Op == '=') { 769 // Assignment requires the LHS to be an identifier. 770 // This assume we're building without RTTI because LLVM builds that way by 771 // default. If you build LLVM with RTTI this can be changed to a 772 // dynamic_cast for automatic error checking. 773 VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get()); 774 if (!LHSE) 775 return LogErrorV("destination of '=' must be a variable"); 776 // Codegen the RHS. 777 Value *Val = RHS->codegen(); 778 if (!Val) 779 return nullptr; 780 781 // Look up the name. 782 Value *Variable = NamedValues[LHSE->getName()]; 783 if (!Variable) 784 return LogErrorV("Unknown variable name"); 785 786 Builder.CreateStore(Val, Variable); 787 return Val; 788 } 789 790 Value *L = LHS->codegen(); 791 Value *R = RHS->codegen(); 792 if (!L || !R) 793 return nullptr; 794 795 switch (Op) { 796 case '+': 797 return Builder.CreateFAdd(L, R, "addtmp"); 798 case '-': 799 return Builder.CreateFSub(L, R, "subtmp"); 800 case '*': 801 return Builder.CreateFMul(L, R, "multmp"); 802 case '<': 803 L = Builder.CreateFCmpULT(L, R, "cmptmp"); 804 // Convert bool 0/1 to double 0.0 or 1.0 805 return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); 806 default: 807 break; 808 } 809 810 // If it wasn't a builtin binary operator, it must be a user defined one. Emit 811 // a call to it. 812 Function *F = getFunction(std::string("binary") + Op); 813 assert(F && "binary operator not found!"); 814 815 Value *Ops[] = {L, R}; 816 return Builder.CreateCall(F, Ops, "binop"); 817 } 818 819 Value *CallExprAST::codegen() { 820 // Look up the name in the global module table. 821 Function *CalleeF = getFunction(Callee); 822 if (!CalleeF) 823 return LogErrorV("Unknown function referenced"); 824 825 // If argument mismatch error. 826 if (CalleeF->arg_size() != Args.size()) 827 return LogErrorV("Incorrect # arguments passed"); 828 829 std::vector<Value *> ArgsV; 830 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 831 ArgsV.push_back(Args[i]->codegen()); 832 if (!ArgsV.back()) 833 return nullptr; 834 } 835 836 return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); 837 } 838 839 Value *IfExprAST::codegen() { 840 Value *CondV = Cond->codegen(); 841 if (!CondV) 842 return nullptr; 843 844 // Convert condition to a bool by comparing equal to 0.0. 845 CondV = Builder.CreateFCmpONE( 846 CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); 847 848 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 849 850 // Create blocks for the then and else cases. Insert the 'then' block at the 851 // end of the function. 852 BasicBlock *ThenBB = BasicBlock::Create(TheContext, "then", TheFunction); 853 BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else"); 854 BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont"); 855 856 Builder.CreateCondBr(CondV, ThenBB, ElseBB); 857 858 // Emit then value. 859 Builder.SetInsertPoint(ThenBB); 860 861 Value *ThenV = Then->codegen(); 862 if (!ThenV) 863 return nullptr; 864 865 Builder.CreateBr(MergeBB); 866 // Codegen of 'Then' can change the current block, update ThenBB for the PHI. 867 ThenBB = Builder.GetInsertBlock(); 868 869 // Emit else block. 870 TheFunction->getBasicBlockList().push_back(ElseBB); 871 Builder.SetInsertPoint(ElseBB); 872 873 Value *ElseV = Else->codegen(); 874 if (!ElseV) 875 return nullptr; 876 877 Builder.CreateBr(MergeBB); 878 // Codegen of 'Else' can change the current block, update ElseBB for the PHI. 879 ElseBB = Builder.GetInsertBlock(); 880 881 // Emit merge block. 882 TheFunction->getBasicBlockList().push_back(MergeBB); 883 Builder.SetInsertPoint(MergeBB); 884 PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp"); 885 886 PN->addIncoming(ThenV, ThenBB); 887 PN->addIncoming(ElseV, ElseBB); 888 return PN; 889 } 890 891 // Output for-loop as: 892 // var = alloca double 893 // ... 894 // start = startexpr 895 // store start -> var 896 // goto loop 897 // loop: 898 // ... 899 // bodyexpr 900 // ... 901 // loopend: 902 // step = stepexpr 903 // endcond = endexpr 904 // 905 // curvar = load var 906 // nextvar = curvar + step 907 // store nextvar -> var 908 // br endcond, loop, endloop 909 // outloop: 910 Value *ForExprAST::codegen() { 911 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 912 913 // Create an alloca for the variable in the entry block. 914 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 915 916 // Emit the start code first, without 'variable' in scope. 917 Value *StartVal = Start->codegen(); 918 if (!StartVal) 919 return nullptr; 920 921 // Store the value into the alloca. 922 Builder.CreateStore(StartVal, Alloca); 923 924 // Make the new basic block for the loop header, inserting after current 925 // block. 926 BasicBlock *LoopBB = BasicBlock::Create(TheContext, "loop", TheFunction); 927 928 // Insert an explicit fall through from the current block to the LoopBB. 929 Builder.CreateBr(LoopBB); 930 931 // Start insertion in LoopBB. 932 Builder.SetInsertPoint(LoopBB); 933 934 // Within the loop, the variable is defined equal to the PHI node. If it 935 // shadows an existing variable, we have to restore it, so save it now. 936 AllocaInst *OldVal = NamedValues[VarName]; 937 NamedValues[VarName] = Alloca; 938 939 // Emit the body of the loop. This, like any other expr, can change the 940 // current BB. Note that we ignore the value computed by the body, but don't 941 // allow an error. 942 if (!Body->codegen()) 943 return nullptr; 944 945 // Emit the step value. 946 Value *StepVal = nullptr; 947 if (Step) { 948 StepVal = Step->codegen(); 949 if (!StepVal) 950 return nullptr; 951 } else { 952 // If not specified, use 1.0. 953 StepVal = ConstantFP::get(TheContext, APFloat(1.0)); 954 } 955 956 // Compute the end condition. 957 Value *EndCond = End->codegen(); 958 if (!EndCond) 959 return nullptr; 960 961 // Reload, increment, and restore the alloca. This handles the case where 962 // the body of the loop mutates the variable. 963 Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); 964 Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); 965 Builder.CreateStore(NextVar, Alloca); 966 967 // Convert condition to a bool by comparing equal to 0.0. 968 EndCond = Builder.CreateFCmpONE( 969 EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); 970 971 // Create the "after loop" block and insert it. 972 BasicBlock *AfterBB = 973 BasicBlock::Create(TheContext, "afterloop", TheFunction); 974 975 // Insert the conditional branch into the end of LoopEndBB. 976 Builder.CreateCondBr(EndCond, LoopBB, AfterBB); 977 978 // Any new code will be inserted in AfterBB. 979 Builder.SetInsertPoint(AfterBB); 980 981 // Restore the unshadowed variable. 982 if (OldVal) 983 NamedValues[VarName] = OldVal; 984 else 985 NamedValues.erase(VarName); 986 987 // for expr always returns 0.0. 988 return Constant::getNullValue(Type::getDoubleTy(TheContext)); 989 } 990 991 Value *VarExprAST::codegen() { 992 std::vector<AllocaInst *> OldBindings; 993 994 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 995 996 // Register all variables and emit their initializer. 997 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { 998 const std::string &VarName = VarNames[i].first; 999 ExprAST *Init = VarNames[i].second.get(); 1000 1001 // Emit the initializer before adding the variable to scope, this prevents 1002 // the initializer from referencing the variable itself, and permits stuff 1003 // like this: 1004 // var a = 1 in 1005 // var a = a in ... # refers to outer 'a'. 1006 Value *InitVal; 1007 if (Init) { 1008 InitVal = Init->codegen(); 1009 if (!InitVal) 1010 return nullptr; 1011 } else { // If not specified, use 0.0. 1012 InitVal = ConstantFP::get(TheContext, APFloat(0.0)); 1013 } 1014 1015 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 1016 Builder.CreateStore(InitVal, Alloca); 1017 1018 // Remember the old variable binding so that we can restore the binding when 1019 // we unrecurse. 1020 OldBindings.push_back(NamedValues[VarName]); 1021 1022 // Remember this binding. 1023 NamedValues[VarName] = Alloca; 1024 } 1025 1026 // Codegen the body, now that all vars are in scope. 1027 Value *BodyVal = Body->codegen(); 1028 if (!BodyVal) 1029 return nullptr; 1030 1031 // Pop all our variables from scope. 1032 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) 1033 NamedValues[VarNames[i].first] = OldBindings[i]; 1034 1035 // Return the body computation. 1036 return BodyVal; 1037 } 1038 1039 Function *PrototypeAST::codegen() { 1040 // Make the function type: double(double,double) etc. 1041 std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext)); 1042 FunctionType *FT = 1043 FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); 1044 1045 Function *F = 1046 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); 1047 1048 // Set names for all arguments. 1049 unsigned Idx = 0; 1050 for (auto &Arg : F->args()) 1051 Arg.setName(Args[Idx++]); 1052 1053 return F; 1054 } 1055 1056 Function *FunctionAST::codegen() { 1057 // Transfer ownership of the prototype to the FunctionProtos map, but keep a 1058 // reference to it for use below. 1059 auto &P = *Proto; 1060 FunctionProtos[Proto->getName()] = std::move(Proto); 1061 Function *TheFunction = getFunction(P.getName()); 1062 if (!TheFunction) 1063 return nullptr; 1064 1065 // If this is an operator, install it. 1066 if (P.isBinaryOp()) 1067 BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); 1068 1069 // Create a new basic block to start insertion into. 1070 BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); 1071 Builder.SetInsertPoint(BB); 1072 1073 // Record the function arguments in the NamedValues map. 1074 NamedValues.clear(); 1075 for (auto &Arg : TheFunction->args()) { 1076 // Create an alloca for this variable. 1077 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); 1078 1079 // Store the initial value into the alloca. 1080 Builder.CreateStore(&Arg, Alloca); 1081 1082 // Add arguments to variable symbol table. 1083 NamedValues[Arg.getName()] = Alloca; 1084 } 1085 1086 if (Value *RetVal = Body->codegen()) { 1087 // Finish off the function. 1088 Builder.CreateRet(RetVal); 1089 1090 // Validate the generated code, checking for consistency. 1091 verifyFunction(*TheFunction); 1092 1093 return TheFunction; 1094 } 1095 1096 // Error reading body, remove function. 1097 TheFunction->eraseFromParent(); 1098 1099 if (P.isBinaryOp()) 1100 BinopPrecedence.erase(Proto->getOperatorName()); 1101 return nullptr; 1102 } 1103 1104 //===----------------------------------------------------------------------===// 1105 // Top-Level parsing and JIT Driver 1106 //===----------------------------------------------------------------------===// 1107 1108 static void InitializeModuleAndPassManager() { 1109 // Open a new module. 1110 TheModule = llvm::make_unique<Module>("my cool jit", TheContext); 1111 } 1112 1113 static void HandleDefinition() { 1114 if (auto FnAST = ParseDefinition()) { 1115 if (auto *FnIR = FnAST->codegen()) { 1116 fprintf(stderr, "Read function definition:"); 1117 FnIR->dump(); 1118 } 1119 } else { 1120 // Skip token for error recovery. 1121 getNextToken(); 1122 } 1123 } 1124 1125 static void HandleExtern() { 1126 if (auto ProtoAST = ParseExtern()) { 1127 if (auto *FnIR = ProtoAST->codegen()) { 1128 fprintf(stderr, "Read extern: "); 1129 FnIR->dump(); 1130 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); 1131 } 1132 } else { 1133 // Skip token for error recovery. 1134 getNextToken(); 1135 } 1136 } 1137 1138 static void HandleTopLevelExpression() { 1139 // Evaluate a top-level expression into an anonymous function. 1140 if (auto FnAST = ParseTopLevelExpr()) { 1141 FnAST->codegen(); 1142 } else { 1143 // Skip token for error recovery. 1144 getNextToken(); 1145 } 1146 } 1147 1148 /// top ::= definition | external | expression | ';' 1149 static void MainLoop() { 1150 while (true) { 1151 switch (CurTok) { 1152 case tok_eof: 1153 return; 1154 case ';': // ignore top-level semicolons. 1155 getNextToken(); 1156 break; 1157 case tok_def: 1158 HandleDefinition(); 1159 break; 1160 case tok_extern: 1161 HandleExtern(); 1162 break; 1163 default: 1164 HandleTopLevelExpression(); 1165 break; 1166 } 1167 } 1168 } 1169 1170 //===----------------------------------------------------------------------===// 1171 // "Library" functions that can be "extern'd" from user code. 1172 //===----------------------------------------------------------------------===// 1173 1174 /// putchard - putchar that takes a double and returns 0. 1175 extern "C" double putchard(double X) { 1176 fputc((char)X, stderr); 1177 return 0; 1178 } 1179 1180 /// printd - printf that takes a double prints it as "%f\n", returning 0. 1181 extern "C" double printd(double X) { 1182 fprintf(stderr, "%f\n", X); 1183 return 0; 1184 } 1185 1186 //===----------------------------------------------------------------------===// 1187 // Main driver code. 1188 //===----------------------------------------------------------------------===// 1189 1190 int main() { 1191 // Install standard binary operators. 1192 // 1 is lowest precedence. 1193 BinopPrecedence['<'] = 10; 1194 BinopPrecedence['+'] = 20; 1195 BinopPrecedence['-'] = 20; 1196 BinopPrecedence['*'] = 40; // highest. 1197 1198 // Prime the first token. 1199 fprintf(stderr, "ready> "); 1200 getNextToken(); 1201 1202 InitializeModuleAndPassManager(); 1203 1204 // Run the main "interpreter loop" now. 1205 MainLoop(); 1206 1207 // Initialize the target registry etc. 1208 InitializeAllTargetInfos(); 1209 InitializeAllTargets(); 1210 InitializeAllTargetMCs(); 1211 InitializeAllAsmParsers(); 1212 InitializeAllAsmPrinters(); 1213 1214 auto TargetTriple = sys::getDefaultTargetTriple(); 1215 TheModule->setTargetTriple(TargetTriple); 1216 1217 std::string Error; 1218 auto Target = TargetRegistry::lookupTarget(TargetTriple, Error); 1219 1220 // Print an error and exit if we couldn't find the requested target. 1221 // This generally occurs if we've forgotten to initialise the 1222 // TargetRegistry or we have a bogus target triple. 1223 if (!Target) { 1224 errs() << Error; 1225 return 1; 1226 } 1227 1228 auto CPU = "generic"; 1229 auto Features = ""; 1230 1231 TargetOptions opt; 1232 auto RM = Optional<Reloc::Model>(); 1233 auto TheTargetMachine = 1234 Target->createTargetMachine(TargetTriple, CPU, Features, opt, RM); 1235 1236 TheModule->setDataLayout(TheTargetMachine->createDataLayout()); 1237 1238 auto Filename = "output.o"; 1239 std::error_code EC; 1240 raw_fd_ostream dest(Filename, EC, sys::fs::F_None); 1241 1242 if (EC) { 1243 errs() << "Could not open file: " << EC.message(); 1244 return 1; 1245 } 1246 1247 legacy::PassManager pass; 1248 auto FileType = TargetMachine::CGFT_ObjectFile; 1249 1250 if (TheTargetMachine->addPassesToEmitFile(pass, dest, FileType)) { 1251 errs() << "TheTargetMachine can't emit a file of this type"; 1252 return 1; 1253 } 1254 1255 pass.run(*TheModule); 1256 dest.flush(); 1257 1258 outs() << "Wrote " << Filename << "\n"; 1259 1260 return 0; 1261 } 1262