1 #include "llvm/ADT/APFloat.h" 2 #include "llvm/ADT/STLExtras.h" 3 #include "llvm/IR/BasicBlock.h" 4 #include "llvm/IR/Constants.h" 5 #include "llvm/IR/DerivedTypes.h" 6 #include "llvm/IR/Function.h" 7 #include "llvm/IR/Instructions.h" 8 #include "llvm/IR/IRBuilder.h" 9 #include "llvm/IR/LLVMContext.h" 10 #include "llvm/IR/LegacyPassManager.h" 11 #include "llvm/IR/Module.h" 12 #include "llvm/IR/Type.h" 13 #include "llvm/IR/Verifier.h" 14 #include "llvm/Support/TargetSelect.h" 15 #include "llvm/Target/TargetMachine.h" 16 #include "llvm/Transforms/Scalar.h" 17 #include "llvm/Transforms/Scalar/GVN.h" 18 #include "../include/KaleidoscopeJIT.h" 19 #include <algorithm> 20 #include <cassert> 21 #include <cctype> 22 #include <cstdint> 23 #include <cstdio> 24 #include <cstdlib> 25 #include <map> 26 #include <memory> 27 #include <string> 28 #include <utility> 29 #include <vector> 30 31 using namespace llvm; 32 using namespace llvm::orc; 33 34 //===----------------------------------------------------------------------===// 35 // Lexer 36 //===----------------------------------------------------------------------===// 37 38 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one 39 // of these for known things. 40 enum Token { 41 tok_eof = -1, 42 43 // commands 44 tok_def = -2, 45 tok_extern = -3, 46 47 // primary 48 tok_identifier = -4, 49 tok_number = -5, 50 51 // control 52 tok_if = -6, 53 tok_then = -7, 54 tok_else = -8, 55 tok_for = -9, 56 tok_in = -10, 57 58 // operators 59 tok_binary = -11, 60 tok_unary = -12, 61 62 // var definition 63 tok_var = -13 64 }; 65 66 static std::string IdentifierStr; // Filled in if tok_identifier 67 static double NumVal; // Filled in if tok_number 68 69 /// gettok - Return the next token from standard input. 70 static int gettok() { 71 static int LastChar = ' '; 72 73 // Skip any whitespace. 74 while (isspace(LastChar)) 75 LastChar = getchar(); 76 77 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 78 IdentifierStr = LastChar; 79 while (isalnum((LastChar = getchar()))) 80 IdentifierStr += LastChar; 81 82 if (IdentifierStr == "def") 83 return tok_def; 84 if (IdentifierStr == "extern") 85 return tok_extern; 86 if (IdentifierStr == "if") 87 return tok_if; 88 if (IdentifierStr == "then") 89 return tok_then; 90 if (IdentifierStr == "else") 91 return tok_else; 92 if (IdentifierStr == "for") 93 return tok_for; 94 if (IdentifierStr == "in") 95 return tok_in; 96 if (IdentifierStr == "binary") 97 return tok_binary; 98 if (IdentifierStr == "unary") 99 return tok_unary; 100 if (IdentifierStr == "var") 101 return tok_var; 102 return tok_identifier; 103 } 104 105 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 106 std::string NumStr; 107 do { 108 NumStr += LastChar; 109 LastChar = getchar(); 110 } while (isdigit(LastChar) || LastChar == '.'); 111 112 NumVal = strtod(NumStr.c_str(), nullptr); 113 return tok_number; 114 } 115 116 if (LastChar == '#') { 117 // Comment until end of line. 118 do 119 LastChar = getchar(); 120 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 121 122 if (LastChar != EOF) 123 return gettok(); 124 } 125 126 // Check for end of file. Don't eat the EOF. 127 if (LastChar == EOF) 128 return tok_eof; 129 130 // Otherwise, just return the character as its ascii value. 131 int ThisChar = LastChar; 132 LastChar = getchar(); 133 return ThisChar; 134 } 135 136 //===----------------------------------------------------------------------===// 137 // Abstract Syntax Tree (aka Parse Tree) 138 //===----------------------------------------------------------------------===// 139 140 namespace { 141 142 /// ExprAST - Base class for all expression nodes. 143 class ExprAST { 144 public: 145 virtual ~ExprAST() = default; 146 147 virtual Value *codegen() = 0; 148 }; 149 150 /// NumberExprAST - Expression class for numeric literals like "1.0". 151 class NumberExprAST : public ExprAST { 152 double Val; 153 154 public: 155 NumberExprAST(double Val) : Val(Val) {} 156 157 Value *codegen() override; 158 }; 159 160 /// VariableExprAST - Expression class for referencing a variable, like "a". 161 class VariableExprAST : public ExprAST { 162 std::string Name; 163 164 public: 165 VariableExprAST(const std::string &Name) : Name(Name) {} 166 167 Value *codegen() override; 168 const std::string &getName() const { return Name; } 169 }; 170 171 /// UnaryExprAST - Expression class for a unary operator. 172 class UnaryExprAST : public ExprAST { 173 char Opcode; 174 std::unique_ptr<ExprAST> Operand; 175 176 public: 177 UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) 178 : Opcode(Opcode), Operand(std::move(Operand)) {} 179 180 Value *codegen() override; 181 }; 182 183 /// BinaryExprAST - Expression class for a binary operator. 184 class BinaryExprAST : public ExprAST { 185 char Op; 186 std::unique_ptr<ExprAST> LHS, RHS; 187 188 public: 189 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, 190 std::unique_ptr<ExprAST> RHS) 191 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} 192 193 Value *codegen() override; 194 }; 195 196 /// CallExprAST - Expression class for function calls. 197 class CallExprAST : public ExprAST { 198 std::string Callee; 199 std::vector<std::unique_ptr<ExprAST>> Args; 200 201 public: 202 CallExprAST(const std::string &Callee, 203 std::vector<std::unique_ptr<ExprAST>> Args) 204 : Callee(Callee), Args(std::move(Args)) {} 205 206 Value *codegen() override; 207 }; 208 209 /// IfExprAST - Expression class for if/then/else. 210 class IfExprAST : public ExprAST { 211 std::unique_ptr<ExprAST> Cond, Then, Else; 212 213 public: 214 IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then, 215 std::unique_ptr<ExprAST> Else) 216 : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} 217 218 Value *codegen() override; 219 }; 220 221 /// ForExprAST - Expression class for for/in. 222 class ForExprAST : public ExprAST { 223 std::string VarName; 224 std::unique_ptr<ExprAST> Start, End, Step, Body; 225 226 public: 227 ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, 228 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, 229 std::unique_ptr<ExprAST> Body) 230 : VarName(VarName), Start(std::move(Start)), End(std::move(End)), 231 Step(std::move(Step)), Body(std::move(Body)) {} 232 233 Value *codegen() override; 234 }; 235 236 /// VarExprAST - Expression class for var/in 237 class VarExprAST : public ExprAST { 238 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 239 std::unique_ptr<ExprAST> Body; 240 241 public: 242 VarExprAST( 243 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames, 244 std::unique_ptr<ExprAST> Body) 245 : VarNames(std::move(VarNames)), Body(std::move(Body)) {} 246 247 Value *codegen() override; 248 }; 249 250 /// PrototypeAST - This class represents the "prototype" for a function, 251 /// which captures its name, and its argument names (thus implicitly the number 252 /// of arguments the function takes), as well as if it is an operator. 253 class PrototypeAST { 254 std::string Name; 255 std::vector<std::string> Args; 256 bool IsOperator; 257 unsigned Precedence; // Precedence if a binary op. 258 259 public: 260 PrototypeAST(const std::string &Name, std::vector<std::string> Args, 261 bool IsOperator = false, unsigned Prec = 0) 262 : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), 263 Precedence(Prec) {} 264 265 Function *codegen(); 266 const std::string &getName() const { return Name; } 267 268 bool isUnaryOp() const { return IsOperator && Args.size() == 1; } 269 bool isBinaryOp() const { return IsOperator && Args.size() == 2; } 270 271 char getOperatorName() const { 272 assert(isUnaryOp() || isBinaryOp()); 273 return Name[Name.size() - 1]; 274 } 275 276 unsigned getBinaryPrecedence() const { return Precedence; } 277 }; 278 279 /// FunctionAST - This class represents a function definition itself. 280 class FunctionAST { 281 std::unique_ptr<PrototypeAST> Proto; 282 std::unique_ptr<ExprAST> Body; 283 284 public: 285 FunctionAST(std::unique_ptr<PrototypeAST> Proto, 286 std::unique_ptr<ExprAST> Body) 287 : Proto(std::move(Proto)), Body(std::move(Body)) {} 288 289 Function *codegen(); 290 }; 291 292 } // end anonymous namespace 293 294 //===----------------------------------------------------------------------===// 295 // Parser 296 //===----------------------------------------------------------------------===// 297 298 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 299 /// token the parser is looking at. getNextToken reads another token from the 300 /// lexer and updates CurTok with its results. 301 static int CurTok; 302 static int getNextToken() { return CurTok = gettok(); } 303 304 /// BinopPrecedence - This holds the precedence for each binary operator that is 305 /// defined. 306 static std::map<char, int> BinopPrecedence; 307 308 /// GetTokPrecedence - Get the precedence of the pending binary operator token. 309 static int GetTokPrecedence() { 310 if (!isascii(CurTok)) 311 return -1; 312 313 // Make sure it's a declared binop. 314 int TokPrec = BinopPrecedence[CurTok]; 315 if (TokPrec <= 0) 316 return -1; 317 return TokPrec; 318 } 319 320 /// LogError* - These are little helper functions for error handling. 321 std::unique_ptr<ExprAST> LogError(const char *Str) { 322 fprintf(stderr, "Error: %s\n", Str); 323 return nullptr; 324 } 325 326 std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { 327 LogError(Str); 328 return nullptr; 329 } 330 331 static std::unique_ptr<ExprAST> ParseExpression(); 332 333 /// numberexpr ::= number 334 static std::unique_ptr<ExprAST> ParseNumberExpr() { 335 auto Result = llvm::make_unique<NumberExprAST>(NumVal); 336 getNextToken(); // consume the number 337 return std::move(Result); 338 } 339 340 /// parenexpr ::= '(' expression ')' 341 static std::unique_ptr<ExprAST> ParseParenExpr() { 342 getNextToken(); // eat (. 343 auto V = ParseExpression(); 344 if (!V) 345 return nullptr; 346 347 if (CurTok != ')') 348 return LogError("expected ')'"); 349 getNextToken(); // eat ). 350 return V; 351 } 352 353 /// identifierexpr 354 /// ::= identifier 355 /// ::= identifier '(' expression* ')' 356 static std::unique_ptr<ExprAST> ParseIdentifierExpr() { 357 std::string IdName = IdentifierStr; 358 359 getNextToken(); // eat identifier. 360 361 if (CurTok != '(') // Simple variable ref. 362 return llvm::make_unique<VariableExprAST>(IdName); 363 364 // Call. 365 getNextToken(); // eat ( 366 std::vector<std::unique_ptr<ExprAST>> Args; 367 if (CurTok != ')') { 368 while (true) { 369 if (auto Arg = ParseExpression()) 370 Args.push_back(std::move(Arg)); 371 else 372 return nullptr; 373 374 if (CurTok == ')') 375 break; 376 377 if (CurTok != ',') 378 return LogError("Expected ')' or ',' in argument list"); 379 getNextToken(); 380 } 381 } 382 383 // Eat the ')'. 384 getNextToken(); 385 386 return llvm::make_unique<CallExprAST>(IdName, std::move(Args)); 387 } 388 389 /// ifexpr ::= 'if' expression 'then' expression 'else' expression 390 static std::unique_ptr<ExprAST> ParseIfExpr() { 391 getNextToken(); // eat the if. 392 393 // condition. 394 auto Cond = ParseExpression(); 395 if (!Cond) 396 return nullptr; 397 398 if (CurTok != tok_then) 399 return LogError("expected then"); 400 getNextToken(); // eat the then 401 402 auto Then = ParseExpression(); 403 if (!Then) 404 return nullptr; 405 406 if (CurTok != tok_else) 407 return LogError("expected else"); 408 409 getNextToken(); 410 411 auto Else = ParseExpression(); 412 if (!Else) 413 return nullptr; 414 415 return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then), 416 std::move(Else)); 417 } 418 419 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression 420 static std::unique_ptr<ExprAST> ParseForExpr() { 421 getNextToken(); // eat the for. 422 423 if (CurTok != tok_identifier) 424 return LogError("expected identifier after for"); 425 426 std::string IdName = IdentifierStr; 427 getNextToken(); // eat identifier. 428 429 if (CurTok != '=') 430 return LogError("expected '=' after for"); 431 getNextToken(); // eat '='. 432 433 auto Start = ParseExpression(); 434 if (!Start) 435 return nullptr; 436 if (CurTok != ',') 437 return LogError("expected ',' after for start value"); 438 getNextToken(); 439 440 auto End = ParseExpression(); 441 if (!End) 442 return nullptr; 443 444 // The step value is optional. 445 std::unique_ptr<ExprAST> Step; 446 if (CurTok == ',') { 447 getNextToken(); 448 Step = ParseExpression(); 449 if (!Step) 450 return nullptr; 451 } 452 453 if (CurTok != tok_in) 454 return LogError("expected 'in' after for"); 455 getNextToken(); // eat 'in'. 456 457 auto Body = ParseExpression(); 458 if (!Body) 459 return nullptr; 460 461 return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), 462 std::move(Step), std::move(Body)); 463 } 464 465 /// varexpr ::= 'var' identifier ('=' expression)? 466 // (',' identifier ('=' expression)?)* 'in' expression 467 static std::unique_ptr<ExprAST> ParseVarExpr() { 468 getNextToken(); // eat the var. 469 470 std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; 471 472 // At least one variable name is required. 473 if (CurTok != tok_identifier) 474 return LogError("expected identifier after var"); 475 476 while (true) { 477 std::string Name = IdentifierStr; 478 getNextToken(); // eat identifier. 479 480 // Read the optional initializer. 481 std::unique_ptr<ExprAST> Init = nullptr; 482 if (CurTok == '=') { 483 getNextToken(); // eat the '='. 484 485 Init = ParseExpression(); 486 if (!Init) 487 return nullptr; 488 } 489 490 VarNames.push_back(std::make_pair(Name, std::move(Init))); 491 492 // End of var list, exit loop. 493 if (CurTok != ',') 494 break; 495 getNextToken(); // eat the ','. 496 497 if (CurTok != tok_identifier) 498 return LogError("expected identifier list after var"); 499 } 500 501 // At this point, we have to have 'in'. 502 if (CurTok != tok_in) 503 return LogError("expected 'in' keyword after 'var'"); 504 getNextToken(); // eat 'in'. 505 506 auto Body = ParseExpression(); 507 if (!Body) 508 return nullptr; 509 510 return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body)); 511 } 512 513 /// primary 514 /// ::= identifierexpr 515 /// ::= numberexpr 516 /// ::= parenexpr 517 /// ::= ifexpr 518 /// ::= forexpr 519 /// ::= varexpr 520 static std::unique_ptr<ExprAST> ParsePrimary() { 521 switch (CurTok) { 522 default: 523 return LogError("unknown token when expecting an expression"); 524 case tok_identifier: 525 return ParseIdentifierExpr(); 526 case tok_number: 527 return ParseNumberExpr(); 528 case '(': 529 return ParseParenExpr(); 530 case tok_if: 531 return ParseIfExpr(); 532 case tok_for: 533 return ParseForExpr(); 534 case tok_var: 535 return ParseVarExpr(); 536 } 537 } 538 539 /// unary 540 /// ::= primary 541 /// ::= '!' unary 542 static std::unique_ptr<ExprAST> ParseUnary() { 543 // If the current token is not an operator, it must be a primary expr. 544 if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') 545 return ParsePrimary(); 546 547 // If this is a unary operator, read it. 548 int Opc = CurTok; 549 getNextToken(); 550 if (auto Operand = ParseUnary()) 551 return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand)); 552 return nullptr; 553 } 554 555 /// binoprhs 556 /// ::= ('+' unary)* 557 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, 558 std::unique_ptr<ExprAST> LHS) { 559 // If this is a binop, find its precedence. 560 while (true) { 561 int TokPrec = GetTokPrecedence(); 562 563 // If this is a binop that binds at least as tightly as the current binop, 564 // consume it, otherwise we are done. 565 if (TokPrec < ExprPrec) 566 return LHS; 567 568 // Okay, we know this is a binop. 569 int BinOp = CurTok; 570 getNextToken(); // eat binop 571 572 // Parse the unary expression after the binary operator. 573 auto RHS = ParseUnary(); 574 if (!RHS) 575 return nullptr; 576 577 // If BinOp binds less tightly with RHS than the operator after RHS, let 578 // the pending operator take RHS as its LHS. 579 int NextPrec = GetTokPrecedence(); 580 if (TokPrec < NextPrec) { 581 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); 582 if (!RHS) 583 return nullptr; 584 } 585 586 // Merge LHS/RHS. 587 LHS = 588 llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); 589 } 590 } 591 592 /// expression 593 /// ::= unary binoprhs 594 /// 595 static std::unique_ptr<ExprAST> ParseExpression() { 596 auto LHS = ParseUnary(); 597 if (!LHS) 598 return nullptr; 599 600 return ParseBinOpRHS(0, std::move(LHS)); 601 } 602 603 /// prototype 604 /// ::= id '(' id* ')' 605 /// ::= binary LETTER number? (id, id) 606 /// ::= unary LETTER (id) 607 static std::unique_ptr<PrototypeAST> ParsePrototype() { 608 std::string FnName; 609 610 unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. 611 unsigned BinaryPrecedence = 30; 612 613 switch (CurTok) { 614 default: 615 return LogErrorP("Expected function name in prototype"); 616 case tok_identifier: 617 FnName = IdentifierStr; 618 Kind = 0; 619 getNextToken(); 620 break; 621 case tok_unary: 622 getNextToken(); 623 if (!isascii(CurTok)) 624 return LogErrorP("Expected unary operator"); 625 FnName = "unary"; 626 FnName += (char)CurTok; 627 Kind = 1; 628 getNextToken(); 629 break; 630 case tok_binary: 631 getNextToken(); 632 if (!isascii(CurTok)) 633 return LogErrorP("Expected binary operator"); 634 FnName = "binary"; 635 FnName += (char)CurTok; 636 Kind = 2; 637 getNextToken(); 638 639 // Read the precedence if present. 640 if (CurTok == tok_number) { 641 if (NumVal < 1 || NumVal > 100) 642 return LogErrorP("Invalid precedecnce: must be 1..100"); 643 BinaryPrecedence = (unsigned)NumVal; 644 getNextToken(); 645 } 646 break; 647 } 648 649 if (CurTok != '(') 650 return LogErrorP("Expected '(' in prototype"); 651 652 std::vector<std::string> ArgNames; 653 while (getNextToken() == tok_identifier) 654 ArgNames.push_back(IdentifierStr); 655 if (CurTok != ')') 656 return LogErrorP("Expected ')' in prototype"); 657 658 // success. 659 getNextToken(); // eat ')'. 660 661 // Verify right number of names for operator. 662 if (Kind && ArgNames.size() != Kind) 663 return LogErrorP("Invalid number of operands for operator"); 664 665 return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0, 666 BinaryPrecedence); 667 } 668 669 /// definition ::= 'def' prototype expression 670 static std::unique_ptr<FunctionAST> ParseDefinition() { 671 getNextToken(); // eat def. 672 auto Proto = ParsePrototype(); 673 if (!Proto) 674 return nullptr; 675 676 if (auto E = ParseExpression()) 677 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 678 return nullptr; 679 } 680 681 /// toplevelexpr ::= expression 682 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { 683 if (auto E = ParseExpression()) { 684 // Make an anonymous proto. 685 auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr", 686 std::vector<std::string>()); 687 return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 688 } 689 return nullptr; 690 } 691 692 /// external ::= 'extern' prototype 693 static std::unique_ptr<PrototypeAST> ParseExtern() { 694 getNextToken(); // eat extern. 695 return ParsePrototype(); 696 } 697 698 //===----------------------------------------------------------------------===// 699 // Code Generation 700 //===----------------------------------------------------------------------===// 701 702 static LLVMContext TheContext; 703 static IRBuilder<> Builder(TheContext); 704 static std::unique_ptr<Module> TheModule; 705 static std::map<std::string, AllocaInst *> NamedValues; 706 static std::unique_ptr<legacy::FunctionPassManager> TheFPM; 707 static std::unique_ptr<KaleidoscopeJIT> TheJIT; 708 static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; 709 710 Value *LogErrorV(const char *Str) { 711 LogError(Str); 712 return nullptr; 713 } 714 715 Function *getFunction(std::string Name) { 716 // First, see if the function has already been added to the current module. 717 if (auto *F = TheModule->getFunction(Name)) 718 return F; 719 720 // If not, check whether we can codegen the declaration from some existing 721 // prototype. 722 auto FI = FunctionProtos.find(Name); 723 if (FI != FunctionProtos.end()) 724 return FI->second->codegen(); 725 726 // If no existing prototype exists, return null. 727 return nullptr; 728 } 729 730 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of 731 /// the function. This is used for mutable variables etc. 732 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, 733 const std::string &VarName) { 734 IRBuilder<> TmpB(&TheFunction->getEntryBlock(), 735 TheFunction->getEntryBlock().begin()); 736 return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), nullptr, VarName); 737 } 738 739 Value *NumberExprAST::codegen() { 740 return ConstantFP::get(TheContext, APFloat(Val)); 741 } 742 743 Value *VariableExprAST::codegen() { 744 // Look this variable up in the function. 745 Value *V = NamedValues[Name]; 746 if (!V) 747 return LogErrorV("Unknown variable name"); 748 749 // Load the value. 750 return Builder.CreateLoad(V, Name.c_str()); 751 } 752 753 Value *UnaryExprAST::codegen() { 754 Value *OperandV = Operand->codegen(); 755 if (!OperandV) 756 return nullptr; 757 758 Function *F = getFunction(std::string("unary") + Opcode); 759 if (!F) 760 return LogErrorV("Unknown unary operator"); 761 762 return Builder.CreateCall(F, OperandV, "unop"); 763 } 764 765 Value *BinaryExprAST::codegen() { 766 // Special case '=' because we don't want to emit the LHS as an expression. 767 if (Op == '=') { 768 // Assignment requires the LHS to be an identifier. 769 // This assume we're building without RTTI because LLVM builds that way by 770 // default. If you build LLVM with RTTI this can be changed to a 771 // dynamic_cast for automatic error checking. 772 VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get()); 773 if (!LHSE) 774 return LogErrorV("destination of '=' must be a variable"); 775 // Codegen the RHS. 776 Value *Val = RHS->codegen(); 777 if (!Val) 778 return nullptr; 779 780 // Look up the name. 781 Value *Variable = NamedValues[LHSE->getName()]; 782 if (!Variable) 783 return LogErrorV("Unknown variable name"); 784 785 Builder.CreateStore(Val, Variable); 786 return Val; 787 } 788 789 Value *L = LHS->codegen(); 790 Value *R = RHS->codegen(); 791 if (!L || !R) 792 return nullptr; 793 794 switch (Op) { 795 case '+': 796 return Builder.CreateFAdd(L, R, "addtmp"); 797 case '-': 798 return Builder.CreateFSub(L, R, "subtmp"); 799 case '*': 800 return Builder.CreateFMul(L, R, "multmp"); 801 case '<': 802 L = Builder.CreateFCmpULT(L, R, "cmptmp"); 803 // Convert bool 0/1 to double 0.0 or 1.0 804 return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); 805 default: 806 break; 807 } 808 809 // If it wasn't a builtin binary operator, it must be a user defined one. Emit 810 // a call to it. 811 Function *F = getFunction(std::string("binary") + Op); 812 assert(F && "binary operator not found!"); 813 814 Value *Ops[] = {L, R}; 815 return Builder.CreateCall(F, Ops, "binop"); 816 } 817 818 Value *CallExprAST::codegen() { 819 // Look up the name in the global module table. 820 Function *CalleeF = getFunction(Callee); 821 if (!CalleeF) 822 return LogErrorV("Unknown function referenced"); 823 824 // If argument mismatch error. 825 if (CalleeF->arg_size() != Args.size()) 826 return LogErrorV("Incorrect # arguments passed"); 827 828 std::vector<Value *> ArgsV; 829 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 830 ArgsV.push_back(Args[i]->codegen()); 831 if (!ArgsV.back()) 832 return nullptr; 833 } 834 835 return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); 836 } 837 838 Value *IfExprAST::codegen() { 839 Value *CondV = Cond->codegen(); 840 if (!CondV) 841 return nullptr; 842 843 // Convert condition to a bool by comparing equal to 0.0. 844 CondV = Builder.CreateFCmpONE( 845 CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); 846 847 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 848 849 // Create blocks for the then and else cases. Insert the 'then' block at the 850 // end of the function. 851 BasicBlock *ThenBB = BasicBlock::Create(TheContext, "then", TheFunction); 852 BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else"); 853 BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont"); 854 855 Builder.CreateCondBr(CondV, ThenBB, ElseBB); 856 857 // Emit then value. 858 Builder.SetInsertPoint(ThenBB); 859 860 Value *ThenV = Then->codegen(); 861 if (!ThenV) 862 return nullptr; 863 864 Builder.CreateBr(MergeBB); 865 // Codegen of 'Then' can change the current block, update ThenBB for the PHI. 866 ThenBB = Builder.GetInsertBlock(); 867 868 // Emit else block. 869 TheFunction->getBasicBlockList().push_back(ElseBB); 870 Builder.SetInsertPoint(ElseBB); 871 872 Value *ElseV = Else->codegen(); 873 if (!ElseV) 874 return nullptr; 875 876 Builder.CreateBr(MergeBB); 877 // Codegen of 'Else' can change the current block, update ElseBB for the PHI. 878 ElseBB = Builder.GetInsertBlock(); 879 880 // Emit merge block. 881 TheFunction->getBasicBlockList().push_back(MergeBB); 882 Builder.SetInsertPoint(MergeBB); 883 PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp"); 884 885 PN->addIncoming(ThenV, ThenBB); 886 PN->addIncoming(ElseV, ElseBB); 887 return PN; 888 } 889 890 // Output for-loop as: 891 // var = alloca double 892 // ... 893 // start = startexpr 894 // store start -> var 895 // goto loop 896 // loop: 897 // ... 898 // bodyexpr 899 // ... 900 // loopend: 901 // step = stepexpr 902 // endcond = endexpr 903 // 904 // curvar = load var 905 // nextvar = curvar + step 906 // store nextvar -> var 907 // br endcond, loop, endloop 908 // outloop: 909 Value *ForExprAST::codegen() { 910 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 911 912 // Create an alloca for the variable in the entry block. 913 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 914 915 // Emit the start code first, without 'variable' in scope. 916 Value *StartVal = Start->codegen(); 917 if (!StartVal) 918 return nullptr; 919 920 // Store the value into the alloca. 921 Builder.CreateStore(StartVal, Alloca); 922 923 // Make the new basic block for the loop header, inserting after current 924 // block. 925 BasicBlock *LoopBB = BasicBlock::Create(TheContext, "loop", TheFunction); 926 927 // Insert an explicit fall through from the current block to the LoopBB. 928 Builder.CreateBr(LoopBB); 929 930 // Start insertion in LoopBB. 931 Builder.SetInsertPoint(LoopBB); 932 933 // Within the loop, the variable is defined equal to the PHI node. If it 934 // shadows an existing variable, we have to restore it, so save it now. 935 AllocaInst *OldVal = NamedValues[VarName]; 936 NamedValues[VarName] = Alloca; 937 938 // Emit the body of the loop. This, like any other expr, can change the 939 // current BB. Note that we ignore the value computed by the body, but don't 940 // allow an error. 941 if (!Body->codegen()) 942 return nullptr; 943 944 // Emit the step value. 945 Value *StepVal = nullptr; 946 if (Step) { 947 StepVal = Step->codegen(); 948 if (!StepVal) 949 return nullptr; 950 } else { 951 // If not specified, use 1.0. 952 StepVal = ConstantFP::get(TheContext, APFloat(1.0)); 953 } 954 955 // Compute the end condition. 956 Value *EndCond = End->codegen(); 957 if (!EndCond) 958 return nullptr; 959 960 // Reload, increment, and restore the alloca. This handles the case where 961 // the body of the loop mutates the variable. 962 Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); 963 Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); 964 Builder.CreateStore(NextVar, Alloca); 965 966 // Convert condition to a bool by comparing equal to 0.0. 967 EndCond = Builder.CreateFCmpONE( 968 EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); 969 970 // Create the "after loop" block and insert it. 971 BasicBlock *AfterBB = 972 BasicBlock::Create(TheContext, "afterloop", TheFunction); 973 974 // Insert the conditional branch into the end of LoopEndBB. 975 Builder.CreateCondBr(EndCond, LoopBB, AfterBB); 976 977 // Any new code will be inserted in AfterBB. 978 Builder.SetInsertPoint(AfterBB); 979 980 // Restore the unshadowed variable. 981 if (OldVal) 982 NamedValues[VarName] = OldVal; 983 else 984 NamedValues.erase(VarName); 985 986 // for expr always returns 0.0. 987 return Constant::getNullValue(Type::getDoubleTy(TheContext)); 988 } 989 990 Value *VarExprAST::codegen() { 991 std::vector<AllocaInst *> OldBindings; 992 993 Function *TheFunction = Builder.GetInsertBlock()->getParent(); 994 995 // Register all variables and emit their initializer. 996 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { 997 const std::string &VarName = VarNames[i].first; 998 ExprAST *Init = VarNames[i].second.get(); 999 1000 // Emit the initializer before adding the variable to scope, this prevents 1001 // the initializer from referencing the variable itself, and permits stuff 1002 // like this: 1003 // var a = 1 in 1004 // var a = a in ... # refers to outer 'a'. 1005 Value *InitVal; 1006 if (Init) { 1007 InitVal = Init->codegen(); 1008 if (!InitVal) 1009 return nullptr; 1010 } else { // If not specified, use 0.0. 1011 InitVal = ConstantFP::get(TheContext, APFloat(0.0)); 1012 } 1013 1014 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); 1015 Builder.CreateStore(InitVal, Alloca); 1016 1017 // Remember the old variable binding so that we can restore the binding when 1018 // we unrecurse. 1019 OldBindings.push_back(NamedValues[VarName]); 1020 1021 // Remember this binding. 1022 NamedValues[VarName] = Alloca; 1023 } 1024 1025 // Codegen the body, now that all vars are in scope. 1026 Value *BodyVal = Body->codegen(); 1027 if (!BodyVal) 1028 return nullptr; 1029 1030 // Pop all our variables from scope. 1031 for (unsigned i = 0, e = VarNames.size(); i != e; ++i) 1032 NamedValues[VarNames[i].first] = OldBindings[i]; 1033 1034 // Return the body computation. 1035 return BodyVal; 1036 } 1037 1038 Function *PrototypeAST::codegen() { 1039 // Make the function type: double(double,double) etc. 1040 std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext)); 1041 FunctionType *FT = 1042 FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); 1043 1044 Function *F = 1045 Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); 1046 1047 // Set names for all arguments. 1048 unsigned Idx = 0; 1049 for (auto &Arg : F->args()) 1050 Arg.setName(Args[Idx++]); 1051 1052 return F; 1053 } 1054 1055 Function *FunctionAST::codegen() { 1056 // Transfer ownership of the prototype to the FunctionProtos map, but keep a 1057 // reference to it for use below. 1058 auto &P = *Proto; 1059 FunctionProtos[Proto->getName()] = std::move(Proto); 1060 Function *TheFunction = getFunction(P.getName()); 1061 if (!TheFunction) 1062 return nullptr; 1063 1064 // If this is an operator, install it. 1065 if (P.isBinaryOp()) 1066 BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); 1067 1068 // Create a new basic block to start insertion into. 1069 BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); 1070 Builder.SetInsertPoint(BB); 1071 1072 // Record the function arguments in the NamedValues map. 1073 NamedValues.clear(); 1074 for (auto &Arg : TheFunction->args()) { 1075 // Create an alloca for this variable. 1076 AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); 1077 1078 // Store the initial value into the alloca. 1079 Builder.CreateStore(&Arg, Alloca); 1080 1081 // Add arguments to variable symbol table. 1082 NamedValues[Arg.getName()] = Alloca; 1083 } 1084 1085 if (Value *RetVal = Body->codegen()) { 1086 // Finish off the function. 1087 Builder.CreateRet(RetVal); 1088 1089 // Validate the generated code, checking for consistency. 1090 verifyFunction(*TheFunction); 1091 1092 // Run the optimizer on the function. 1093 TheFPM->run(*TheFunction); 1094 1095 return TheFunction; 1096 } 1097 1098 // Error reading body, remove function. 1099 TheFunction->eraseFromParent(); 1100 1101 if (P.isBinaryOp()) 1102 BinopPrecedence.erase(Proto->getOperatorName()); 1103 return nullptr; 1104 } 1105 1106 //===----------------------------------------------------------------------===// 1107 // Top-Level parsing and JIT Driver 1108 //===----------------------------------------------------------------------===// 1109 1110 static void InitializeModuleAndPassManager() { 1111 // Open a new module. 1112 TheModule = llvm::make_unique<Module>("my cool jit", TheContext); 1113 TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); 1114 1115 // Create a new pass manager attached to it. 1116 TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get()); 1117 1118 // Do simple "peephole" optimizations and bit-twiddling optzns. 1119 TheFPM->add(createInstructionCombiningPass()); 1120 // Reassociate expressions. 1121 TheFPM->add(createReassociatePass()); 1122 // Eliminate Common SubExpressions. 1123 TheFPM->add(createGVNPass()); 1124 // Simplify the control flow graph (deleting unreachable blocks, etc). 1125 TheFPM->add(createCFGSimplificationPass()); 1126 1127 TheFPM->doInitialization(); 1128 } 1129 1130 static void HandleDefinition() { 1131 if (auto FnAST = ParseDefinition()) { 1132 if (auto *FnIR = FnAST->codegen()) { 1133 fprintf(stderr, "Read function definition:"); 1134 FnIR->dump(); 1135 TheJIT->addModule(std::move(TheModule)); 1136 InitializeModuleAndPassManager(); 1137 } 1138 } else { 1139 // Skip token for error recovery. 1140 getNextToken(); 1141 } 1142 } 1143 1144 static void HandleExtern() { 1145 if (auto ProtoAST = ParseExtern()) { 1146 if (auto *FnIR = ProtoAST->codegen()) { 1147 fprintf(stderr, "Read extern: "); 1148 FnIR->dump(); 1149 FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); 1150 } 1151 } else { 1152 // Skip token for error recovery. 1153 getNextToken(); 1154 } 1155 } 1156 1157 static void HandleTopLevelExpression() { 1158 // Evaluate a top-level expression into an anonymous function. 1159 if (auto FnAST = ParseTopLevelExpr()) { 1160 if (FnAST->codegen()) { 1161 // JIT the module containing the anonymous expression, keeping a handle so 1162 // we can free it later. 1163 auto H = TheJIT->addModule(std::move(TheModule)); 1164 InitializeModuleAndPassManager(); 1165 1166 // Search the JIT for the __anon_expr symbol. 1167 auto ExprSymbol = TheJIT->findSymbol("__anon_expr"); 1168 assert(ExprSymbol && "Function not found"); 1169 1170 // Get the symbol's address and cast it to the right type (takes no 1171 // arguments, returns a double) so we can call it as a native function. 1172 double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); 1173 fprintf(stderr, "Evaluated to %f\n", FP()); 1174 1175 // Delete the anonymous expression module from the JIT. 1176 TheJIT->removeModule(H); 1177 } 1178 } else { 1179 // Skip token for error recovery. 1180 getNextToken(); 1181 } 1182 } 1183 1184 /// top ::= definition | external | expression | ';' 1185 static void MainLoop() { 1186 while (true) { 1187 fprintf(stderr, "ready> "); 1188 switch (CurTok) { 1189 case tok_eof: 1190 return; 1191 case ';': // ignore top-level semicolons. 1192 getNextToken(); 1193 break; 1194 case tok_def: 1195 HandleDefinition(); 1196 break; 1197 case tok_extern: 1198 HandleExtern(); 1199 break; 1200 default: 1201 HandleTopLevelExpression(); 1202 break; 1203 } 1204 } 1205 } 1206 1207 //===----------------------------------------------------------------------===// 1208 // "Library" functions that can be "extern'd" from user code. 1209 //===----------------------------------------------------------------------===// 1210 1211 /// putchard - putchar that takes a double and returns 0. 1212 extern "C" double putchard(double X) { 1213 fputc((char)X, stderr); 1214 return 0; 1215 } 1216 1217 /// printd - printf that takes a double prints it as "%f\n", returning 0. 1218 extern "C" double printd(double X) { 1219 fprintf(stderr, "%f\n", X); 1220 return 0; 1221 } 1222 1223 //===----------------------------------------------------------------------===// 1224 // Main driver code. 1225 //===----------------------------------------------------------------------===// 1226 1227 int main() { 1228 InitializeNativeTarget(); 1229 InitializeNativeTargetAsmPrinter(); 1230 InitializeNativeTargetAsmParser(); 1231 1232 // Install standard binary operators. 1233 // 1 is lowest precedence. 1234 BinopPrecedence['='] = 2; 1235 BinopPrecedence['<'] = 10; 1236 BinopPrecedence['+'] = 20; 1237 BinopPrecedence['-'] = 20; 1238 BinopPrecedence['*'] = 40; // highest. 1239 1240 // Prime the first token. 1241 fprintf(stderr, "ready> "); 1242 getNextToken(); 1243 1244 TheJIT = llvm::make_unique<KaleidoscopeJIT>(); 1245 1246 InitializeModuleAndPassManager(); 1247 1248 // Run the main "interpreter loop" now. 1249 MainLoop(); 1250 1251 return 0; 1252 } 1253