1 #include "llvm/ADT/APFloat.h"
2 #include "llvm/ADT/STLExtras.h"
3 #include "llvm/IR/BasicBlock.h"
4 #include "llvm/IR/Constants.h"
5 #include "llvm/IR/DerivedTypes.h"
6 #include "llvm/IR/Function.h"
7 #include "llvm/IR/Instructions.h"
8 #include "llvm/IR/IRBuilder.h"
9 #include "llvm/IR/LLVMContext.h"
10 #include "llvm/IR/LegacyPassManager.h"
11 #include "llvm/IR/Module.h"
12 #include "llvm/IR/Type.h"
13 #include "llvm/IR/Verifier.h"
14 #include "llvm/Support/TargetSelect.h"
15 #include "llvm/Target/TargetMachine.h"
16 #include "llvm/Transforms/Scalar.h"
17 #include "llvm/Transforms/Scalar/GVN.h"
18 #include "../include/KaleidoscopeJIT.h"
19 #include <algorithm>
20 #include <cassert>
21 #include <cctype>
22 #include <cstdint>
23 #include <cstdio>
24 #include <cstdlib>
25 #include <map>
26 #include <memory>
27 #include <string>
28 #include <utility>
29 #include <vector>
30 
31 using namespace llvm;
32 using namespace llvm::orc;
33 
34 //===----------------------------------------------------------------------===//
35 // Lexer
36 //===----------------------------------------------------------------------===//
37 
38 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
39 // of these for known things.
40 enum Token {
41   tok_eof = -1,
42 
43   // commands
44   tok_def = -2,
45   tok_extern = -3,
46 
47   // primary
48   tok_identifier = -4,
49   tok_number = -5,
50 
51   // control
52   tok_if = -6,
53   tok_then = -7,
54   tok_else = -8,
55   tok_for = -9,
56   tok_in = -10,
57 
58   // operators
59   tok_binary = -11,
60   tok_unary = -12,
61 
62   // var definition
63   tok_var = -13
64 };
65 
66 static std::string IdentifierStr; // Filled in if tok_identifier
67 static double NumVal;             // Filled in if tok_number
68 
69 /// gettok - Return the next token from standard input.
70 static int gettok() {
71   static int LastChar = ' ';
72 
73   // Skip any whitespace.
74   while (isspace(LastChar))
75     LastChar = getchar();
76 
77   if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
78     IdentifierStr = LastChar;
79     while (isalnum((LastChar = getchar())))
80       IdentifierStr += LastChar;
81 
82     if (IdentifierStr == "def")
83       return tok_def;
84     if (IdentifierStr == "extern")
85       return tok_extern;
86     if (IdentifierStr == "if")
87       return tok_if;
88     if (IdentifierStr == "then")
89       return tok_then;
90     if (IdentifierStr == "else")
91       return tok_else;
92     if (IdentifierStr == "for")
93       return tok_for;
94     if (IdentifierStr == "in")
95       return tok_in;
96     if (IdentifierStr == "binary")
97       return tok_binary;
98     if (IdentifierStr == "unary")
99       return tok_unary;
100     if (IdentifierStr == "var")
101       return tok_var;
102     return tok_identifier;
103   }
104 
105   if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
106     std::string NumStr;
107     do {
108       NumStr += LastChar;
109       LastChar = getchar();
110     } while (isdigit(LastChar) || LastChar == '.');
111 
112     NumVal = strtod(NumStr.c_str(), nullptr);
113     return tok_number;
114   }
115 
116   if (LastChar == '#') {
117     // Comment until end of line.
118     do
119       LastChar = getchar();
120     while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
121 
122     if (LastChar != EOF)
123       return gettok();
124   }
125 
126   // Check for end of file.  Don't eat the EOF.
127   if (LastChar == EOF)
128     return tok_eof;
129 
130   // Otherwise, just return the character as its ascii value.
131   int ThisChar = LastChar;
132   LastChar = getchar();
133   return ThisChar;
134 }
135 
136 //===----------------------------------------------------------------------===//
137 // Abstract Syntax Tree (aka Parse Tree)
138 //===----------------------------------------------------------------------===//
139 
140 namespace {
141 
142 /// ExprAST - Base class for all expression nodes.
143 class ExprAST {
144 public:
145   virtual ~ExprAST() = default;
146 
147   virtual Value *codegen() = 0;
148 };
149 
150 /// NumberExprAST - Expression class for numeric literals like "1.0".
151 class NumberExprAST : public ExprAST {
152   double Val;
153 
154 public:
155   NumberExprAST(double Val) : Val(Val) {}
156 
157   Value *codegen() override;
158 };
159 
160 /// VariableExprAST - Expression class for referencing a variable, like "a".
161 class VariableExprAST : public ExprAST {
162   std::string Name;
163 
164 public:
165   VariableExprAST(const std::string &Name) : Name(Name) {}
166 
167   Value *codegen() override;
168   const std::string &getName() const { return Name; }
169 };
170 
171 /// UnaryExprAST - Expression class for a unary operator.
172 class UnaryExprAST : public ExprAST {
173   char Opcode;
174   std::unique_ptr<ExprAST> Operand;
175 
176 public:
177   UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
178       : Opcode(Opcode), Operand(std::move(Operand)) {}
179 
180   Value *codegen() override;
181 };
182 
183 /// BinaryExprAST - Expression class for a binary operator.
184 class BinaryExprAST : public ExprAST {
185   char Op;
186   std::unique_ptr<ExprAST> LHS, RHS;
187 
188 public:
189   BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
190                 std::unique_ptr<ExprAST> RHS)
191       : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
192 
193   Value *codegen() override;
194 };
195 
196 /// CallExprAST - Expression class for function calls.
197 class CallExprAST : public ExprAST {
198   std::string Callee;
199   std::vector<std::unique_ptr<ExprAST>> Args;
200 
201 public:
202   CallExprAST(const std::string &Callee,
203               std::vector<std::unique_ptr<ExprAST>> Args)
204       : Callee(Callee), Args(std::move(Args)) {}
205 
206   Value *codegen() override;
207 };
208 
209 /// IfExprAST - Expression class for if/then/else.
210 class IfExprAST : public ExprAST {
211   std::unique_ptr<ExprAST> Cond, Then, Else;
212 
213 public:
214   IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
215             std::unique_ptr<ExprAST> Else)
216       : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
217 
218   Value *codegen() override;
219 };
220 
221 /// ForExprAST - Expression class for for/in.
222 class ForExprAST : public ExprAST {
223   std::string VarName;
224   std::unique_ptr<ExprAST> Start, End, Step, Body;
225 
226 public:
227   ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
228              std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
229              std::unique_ptr<ExprAST> Body)
230       : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
231         Step(std::move(Step)), Body(std::move(Body)) {}
232 
233   Value *codegen() override;
234 };
235 
236 /// VarExprAST - Expression class for var/in
237 class VarExprAST : public ExprAST {
238   std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
239   std::unique_ptr<ExprAST> Body;
240 
241 public:
242   VarExprAST(
243       std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
244       std::unique_ptr<ExprAST> Body)
245       : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
246 
247   Value *codegen() override;
248 };
249 
250 /// PrototypeAST - This class represents the "prototype" for a function,
251 /// which captures its name, and its argument names (thus implicitly the number
252 /// of arguments the function takes), as well as if it is an operator.
253 class PrototypeAST {
254   std::string Name;
255   std::vector<std::string> Args;
256   bool IsOperator;
257   unsigned Precedence; // Precedence if a binary op.
258 
259 public:
260   PrototypeAST(const std::string &Name, std::vector<std::string> Args,
261                bool IsOperator = false, unsigned Prec = 0)
262       : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
263         Precedence(Prec) {}
264 
265   Function *codegen();
266   const std::string &getName() const { return Name; }
267 
268   bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
269   bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
270 
271   char getOperatorName() const {
272     assert(isUnaryOp() || isBinaryOp());
273     return Name[Name.size() - 1];
274   }
275 
276   unsigned getBinaryPrecedence() const { return Precedence; }
277 };
278 
279 /// FunctionAST - This class represents a function definition itself.
280 class FunctionAST {
281   std::unique_ptr<PrototypeAST> Proto;
282   std::unique_ptr<ExprAST> Body;
283 
284 public:
285   FunctionAST(std::unique_ptr<PrototypeAST> Proto,
286               std::unique_ptr<ExprAST> Body)
287       : Proto(std::move(Proto)), Body(std::move(Body)) {}
288 
289   Function *codegen();
290 };
291 
292 } // end anonymous namespace
293 
294 //===----------------------------------------------------------------------===//
295 // Parser
296 //===----------------------------------------------------------------------===//
297 
298 /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
299 /// token the parser is looking at.  getNextToken reads another token from the
300 /// lexer and updates CurTok with its results.
301 static int CurTok;
302 static int getNextToken() { return CurTok = gettok(); }
303 
304 /// BinopPrecedence - This holds the precedence for each binary operator that is
305 /// defined.
306 static std::map<char, int> BinopPrecedence;
307 
308 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
309 static int GetTokPrecedence() {
310   if (!isascii(CurTok))
311     return -1;
312 
313   // Make sure it's a declared binop.
314   int TokPrec = BinopPrecedence[CurTok];
315   if (TokPrec <= 0)
316     return -1;
317   return TokPrec;
318 }
319 
320 /// LogError* - These are little helper functions for error handling.
321 std::unique_ptr<ExprAST> LogError(const char *Str) {
322   fprintf(stderr, "Error: %s\n", Str);
323   return nullptr;
324 }
325 
326 std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
327   LogError(Str);
328   return nullptr;
329 }
330 
331 static std::unique_ptr<ExprAST> ParseExpression();
332 
333 /// numberexpr ::= number
334 static std::unique_ptr<ExprAST> ParseNumberExpr() {
335   auto Result = llvm::make_unique<NumberExprAST>(NumVal);
336   getNextToken(); // consume the number
337   return std::move(Result);
338 }
339 
340 /// parenexpr ::= '(' expression ')'
341 static std::unique_ptr<ExprAST> ParseParenExpr() {
342   getNextToken(); // eat (.
343   auto V = ParseExpression();
344   if (!V)
345     return nullptr;
346 
347   if (CurTok != ')')
348     return LogError("expected ')'");
349   getNextToken(); // eat ).
350   return V;
351 }
352 
353 /// identifierexpr
354 ///   ::= identifier
355 ///   ::= identifier '(' expression* ')'
356 static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
357   std::string IdName = IdentifierStr;
358 
359   getNextToken(); // eat identifier.
360 
361   if (CurTok != '(') // Simple variable ref.
362     return llvm::make_unique<VariableExprAST>(IdName);
363 
364   // Call.
365   getNextToken(); // eat (
366   std::vector<std::unique_ptr<ExprAST>> Args;
367   if (CurTok != ')') {
368     while (true) {
369       if (auto Arg = ParseExpression())
370         Args.push_back(std::move(Arg));
371       else
372         return nullptr;
373 
374       if (CurTok == ')')
375         break;
376 
377       if (CurTok != ',')
378         return LogError("Expected ')' or ',' in argument list");
379       getNextToken();
380     }
381   }
382 
383   // Eat the ')'.
384   getNextToken();
385 
386   return llvm::make_unique<CallExprAST>(IdName, std::move(Args));
387 }
388 
389 /// ifexpr ::= 'if' expression 'then' expression 'else' expression
390 static std::unique_ptr<ExprAST> ParseIfExpr() {
391   getNextToken(); // eat the if.
392 
393   // condition.
394   auto Cond = ParseExpression();
395   if (!Cond)
396     return nullptr;
397 
398   if (CurTok != tok_then)
399     return LogError("expected then");
400   getNextToken(); // eat the then
401 
402   auto Then = ParseExpression();
403   if (!Then)
404     return nullptr;
405 
406   if (CurTok != tok_else)
407     return LogError("expected else");
408 
409   getNextToken();
410 
411   auto Else = ParseExpression();
412   if (!Else)
413     return nullptr;
414 
415   return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
416                                       std::move(Else));
417 }
418 
419 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
420 static std::unique_ptr<ExprAST> ParseForExpr() {
421   getNextToken(); // eat the for.
422 
423   if (CurTok != tok_identifier)
424     return LogError("expected identifier after for");
425 
426   std::string IdName = IdentifierStr;
427   getNextToken(); // eat identifier.
428 
429   if (CurTok != '=')
430     return LogError("expected '=' after for");
431   getNextToken(); // eat '='.
432 
433   auto Start = ParseExpression();
434   if (!Start)
435     return nullptr;
436   if (CurTok != ',')
437     return LogError("expected ',' after for start value");
438   getNextToken();
439 
440   auto End = ParseExpression();
441   if (!End)
442     return nullptr;
443 
444   // The step value is optional.
445   std::unique_ptr<ExprAST> Step;
446   if (CurTok == ',') {
447     getNextToken();
448     Step = ParseExpression();
449     if (!Step)
450       return nullptr;
451   }
452 
453   if (CurTok != tok_in)
454     return LogError("expected 'in' after for");
455   getNextToken(); // eat 'in'.
456 
457   auto Body = ParseExpression();
458   if (!Body)
459     return nullptr;
460 
461   return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End),
462                                        std::move(Step), std::move(Body));
463 }
464 
465 /// varexpr ::= 'var' identifier ('=' expression)?
466 //                    (',' identifier ('=' expression)?)* 'in' expression
467 static std::unique_ptr<ExprAST> ParseVarExpr() {
468   getNextToken(); // eat the var.
469 
470   std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
471 
472   // At least one variable name is required.
473   if (CurTok != tok_identifier)
474     return LogError("expected identifier after var");
475 
476   while (true) {
477     std::string Name = IdentifierStr;
478     getNextToken(); // eat identifier.
479 
480     // Read the optional initializer.
481     std::unique_ptr<ExprAST> Init = nullptr;
482     if (CurTok == '=') {
483       getNextToken(); // eat the '='.
484 
485       Init = ParseExpression();
486       if (!Init)
487         return nullptr;
488     }
489 
490     VarNames.push_back(std::make_pair(Name, std::move(Init)));
491 
492     // End of var list, exit loop.
493     if (CurTok != ',')
494       break;
495     getNextToken(); // eat the ','.
496 
497     if (CurTok != tok_identifier)
498       return LogError("expected identifier list after var");
499   }
500 
501   // At this point, we have to have 'in'.
502   if (CurTok != tok_in)
503     return LogError("expected 'in' keyword after 'var'");
504   getNextToken(); // eat 'in'.
505 
506   auto Body = ParseExpression();
507   if (!Body)
508     return nullptr;
509 
510   return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body));
511 }
512 
513 /// primary
514 ///   ::= identifierexpr
515 ///   ::= numberexpr
516 ///   ::= parenexpr
517 ///   ::= ifexpr
518 ///   ::= forexpr
519 ///   ::= varexpr
520 static std::unique_ptr<ExprAST> ParsePrimary() {
521   switch (CurTok) {
522   default:
523     return LogError("unknown token when expecting an expression");
524   case tok_identifier:
525     return ParseIdentifierExpr();
526   case tok_number:
527     return ParseNumberExpr();
528   case '(':
529     return ParseParenExpr();
530   case tok_if:
531     return ParseIfExpr();
532   case tok_for:
533     return ParseForExpr();
534   case tok_var:
535     return ParseVarExpr();
536   }
537 }
538 
539 /// unary
540 ///   ::= primary
541 ///   ::= '!' unary
542 static std::unique_ptr<ExprAST> ParseUnary() {
543   // If the current token is not an operator, it must be a primary expr.
544   if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
545     return ParsePrimary();
546 
547   // If this is a unary operator, read it.
548   int Opc = CurTok;
549   getNextToken();
550   if (auto Operand = ParseUnary())
551     return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
552   return nullptr;
553 }
554 
555 /// binoprhs
556 ///   ::= ('+' unary)*
557 static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
558                                               std::unique_ptr<ExprAST> LHS) {
559   // If this is a binop, find its precedence.
560   while (true) {
561     int TokPrec = GetTokPrecedence();
562 
563     // If this is a binop that binds at least as tightly as the current binop,
564     // consume it, otherwise we are done.
565     if (TokPrec < ExprPrec)
566       return LHS;
567 
568     // Okay, we know this is a binop.
569     int BinOp = CurTok;
570     getNextToken(); // eat binop
571 
572     // Parse the unary expression after the binary operator.
573     auto RHS = ParseUnary();
574     if (!RHS)
575       return nullptr;
576 
577     // If BinOp binds less tightly with RHS than the operator after RHS, let
578     // the pending operator take RHS as its LHS.
579     int NextPrec = GetTokPrecedence();
580     if (TokPrec < NextPrec) {
581       RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
582       if (!RHS)
583         return nullptr;
584     }
585 
586     // Merge LHS/RHS.
587     LHS =
588         llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS));
589   }
590 }
591 
592 /// expression
593 ///   ::= unary binoprhs
594 ///
595 static std::unique_ptr<ExprAST> ParseExpression() {
596   auto LHS = ParseUnary();
597   if (!LHS)
598     return nullptr;
599 
600   return ParseBinOpRHS(0, std::move(LHS));
601 }
602 
603 /// prototype
604 ///   ::= id '(' id* ')'
605 ///   ::= binary LETTER number? (id, id)
606 ///   ::= unary LETTER (id)
607 static std::unique_ptr<PrototypeAST> ParsePrototype() {
608   std::string FnName;
609 
610   unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
611   unsigned BinaryPrecedence = 30;
612 
613   switch (CurTok) {
614   default:
615     return LogErrorP("Expected function name in prototype");
616   case tok_identifier:
617     FnName = IdentifierStr;
618     Kind = 0;
619     getNextToken();
620     break;
621   case tok_unary:
622     getNextToken();
623     if (!isascii(CurTok))
624       return LogErrorP("Expected unary operator");
625     FnName = "unary";
626     FnName += (char)CurTok;
627     Kind = 1;
628     getNextToken();
629     break;
630   case tok_binary:
631     getNextToken();
632     if (!isascii(CurTok))
633       return LogErrorP("Expected binary operator");
634     FnName = "binary";
635     FnName += (char)CurTok;
636     Kind = 2;
637     getNextToken();
638 
639     // Read the precedence if present.
640     if (CurTok == tok_number) {
641       if (NumVal < 1 || NumVal > 100)
642         return LogErrorP("Invalid precedecnce: must be 1..100");
643       BinaryPrecedence = (unsigned)NumVal;
644       getNextToken();
645     }
646     break;
647   }
648 
649   if (CurTok != '(')
650     return LogErrorP("Expected '(' in prototype");
651 
652   std::vector<std::string> ArgNames;
653   while (getNextToken() == tok_identifier)
654     ArgNames.push_back(IdentifierStr);
655   if (CurTok != ')')
656     return LogErrorP("Expected ')' in prototype");
657 
658   // success.
659   getNextToken(); // eat ')'.
660 
661   // Verify right number of names for operator.
662   if (Kind && ArgNames.size() != Kind)
663     return LogErrorP("Invalid number of operands for operator");
664 
665   return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0,
666                                          BinaryPrecedence);
667 }
668 
669 /// definition ::= 'def' prototype expression
670 static std::unique_ptr<FunctionAST> ParseDefinition() {
671   getNextToken(); // eat def.
672   auto Proto = ParsePrototype();
673   if (!Proto)
674     return nullptr;
675 
676   if (auto E = ParseExpression())
677     return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
678   return nullptr;
679 }
680 
681 /// toplevelexpr ::= expression
682 static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
683   if (auto E = ParseExpression()) {
684     // Make an anonymous proto.
685     auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr",
686                                                  std::vector<std::string>());
687     return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
688   }
689   return nullptr;
690 }
691 
692 /// external ::= 'extern' prototype
693 static std::unique_ptr<PrototypeAST> ParseExtern() {
694   getNextToken(); // eat extern.
695   return ParsePrototype();
696 }
697 
698 //===----------------------------------------------------------------------===//
699 // Code Generation
700 //===----------------------------------------------------------------------===//
701 
702 static LLVMContext TheContext;
703 static IRBuilder<> Builder(TheContext);
704 static std::unique_ptr<Module> TheModule;
705 static std::map<std::string, AllocaInst *> NamedValues;
706 static std::unique_ptr<legacy::FunctionPassManager> TheFPM;
707 static std::unique_ptr<KaleidoscopeJIT> TheJIT;
708 static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
709 
710 Value *LogErrorV(const char *Str) {
711   LogError(Str);
712   return nullptr;
713 }
714 
715 Function *getFunction(std::string Name) {
716   // First, see if the function has already been added to the current module.
717   if (auto *F = TheModule->getFunction(Name))
718     return F;
719 
720   // If not, check whether we can codegen the declaration from some existing
721   // prototype.
722   auto FI = FunctionProtos.find(Name);
723   if (FI != FunctionProtos.end())
724     return FI->second->codegen();
725 
726   // If no existing prototype exists, return null.
727   return nullptr;
728 }
729 
730 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
731 /// the function.  This is used for mutable variables etc.
732 static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
733                                           const std::string &VarName) {
734   IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
735                    TheFunction->getEntryBlock().begin());
736   return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), nullptr, VarName);
737 }
738 
739 Value *NumberExprAST::codegen() {
740   return ConstantFP::get(TheContext, APFloat(Val));
741 }
742 
743 Value *VariableExprAST::codegen() {
744   // Look this variable up in the function.
745   Value *V = NamedValues[Name];
746   if (!V)
747     return LogErrorV("Unknown variable name");
748 
749   // Load the value.
750   return Builder.CreateLoad(V, Name.c_str());
751 }
752 
753 Value *UnaryExprAST::codegen() {
754   Value *OperandV = Operand->codegen();
755   if (!OperandV)
756     return nullptr;
757 
758   Function *F = getFunction(std::string("unary") + Opcode);
759   if (!F)
760     return LogErrorV("Unknown unary operator");
761 
762   return Builder.CreateCall(F, OperandV, "unop");
763 }
764 
765 Value *BinaryExprAST::codegen() {
766   // Special case '=' because we don't want to emit the LHS as an expression.
767   if (Op == '=') {
768     // Assignment requires the LHS to be an identifier.
769     // This assume we're building without RTTI because LLVM builds that way by
770     // default.  If you build LLVM with RTTI this can be changed to a
771     // dynamic_cast for automatic error checking.
772     VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get());
773     if (!LHSE)
774       return LogErrorV("destination of '=' must be a variable");
775     // Codegen the RHS.
776     Value *Val = RHS->codegen();
777     if (!Val)
778       return nullptr;
779 
780     // Look up the name.
781     Value *Variable = NamedValues[LHSE->getName()];
782     if (!Variable)
783       return LogErrorV("Unknown variable name");
784 
785     Builder.CreateStore(Val, Variable);
786     return Val;
787   }
788 
789   Value *L = LHS->codegen();
790   Value *R = RHS->codegen();
791   if (!L || !R)
792     return nullptr;
793 
794   switch (Op) {
795   case '+':
796     return Builder.CreateFAdd(L, R, "addtmp");
797   case '-':
798     return Builder.CreateFSub(L, R, "subtmp");
799   case '*':
800     return Builder.CreateFMul(L, R, "multmp");
801   case '<':
802     L = Builder.CreateFCmpULT(L, R, "cmptmp");
803     // Convert bool 0/1 to double 0.0 or 1.0
804     return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp");
805   default:
806     break;
807   }
808 
809   // If it wasn't a builtin binary operator, it must be a user defined one. Emit
810   // a call to it.
811   Function *F = getFunction(std::string("binary") + Op);
812   assert(F && "binary operator not found!");
813 
814   Value *Ops[] = {L, R};
815   return Builder.CreateCall(F, Ops, "binop");
816 }
817 
818 Value *CallExprAST::codegen() {
819   // Look up the name in the global module table.
820   Function *CalleeF = getFunction(Callee);
821   if (!CalleeF)
822     return LogErrorV("Unknown function referenced");
823 
824   // If argument mismatch error.
825   if (CalleeF->arg_size() != Args.size())
826     return LogErrorV("Incorrect # arguments passed");
827 
828   std::vector<Value *> ArgsV;
829   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
830     ArgsV.push_back(Args[i]->codegen());
831     if (!ArgsV.back())
832       return nullptr;
833   }
834 
835   return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
836 }
837 
838 Value *IfExprAST::codegen() {
839   Value *CondV = Cond->codegen();
840   if (!CondV)
841     return nullptr;
842 
843   // Convert condition to a bool by comparing equal to 0.0.
844   CondV = Builder.CreateFCmpONE(
845       CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
846 
847   Function *TheFunction = Builder.GetInsertBlock()->getParent();
848 
849   // Create blocks for the then and else cases.  Insert the 'then' block at the
850   // end of the function.
851   BasicBlock *ThenBB = BasicBlock::Create(TheContext, "then", TheFunction);
852   BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else");
853   BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont");
854 
855   Builder.CreateCondBr(CondV, ThenBB, ElseBB);
856 
857   // Emit then value.
858   Builder.SetInsertPoint(ThenBB);
859 
860   Value *ThenV = Then->codegen();
861   if (!ThenV)
862     return nullptr;
863 
864   Builder.CreateBr(MergeBB);
865   // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
866   ThenBB = Builder.GetInsertBlock();
867 
868   // Emit else block.
869   TheFunction->getBasicBlockList().push_back(ElseBB);
870   Builder.SetInsertPoint(ElseBB);
871 
872   Value *ElseV = Else->codegen();
873   if (!ElseV)
874     return nullptr;
875 
876   Builder.CreateBr(MergeBB);
877   // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
878   ElseBB = Builder.GetInsertBlock();
879 
880   // Emit merge block.
881   TheFunction->getBasicBlockList().push_back(MergeBB);
882   Builder.SetInsertPoint(MergeBB);
883   PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp");
884 
885   PN->addIncoming(ThenV, ThenBB);
886   PN->addIncoming(ElseV, ElseBB);
887   return PN;
888 }
889 
890 // Output for-loop as:
891 //   var = alloca double
892 //   ...
893 //   start = startexpr
894 //   store start -> var
895 //   goto loop
896 // loop:
897 //   ...
898 //   bodyexpr
899 //   ...
900 // loopend:
901 //   step = stepexpr
902 //   endcond = endexpr
903 //
904 //   curvar = load var
905 //   nextvar = curvar + step
906 //   store nextvar -> var
907 //   br endcond, loop, endloop
908 // outloop:
909 Value *ForExprAST::codegen() {
910   Function *TheFunction = Builder.GetInsertBlock()->getParent();
911 
912   // Create an alloca for the variable in the entry block.
913   AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
914 
915   // Emit the start code first, without 'variable' in scope.
916   Value *StartVal = Start->codegen();
917   if (!StartVal)
918     return nullptr;
919 
920   // Store the value into the alloca.
921   Builder.CreateStore(StartVal, Alloca);
922 
923   // Make the new basic block for the loop header, inserting after current
924   // block.
925   BasicBlock *LoopBB = BasicBlock::Create(TheContext, "loop", TheFunction);
926 
927   // Insert an explicit fall through from the current block to the LoopBB.
928   Builder.CreateBr(LoopBB);
929 
930   // Start insertion in LoopBB.
931   Builder.SetInsertPoint(LoopBB);
932 
933   // Within the loop, the variable is defined equal to the PHI node.  If it
934   // shadows an existing variable, we have to restore it, so save it now.
935   AllocaInst *OldVal = NamedValues[VarName];
936   NamedValues[VarName] = Alloca;
937 
938   // Emit the body of the loop.  This, like any other expr, can change the
939   // current BB.  Note that we ignore the value computed by the body, but don't
940   // allow an error.
941   if (!Body->codegen())
942     return nullptr;
943 
944   // Emit the step value.
945   Value *StepVal = nullptr;
946   if (Step) {
947     StepVal = Step->codegen();
948     if (!StepVal)
949       return nullptr;
950   } else {
951     // If not specified, use 1.0.
952     StepVal = ConstantFP::get(TheContext, APFloat(1.0));
953   }
954 
955   // Compute the end condition.
956   Value *EndCond = End->codegen();
957   if (!EndCond)
958     return nullptr;
959 
960   // Reload, increment, and restore the alloca.  This handles the case where
961   // the body of the loop mutates the variable.
962   Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
963   Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
964   Builder.CreateStore(NextVar, Alloca);
965 
966   // Convert condition to a bool by comparing equal to 0.0.
967   EndCond = Builder.CreateFCmpONE(
968       EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
969 
970   // Create the "after loop" block and insert it.
971   BasicBlock *AfterBB =
972       BasicBlock::Create(TheContext, "afterloop", TheFunction);
973 
974   // Insert the conditional branch into the end of LoopEndBB.
975   Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
976 
977   // Any new code will be inserted in AfterBB.
978   Builder.SetInsertPoint(AfterBB);
979 
980   // Restore the unshadowed variable.
981   if (OldVal)
982     NamedValues[VarName] = OldVal;
983   else
984     NamedValues.erase(VarName);
985 
986   // for expr always returns 0.0.
987   return Constant::getNullValue(Type::getDoubleTy(TheContext));
988 }
989 
990 Value *VarExprAST::codegen() {
991   std::vector<AllocaInst *> OldBindings;
992 
993   Function *TheFunction = Builder.GetInsertBlock()->getParent();
994 
995   // Register all variables and emit their initializer.
996   for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
997     const std::string &VarName = VarNames[i].first;
998     ExprAST *Init = VarNames[i].second.get();
999 
1000     // Emit the initializer before adding the variable to scope, this prevents
1001     // the initializer from referencing the variable itself, and permits stuff
1002     // like this:
1003     //  var a = 1 in
1004     //    var a = a in ...   # refers to outer 'a'.
1005     Value *InitVal;
1006     if (Init) {
1007       InitVal = Init->codegen();
1008       if (!InitVal)
1009         return nullptr;
1010     } else { // If not specified, use 0.0.
1011       InitVal = ConstantFP::get(TheContext, APFloat(0.0));
1012     }
1013 
1014     AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
1015     Builder.CreateStore(InitVal, Alloca);
1016 
1017     // Remember the old variable binding so that we can restore the binding when
1018     // we unrecurse.
1019     OldBindings.push_back(NamedValues[VarName]);
1020 
1021     // Remember this binding.
1022     NamedValues[VarName] = Alloca;
1023   }
1024 
1025   // Codegen the body, now that all vars are in scope.
1026   Value *BodyVal = Body->codegen();
1027   if (!BodyVal)
1028     return nullptr;
1029 
1030   // Pop all our variables from scope.
1031   for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
1032     NamedValues[VarNames[i].first] = OldBindings[i];
1033 
1034   // Return the body computation.
1035   return BodyVal;
1036 }
1037 
1038 Function *PrototypeAST::codegen() {
1039   // Make the function type:  double(double,double) etc.
1040   std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext));
1041   FunctionType *FT =
1042       FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false);
1043 
1044   Function *F =
1045       Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
1046 
1047   // Set names for all arguments.
1048   unsigned Idx = 0;
1049   for (auto &Arg : F->args())
1050     Arg.setName(Args[Idx++]);
1051 
1052   return F;
1053 }
1054 
1055 Function *FunctionAST::codegen() {
1056   // Transfer ownership of the prototype to the FunctionProtos map, but keep a
1057   // reference to it for use below.
1058   auto &P = *Proto;
1059   FunctionProtos[Proto->getName()] = std::move(Proto);
1060   Function *TheFunction = getFunction(P.getName());
1061   if (!TheFunction)
1062     return nullptr;
1063 
1064   // If this is an operator, install it.
1065   if (P.isBinaryOp())
1066     BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
1067 
1068   // Create a new basic block to start insertion into.
1069   BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction);
1070   Builder.SetInsertPoint(BB);
1071 
1072   // Record the function arguments in the NamedValues map.
1073   NamedValues.clear();
1074   for (auto &Arg : TheFunction->args()) {
1075     // Create an alloca for this variable.
1076     AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
1077 
1078     // Store the initial value into the alloca.
1079     Builder.CreateStore(&Arg, Alloca);
1080 
1081     // Add arguments to variable symbol table.
1082     NamedValues[Arg.getName()] = Alloca;
1083   }
1084 
1085   if (Value *RetVal = Body->codegen()) {
1086     // Finish off the function.
1087     Builder.CreateRet(RetVal);
1088 
1089     // Validate the generated code, checking for consistency.
1090     verifyFunction(*TheFunction);
1091 
1092     // Run the optimizer on the function.
1093     TheFPM->run(*TheFunction);
1094 
1095     return TheFunction;
1096   }
1097 
1098   // Error reading body, remove function.
1099   TheFunction->eraseFromParent();
1100 
1101   if (P.isBinaryOp())
1102     BinopPrecedence.erase(Proto->getOperatorName());
1103   return nullptr;
1104 }
1105 
1106 //===----------------------------------------------------------------------===//
1107 // Top-Level parsing and JIT Driver
1108 //===----------------------------------------------------------------------===//
1109 
1110 static void InitializeModuleAndPassManager() {
1111   // Open a new module.
1112   TheModule = llvm::make_unique<Module>("my cool jit", TheContext);
1113   TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
1114 
1115   // Create a new pass manager attached to it.
1116   TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get());
1117 
1118   // Do simple "peephole" optimizations and bit-twiddling optzns.
1119   TheFPM->add(createInstructionCombiningPass());
1120   // Reassociate expressions.
1121   TheFPM->add(createReassociatePass());
1122   // Eliminate Common SubExpressions.
1123   TheFPM->add(createGVNPass());
1124   // Simplify the control flow graph (deleting unreachable blocks, etc).
1125   TheFPM->add(createCFGSimplificationPass());
1126 
1127   TheFPM->doInitialization();
1128 }
1129 
1130 static void HandleDefinition() {
1131   if (auto FnAST = ParseDefinition()) {
1132     if (auto *FnIR = FnAST->codegen()) {
1133       fprintf(stderr, "Read function definition:");
1134       FnIR->dump();
1135       TheJIT->addModule(std::move(TheModule));
1136       InitializeModuleAndPassManager();
1137     }
1138   } else {
1139     // Skip token for error recovery.
1140     getNextToken();
1141   }
1142 }
1143 
1144 static void HandleExtern() {
1145   if (auto ProtoAST = ParseExtern()) {
1146     if (auto *FnIR = ProtoAST->codegen()) {
1147       fprintf(stderr, "Read extern: ");
1148       FnIR->dump();
1149       FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
1150     }
1151   } else {
1152     // Skip token for error recovery.
1153     getNextToken();
1154   }
1155 }
1156 
1157 static void HandleTopLevelExpression() {
1158   // Evaluate a top-level expression into an anonymous function.
1159   if (auto FnAST = ParseTopLevelExpr()) {
1160     if (FnAST->codegen()) {
1161       // JIT the module containing the anonymous expression, keeping a handle so
1162       // we can free it later.
1163       auto H = TheJIT->addModule(std::move(TheModule));
1164       InitializeModuleAndPassManager();
1165 
1166       // Search the JIT for the __anon_expr symbol.
1167       auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
1168       assert(ExprSymbol && "Function not found");
1169 
1170       // Get the symbol's address and cast it to the right type (takes no
1171       // arguments, returns a double) so we can call it as a native function.
1172       double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
1173       fprintf(stderr, "Evaluated to %f\n", FP());
1174 
1175       // Delete the anonymous expression module from the JIT.
1176       TheJIT->removeModule(H);
1177     }
1178   } else {
1179     // Skip token for error recovery.
1180     getNextToken();
1181   }
1182 }
1183 
1184 /// top ::= definition | external | expression | ';'
1185 static void MainLoop() {
1186   while (true) {
1187     fprintf(stderr, "ready> ");
1188     switch (CurTok) {
1189     case tok_eof:
1190       return;
1191     case ';': // ignore top-level semicolons.
1192       getNextToken();
1193       break;
1194     case tok_def:
1195       HandleDefinition();
1196       break;
1197     case tok_extern:
1198       HandleExtern();
1199       break;
1200     default:
1201       HandleTopLevelExpression();
1202       break;
1203     }
1204   }
1205 }
1206 
1207 //===----------------------------------------------------------------------===//
1208 // "Library" functions that can be "extern'd" from user code.
1209 //===----------------------------------------------------------------------===//
1210 
1211 /// putchard - putchar that takes a double and returns 0.
1212 extern "C" double putchard(double X) {
1213   fputc((char)X, stderr);
1214   return 0;
1215 }
1216 
1217 /// printd - printf that takes a double prints it as "%f\n", returning 0.
1218 extern "C" double printd(double X) {
1219   fprintf(stderr, "%f\n", X);
1220   return 0;
1221 }
1222 
1223 //===----------------------------------------------------------------------===//
1224 // Main driver code.
1225 //===----------------------------------------------------------------------===//
1226 
1227 int main() {
1228   InitializeNativeTarget();
1229   InitializeNativeTargetAsmPrinter();
1230   InitializeNativeTargetAsmParser();
1231 
1232   // Install standard binary operators.
1233   // 1 is lowest precedence.
1234   BinopPrecedence['='] = 2;
1235   BinopPrecedence['<'] = 10;
1236   BinopPrecedence['+'] = 20;
1237   BinopPrecedence['-'] = 20;
1238   BinopPrecedence['*'] = 40; // highest.
1239 
1240   // Prime the first token.
1241   fprintf(stderr, "ready> ");
1242   getNextToken();
1243 
1244   TheJIT = llvm::make_unique<KaleidoscopeJIT>();
1245 
1246   InitializeModuleAndPassManager();
1247 
1248   // Run the main "interpreter loop" now.
1249   MainLoop();
1250 
1251   return 0;
1252 }
1253