1 //===---- AVRAsmParser.cpp - Parse AVR assembly to MCInst instructions ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AVR.h"
11 #include "AVRRegisterInfo.h"
12 #include "MCTargetDesc/AVRMCExpr.h"
13 #include "MCTargetDesc/AVRMCTargetDesc.h"
14 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCInstBuilder.h"
21 #include "llvm/MC/MCParser/MCAsmLexer.h"
22 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
23 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
24 #include "llvm/MC/MCStreamer.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/MC/MCSymbol.h"
27 #include "llvm/MC/MCValue.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Support/TargetRegistry.h"
31 
32 #include <sstream>
33 
34 #define DEBUG_TYPE "avr-asm-parser"
35 
36 namespace llvm {
37 
38 /// Parses AVR assembly from a stream.
39 class AVRAsmParser : public MCTargetAsmParser {
40   const MCSubtargetInfo &STI;
41   MCAsmParser &Parser;
42   const MCRegisterInfo *MRI;
43 
44 #define GET_ASSEMBLER_HEADER
45 #include "AVRGenAsmMatcher.inc"
46 
47   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
48                                OperandVector &Operands, MCStreamer &Out,
49                                uint64_t &ErrorInfo,
50                                bool MatchingInlineAsm) override;
51 
52   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
53 
54   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
55                         SMLoc NameLoc, OperandVector &Operands) override;
56 
57   bool ParseDirective(AsmToken directiveID) override;
58 
59   OperandMatchResultTy parseMemriOperand(OperandVector &Operands);
60 
61   bool parseOperand(OperandVector &Operands);
62   int parseRegisterName(unsigned (*matchFn)(StringRef));
63   int parseRegisterName();
64   int parseRegister();
65   bool tryParseRegisterOperand(OperandVector &Operands);
66   bool tryParseExpression(OperandVector &Operands);
67   bool tryParseRelocExpression(OperandVector &Operands);
68   void eatComma();
69 
70   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
71                                       unsigned Kind) override;
72 
73   unsigned toDREG(unsigned Reg, unsigned From = AVR::sub_lo) {
74     MCRegisterClass const *Class = &AVRMCRegisterClasses[AVR::DREGSRegClassID];
75     return MRI->getMatchingSuperReg(Reg, From, Class);
76   }
77 
78   bool emit(MCInst &Instruction, SMLoc const &Loc, MCStreamer &Out) const;
79   bool invalidOperand(SMLoc const &Loc, OperandVector const &Operands,
80                       uint64_t const &ErrorInfo);
81   bool missingFeature(SMLoc const &Loc, uint64_t const &ErrorInfo);
82 
83 public:
84   AVRAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
85                const MCInstrInfo &MII, const MCTargetOptions &Options)
86       : MCTargetAsmParser(Options, STI, MII), STI(STI), Parser(Parser) {
87     MCAsmParserExtension::Initialize(Parser);
88     MRI = getContext().getRegisterInfo();
89 
90     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
91   }
92 
93   MCAsmParser &getParser() const { return Parser; }
94   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
95 };
96 
97 /// An parsed AVR assembly operand.
98 class AVROperand : public MCParsedAsmOperand {
99   typedef MCParsedAsmOperand Base;
100   enum KindTy { k_Immediate, k_Register, k_Token, k_Memri } Kind;
101 
102 public:
103   AVROperand(StringRef Tok, SMLoc const &S)
104       : Base(), Kind(k_Token), Tok(Tok), Start(S), End(S) {}
105   AVROperand(unsigned Reg, SMLoc const &S, SMLoc const &E)
106       : Base(), Kind(k_Register), RegImm({Reg, nullptr}), Start(S), End(E) {}
107   AVROperand(MCExpr const *Imm, SMLoc const &S, SMLoc const &E)
108       : Base(), Kind(k_Immediate), RegImm({0, Imm}), Start(S), End(E) {}
109   AVROperand(unsigned Reg, MCExpr const *Imm, SMLoc const &S, SMLoc const &E)
110       : Base(), Kind(k_Memri), RegImm({Reg, Imm}), Start(S), End(E) {}
111 
112   struct RegisterImmediate {
113     unsigned Reg;
114     MCExpr const *Imm;
115   };
116   union {
117     StringRef Tok;
118     RegisterImmediate RegImm;
119   };
120 
121   SMLoc Start, End;
122 
123 public:
124   void addRegOperands(MCInst &Inst, unsigned N) const {
125     assert(Kind == k_Register && "Unexpected operand kind");
126     assert(N == 1 && "Invalid number of operands!");
127 
128     Inst.addOperand(MCOperand::createReg(getReg()));
129   }
130 
131   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
132     // Add as immediate when possible
133     if (!Expr)
134       Inst.addOperand(MCOperand::createImm(0));
135     else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
136       Inst.addOperand(MCOperand::createImm(CE->getValue()));
137     else
138       Inst.addOperand(MCOperand::createExpr(Expr));
139   }
140 
141   void addImmOperands(MCInst &Inst, unsigned N) const {
142     assert(Kind == k_Immediate && "Unexpected operand kind");
143     assert(N == 1 && "Invalid number of operands!");
144 
145     const MCExpr *Expr = getImm();
146     addExpr(Inst, Expr);
147   }
148 
149   /// Adds the contained reg+imm operand to an instruction.
150   void addMemriOperands(MCInst &Inst, unsigned N) const {
151     assert(Kind == k_Memri && "Unexpected operand kind");
152     assert(N == 2 && "Invalid number of operands");
153 
154     Inst.addOperand(MCOperand::createReg(getReg()));
155     addExpr(Inst, getImm());
156   }
157 
158   bool isReg() const { return Kind == k_Register; }
159   bool isImm() const { return Kind == k_Immediate; }
160   bool isToken() const { return Kind == k_Token; }
161   bool isMem() const { return Kind == k_Memri; }
162   bool isMemri() const { return Kind == k_Memri; }
163 
164   StringRef getToken() const {
165     assert(Kind == k_Token && "Invalid access!");
166     return Tok;
167   }
168 
169   unsigned getReg() const {
170     assert((Kind == k_Register || Kind == k_Memri) && "Invalid access!");
171 
172     return RegImm.Reg;
173   }
174 
175   const MCExpr *getImm() const {
176     assert((Kind == k_Immediate || Kind == k_Memri) && "Invalid access!");
177     return RegImm.Imm;
178   }
179 
180   static std::unique_ptr<AVROperand> CreateToken(StringRef Str, SMLoc S) {
181     return make_unique<AVROperand>(Str, S);
182   }
183 
184   static std::unique_ptr<AVROperand> CreateReg(unsigned RegNum, SMLoc S,
185                                                SMLoc E) {
186     return make_unique<AVROperand>(RegNum, S, E);
187   }
188 
189   static std::unique_ptr<AVROperand> CreateImm(const MCExpr *Val, SMLoc S,
190                                                SMLoc E) {
191     return make_unique<AVROperand>(Val, S, E);
192   }
193 
194   static std::unique_ptr<AVROperand>
195   CreateMemri(unsigned RegNum, const MCExpr *Val, SMLoc S, SMLoc E) {
196     return make_unique<AVROperand>(RegNum, Val, S, E);
197   }
198 
199   void makeToken(StringRef Token) {
200     Kind = k_Token;
201     Tok = Token;
202   }
203 
204   void makeReg(unsigned RegNo) {
205     Kind = k_Register;
206     RegImm = {RegNo, nullptr};
207   }
208 
209   void makeImm(MCExpr const *Ex) {
210     Kind = k_Immediate;
211     RegImm = {0, Ex};
212   }
213 
214   void makeMemri(unsigned RegNo, MCExpr const *Imm) {
215     Kind = k_Memri;
216     RegImm = {RegNo, Imm};
217   }
218 
219   SMLoc getStartLoc() const { return Start; }
220   SMLoc getEndLoc() const { return End; }
221 
222   virtual void print(raw_ostream &O) const {
223     switch (Kind) {
224     case k_Token:
225       O << "Token: \"" << getToken() << "\"";
226       break;
227     case k_Register:
228       O << "Register: " << getReg();
229       break;
230     case k_Immediate:
231       O << "Immediate: \"" << *getImm() << "\"";
232       break;
233     case k_Memri: {
234       // only manually print the size for non-negative values,
235       // as the sign is inserted automatically.
236       O << "Memri: \"" << getReg() << '+' << *getImm() << "\"";
237       break;
238     }
239     }
240     O << "\n";
241   }
242 };
243 
244 // Auto-generated Match Functions
245 
246 /// Maps from the set of all register names to a register number.
247 /// \note Generated by TableGen.
248 static unsigned MatchRegisterName(StringRef Name);
249 
/// Maps from the set of all alternative register names to a register number.
251 /// \note Generated by TableGen.
252 static unsigned MatchRegisterAltName(StringRef Name);
253 
254 bool AVRAsmParser::invalidOperand(SMLoc const &Loc,
255                                   OperandVector const &Operands,
256                                   uint64_t const &ErrorInfo) {
257   SMLoc ErrorLoc = Loc;
258   char const *Diag = 0;
259 
260   if (ErrorInfo != ~0U) {
261     if (ErrorInfo >= Operands.size()) {
262       Diag = "too few operands for instruction.";
263     } else {
264       AVROperand const &Op = (AVROperand const &)*Operands[ErrorInfo];
265 
266       // TODO: See if we can do a better error than just "invalid ...".
267       if (Op.getStartLoc() != SMLoc()) {
268         ErrorLoc = Op.getStartLoc();
269       }
270     }
271   }
272 
273   if (!Diag) {
274     Diag = "invalid operand for instruction";
275   }
276 
277   return Error(ErrorLoc, Diag);
278 }
279 
280 bool AVRAsmParser::missingFeature(llvm::SMLoc const &Loc,
281                                   uint64_t const &ErrorInfo) {
282   return Error(Loc, "instruction requires a CPU feature not currently enabled");
283 }
284 
285 bool AVRAsmParser::emit(MCInst &Inst, SMLoc const &Loc, MCStreamer &Out) const {
286   Inst.setLoc(Loc);
287   Out.EmitInstruction(Inst, STI);
288 
289   return false;
290 }
291 
292 bool AVRAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
293                                            OperandVector &Operands,
294                                            MCStreamer &Out, uint64_t &ErrorInfo,
295                                            bool MatchingInlineAsm) {
296   MCInst Inst;
297   unsigned MatchResult =
298       MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
299 
300   switch (MatchResult) {
301   case Match_Success:        return emit(Inst, Loc, Out);
302   case Match_MissingFeature: return missingFeature(Loc, ErrorInfo);
303   case Match_InvalidOperand: return invalidOperand(Loc, Operands, ErrorInfo);
304   case Match_MnemonicFail:   return Error(Loc, "invalid instruction");
305   default:                   return true;
306   }
307 }
308 
309 /// Parses a register name using a given matching function.
310 /// Checks for lowercase or uppercase if necessary.
311 int AVRAsmParser::parseRegisterName(unsigned (*matchFn)(StringRef)) {
312   StringRef Name = Parser.getTok().getString();
313 
314   int RegNum = matchFn(Name);
315 
316   // GCC supports case insensitive register names. Some of the AVR registers
317   // are all lower case, some are all upper case but non are mixed. We prefer
318   // to use the original names in the register definitions. That is why we
319   // have to test both upper and lower case here.
320   if (RegNum == AVR::NoRegister) {
321     RegNum = matchFn(Name.lower());
322   }
323   if (RegNum == AVR::NoRegister) {
324     RegNum = matchFn(Name.upper());
325   }
326 
327   return RegNum;
328 }
329 
330 int AVRAsmParser::parseRegisterName() {
331   int RegNum = parseRegisterName(&MatchRegisterName);
332 
333   if (RegNum == AVR::NoRegister)
334     RegNum = parseRegisterName(&MatchRegisterAltName);
335 
336   return RegNum;
337 }
338 
339 int AVRAsmParser::parseRegister() {
340   int RegNum = AVR::NoRegister;
341 
342   if (Parser.getTok().is(AsmToken::Identifier)) {
343     // Check for register pair syntax
344     if (Parser.getLexer().peekTok().is(AsmToken::Colon)) {
345       Parser.Lex();
346       Parser.Lex(); // Eat high (odd) register and colon
347 
348       if (Parser.getTok().is(AsmToken::Identifier)) {
349         // Convert lower (even) register to DREG
350         RegNum = toDREG(parseRegisterName());
351       }
352     } else {
353       RegNum = parseRegisterName();
354     }
355   }
356   return RegNum;
357 }
358 
359 bool AVRAsmParser::tryParseRegisterOperand(OperandVector &Operands) {
360   int RegNo = parseRegister();
361 
362   if (RegNo == AVR::NoRegister)
363     return true;
364 
365   AsmToken const &T = Parser.getTok();
366   Operands.push_back(AVROperand::CreateReg(RegNo, T.getLoc(), T.getEndLoc()));
367   Parser.Lex(); // Eat register token.
368 
369   return false;
370 }
371 
372 bool AVRAsmParser::tryParseExpression(OperandVector &Operands) {
373   SMLoc S = Parser.getTok().getLoc();
374 
375   if (!tryParseRelocExpression(Operands))
376     return false;
377 
378   if ((Parser.getTok().getKind() == AsmToken::Plus ||
379        Parser.getTok().getKind() == AsmToken::Minus) &&
380       Parser.getLexer().peekTok().getKind() == AsmToken::Identifier) {
381     // Don't handle this case - it should be split into two
382     // separate tokens.
383     return true;
384   }
385 
386   // Parse (potentially inner) expression
387   MCExpr const *Expression;
388   if (getParser().parseExpression(Expression))
389     return true;
390 
391   SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
392   Operands.push_back(AVROperand::CreateImm(Expression, S, E));
393   return false;
394 }
395 
396 bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) {
397   bool isNegated = false;
398   AVRMCExpr::VariantKind ModifierKind = AVRMCExpr::VK_AVR_None;
399 
400   SMLoc S = Parser.getTok().getLoc();
401 
402   // Check for sign
403   AsmToken tokens[2];
404   size_t ReadCount = Parser.getLexer().peekTokens(tokens);
405 
406   if (ReadCount == 2) {
407     if (tokens[0].getKind() == AsmToken::Identifier &&
408         tokens[1].getKind() == AsmToken::LParen) {
409 
410       AsmToken::TokenKind CurTok = Parser.getLexer().getKind();
411       if (CurTok == AsmToken::Minus) {
412         isNegated = true;
413       } else {
414         assert(CurTok == AsmToken::Plus);
415         isNegated = false;
416       }
417 
418       // Eat the sign
419       Parser.Lex();
420     }
421   }
422 
423   // Check if we have a target specific modifier (lo8, hi8, &c)
424   if (Parser.getTok().getKind() != AsmToken::Identifier ||
425       Parser.getLexer().peekTok().getKind() != AsmToken::LParen) {
426     // Not a reloc expr
427     return true;
428   }
429   StringRef ModifierName = Parser.getTok().getString();
430   ModifierKind = AVRMCExpr::getKindByName(ModifierName.str().c_str());
431 
432   if (ModifierKind != AVRMCExpr::VK_AVR_None) {
433     Parser.Lex();
434     Parser.Lex(); // Eat modifier name and parenthesis
435   } else {
436     return Error(Parser.getTok().getLoc(), "unknown modifier");
437   }
438 
439   MCExpr const *InnerExpression;
440   if (getParser().parseExpression(InnerExpression))
441     return true;
442 
443   // If we have a modifier wrap the inner expression
444   assert(Parser.getTok().getKind() == AsmToken::RParen);
445   Parser.Lex(); // Eat closing parenthesis
446 
447   MCExpr const *Expression = AVRMCExpr::create(ModifierKind, InnerExpression,
448                                                isNegated, getContext());
449 
450   SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
451   Operands.push_back(AVROperand::CreateImm(Expression, S, E));
452 
453   return false;
454 }
455 
456 bool AVRAsmParser::parseOperand(OperandVector &Operands) {
457   DEBUG(dbgs() << "parseOperand\n");
458 
459   switch (getLexer().getKind()) {
460   default:
461     return Error(Parser.getTok().getLoc(), "unexpected token in operand");
462 
463   case AsmToken::Identifier:
464     // Try to parse a register, if it fails,
465     // fall through to the next case.
466     if (!tryParseRegisterOperand(Operands)) {
467       return false;
468     }
469     LLVM_FALLTHROUGH;
470   case AsmToken::LParen:
471   case AsmToken::Integer:
472   case AsmToken::Dot:
473     return tryParseExpression(Operands);
474   case AsmToken::Plus:
475   case AsmToken::Minus: {
476     // If the sign preceeds a number, parse the number,
477     // otherwise treat the sign a an independent token.
478     switch (getLexer().peekTok().getKind()) {
479     case AsmToken::Integer:
480     case AsmToken::BigNum:
481     case AsmToken::Identifier:
482     case AsmToken::Real:
483       if (!tryParseExpression(Operands))
484         return false;
485     default:
486       break;
487     }
488     // Treat the token as an independent token.
489     Operands.push_back(AVROperand::CreateToken(Parser.getTok().getString(),
490                                                Parser.getTok().getLoc()));
491     Parser.Lex(); // Eat the token.
492     return false;
493   }
494   }
495 
496   // Could not parse operand
497   return true;
498 }
499 
500 OperandMatchResultTy
501 AVRAsmParser::parseMemriOperand(OperandVector &Operands) {
502   DEBUG(dbgs() << "parseMemriOperand()\n");
503 
504   SMLoc E, S;
505   MCExpr const *Expression;
506   int RegNo;
507 
508   // Parse register.
509   {
510     RegNo = parseRegister();
511 
512     if (RegNo == AVR::NoRegister)
513       return MatchOperand_ParseFail;
514 
515     S = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
516     Parser.Lex(); // Eat register token.
517   }
518 
519   // Parse immediate;
520   {
521     if (getParser().parseExpression(Expression))
522       return MatchOperand_ParseFail;
523 
524     E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
525   }
526 
527   Operands.push_back(AVROperand::CreateMemri(RegNo, Expression, S, E));
528 
529   return MatchOperand_Success;
530 }
531 
532 bool AVRAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
533                                  SMLoc &EndLoc) {
534   StartLoc = Parser.getTok().getLoc();
535   RegNo = parseRegister();
536   EndLoc = Parser.getTok().getLoc();
537 
538   return (RegNo == AVR::NoRegister);
539 }
540 
541 void AVRAsmParser::eatComma() {
542   if (getLexer().is(AsmToken::Comma)) {
543     Parser.Lex();
544   } else {
545     // GCC allows commas to be omitted.
546   }
547 }
548 
549 bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info,
550                                     StringRef Mnemonic, SMLoc NameLoc,
551                                     OperandVector &Operands) {
552   Operands.push_back(AVROperand::CreateToken(Mnemonic, NameLoc));
553 
554   bool first = true;
555   while (getLexer().isNot(AsmToken::EndOfStatement)) {
556     if (!first) eatComma();
557 
558     first = false;
559 
560     auto MatchResult = MatchOperandParserImpl(Operands, Mnemonic);
561 
562     if (MatchResult == MatchOperand_Success) {
563       continue;
564     }
565 
566     if (MatchResult == MatchOperand_ParseFail) {
567       SMLoc Loc = getLexer().getLoc();
568       Parser.eatToEndOfStatement();
569 
570       return Error(Loc, "failed to parse register and immediate pair");
571     }
572 
573     if (parseOperand(Operands)) {
574       SMLoc Loc = getLexer().getLoc();
575       Parser.eatToEndOfStatement();
576       return Error(Loc, "unexpected token in argument list");
577     }
578   }
579   Parser.Lex(); // Consume the EndOfStatement
580   return false;
581 }
582 
583 bool AVRAsmParser::ParseDirective(llvm::AsmToken DirectiveID) { return true; }
584 
585 extern "C" void LLVMInitializeAVRAsmParser() {
586   RegisterMCAsmParser<AVRAsmParser> X(getTheAVRTarget());
587 }
588 
589 #define GET_REGISTER_MATCHER
590 #define GET_MATCHER_IMPLEMENTATION
591 #include "AVRGenAsmMatcher.inc"
592 
593 // Uses enums defined in AVRGenAsmMatcher.inc
594 unsigned AVRAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
595                                                   unsigned ExpectedKind) {
596   AVROperand &Op = static_cast<AVROperand &>(AsmOp);
597   MatchClassKind Expected = static_cast<MatchClassKind>(ExpectedKind);
598 
599   // If need be, GCC converts bare numbers to register names
600   // It's ugly, but GCC supports it.
601   if (Op.isImm()) {
602     if (MCConstantExpr const *Const = dyn_cast<MCConstantExpr>(Op.getImm())) {
603       int64_t RegNum = Const->getValue();
604       std::ostringstream RegName;
605       RegName << "r" << RegNum;
606       RegNum = MatchRegisterName(RegName.str().c_str());
607       if (RegNum != AVR::NoRegister) {
608         Op.makeReg(RegNum);
609         if (validateOperandClass(Op, Expected) == Match_Success) {
610           return Match_Success;
611         }
612       }
613       // Let the other quirks try their magic.
614     }
615   }
616 
617   if (Op.isReg()) {
618     // If the instruction uses a register pair but we got a single, lower
619     // register we perform a "class cast".
620     if (isSubclass(Expected, MCK_DREGS)) {
621       unsigned correspondingDREG = toDREG(Op.getReg());
622 
623       if (correspondingDREG != AVR::NoRegister) {
624         Op.makeReg(correspondingDREG);
625         return validateOperandClass(Op, Expected);
626       }
627     }
628   }
629   return Match_InvalidOperand;
630 }
631 
632 } // end of namespace llvm
633