1 //===---- AVRAsmParser.cpp - Parse AVR assembly to MCInst instructions ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AVR.h"
11 #include "AVRRegisterInfo.h"
12 #include "MCTargetDesc/AVRMCExpr.h"
13 #include "MCTargetDesc/AVRMCTargetDesc.h"
14 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCInstBuilder.h"
21 #include "llvm/MC/MCStreamer.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/MC/MCSymbol.h"
24 #include "llvm/MC/MCParser/MCAsmLexer.h"
25 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
26 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
27 #include "llvm/MC/MCValue.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Support/TargetRegistry.h"
31 
32 #include <sstream>
33 
34 #define DEBUG_TYPE "avr-asm-parser"
35 
36 namespace llvm {
37 
/// Parses AVR assembly from a stream.
///
/// Target-specific half of the assembly parser: it turns the token stream
/// provided by the generic MCAsmParser into AVROperand lists and hands them to
/// the TableGen-generated matcher pulled in from AVRGenAsmMatcher.inc.
class AVRAsmParser : public MCTargetAsmParser {
  // Subtarget description; used for feature bits and when emitting.
  const MCSubtargetInfo &STI;
  // The generic parser this target parser is attached to.
  MCAsmParser &Parser;
  // Register info obtained from the MCContext in the constructor.
  const MCRegisterInfo *MRI;

// Pull the generated matcher's member declarations into this class.
#define GET_ASSEMBLER_HEADER
#include "AVRGenAsmMatcher.inc"

  /// Runs the generated matcher over \p Operands and either emits the
  /// resulting instruction to \p Out or reports a diagnostic.
  /// \returns false on success, true on failure.
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;

  /// Parses a register at the current token into \p RegNo and records the
  /// token locations seen before and after the attempt.
  /// \returns true if no register was recognised.
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;

  /// Parses the operands of the instruction named \p Name up to the end of
  /// the statement, appending them (after the mnemonic token) to \p Operands.
  /// \returns true if a diagnostic was issued.
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;

  /// Always returns true; this parser implements no directives itself.
  bool ParseDirective(AsmToken directiveID) override;

  /// Custom operand parser for a register followed by an expression
  /// (a "memri" operand).
  OperandMatchResultTy parseMemriOperand(OperandVector &Operands);

  // Generic operand parsing helpers. The bool-returning helpers follow the
  // convention used throughout this file: false means success.
  bool parseOperand(OperandVector &Operands);
  // Looks the current token up with matchFn, retrying with the lower-case and
  // upper-case spelling of the token.
  int parseRegisterName(unsigned (*matchFn)(StringRef));
  // Tries MatchRegisterName first, then MatchRegisterAltName.
  int parseRegisterName();
  // Parses a single register name or the "high:low" register pair syntax.
  int parseRegister();
  bool tryParseRegisterOperand(OperandVector &Operands);
  bool tryParseExpression(OperandVector &Operands);
  // Parses "modifier(expr)" expressions such as lo8(...) / hi8(...).
  bool tryParseRelocExpression(OperandVector &Operands);
  // Consumes the current token if it is a comma; otherwise does nothing.
  void eatComma();

  /// Hook for operand conversions the generated matcher does not know about
  /// (bare numbers as register names, single registers as register pairs).
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;

  /// Returns the register of class DREGS that contains \p Reg as its
  /// sub-register with index \p From (the low half by default), as computed
  /// by MCRegisterInfo::getMatchingSuperReg. Callers compare the result
  /// against AVR::NoRegister to detect the absence of such a register.
  unsigned toDREG(unsigned Reg, unsigned From = AVR::sub_lo) {
    MCRegisterClass const *Class = &AVRMCRegisterClasses[AVR::DREGSRegClassID];
    return MRI->getMatchingSuperReg(Reg, From, Class);
  }

  // Sets the source location on the instruction and sends it to the streamer.
  bool emit(MCInst &Instruction, SMLoc const &Loc, MCStreamer &Out) const;
  // Diagnostic helpers; both report through Error().
  bool invalidOperand(SMLoc const &Loc, OperandVector const &Operands,
                      uint64_t const &ErrorInfo);
  bool missingFeature(SMLoc const &Loc, uint64_t const &ErrorInfo);

public:
  /// Attaches this target parser to \p Parser, caches the register info from
  /// the context, and computes the available feature set from the feature
  /// bits of \p STI. \p MII is not used by this constructor.
  AVRAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
               const MCInstrInfo &MII, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI), STI(STI), Parser(Parser) {
    MCAsmParserExtension::Initialize(Parser);
    MRI = getContext().getRegisterInfo();

    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
  }

  // Accessors for the generic parser and its lexer.
  MCAsmParser &getParser() const { return Parser; }
  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
};
96 
97 /// An parsed AVR assembly operand.
98 class AVROperand : public MCParsedAsmOperand {
99   typedef MCParsedAsmOperand Base;
100   enum KindTy { k_Immediate, k_Register, k_Token, k_Memri } Kind;
101 
102 public:
103   AVROperand(StringRef Tok, SMLoc const &S)
104       : Base(), Kind(k_Token), Tok(Tok), Start(S), End(S) {}
105   AVROperand(unsigned Reg, SMLoc const &S, SMLoc const &E)
106       : Base(), Kind(k_Register), RegImm({Reg, nullptr}), Start(S), End(E) {}
107   AVROperand(MCExpr const *Imm, SMLoc const &S, SMLoc const &E)
108       : Base(), Kind(k_Immediate), RegImm({0, Imm}), Start(S), End(E) {}
109   AVROperand(unsigned Reg, MCExpr const *Imm, SMLoc const &S, SMLoc const &E)
110       : Base(), Kind(k_Memri), RegImm({Reg, Imm}), Start(S), End(E) {}
111 
112   struct RegisterImmediate {
113     unsigned Reg;
114     MCExpr const *Imm;
115   };
116   union {
117     StringRef Tok;
118     RegisterImmediate RegImm;
119   };
120 
121   SMLoc Start, End;
122 
123 public:
124   void addRegOperands(MCInst &Inst, unsigned N) const {
125     assert(Kind == k_Register && "Unexpected operand kind");
126     assert(N == 1 && "Invalid number of operands!");
127 
128     Inst.addOperand(MCOperand::createReg(getReg()));
129   }
130 
131   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
132     // Add as immediate when possible
133     if (!Expr)
134       Inst.addOperand(MCOperand::createImm(0));
135     else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
136       Inst.addOperand(MCOperand::createImm(CE->getValue()));
137     else
138       Inst.addOperand(MCOperand::createExpr(Expr));
139   }
140 
141   void addImmOperands(MCInst &Inst, unsigned N) const {
142     assert(Kind == k_Immediate && "Unexpected operand kind");
143     assert(N == 1 && "Invalid number of operands!");
144 
145     const MCExpr *Expr = getImm();
146     addExpr(Inst, Expr);
147   }
148 
149   /// Adds the contained reg+imm operand to an instruction.
150   void addMemriOperands(MCInst &Inst, unsigned N) const {
151     assert(Kind == k_Memri && "Unexpected operand kind");
152     assert(N == 2 && "Invalid number of operands");
153 
154     Inst.addOperand(MCOperand::createReg(getReg()));
155     addExpr(Inst, getImm());
156   }
157 
158   bool isReg() const { return Kind == k_Register; }
159   bool isImm() const { return Kind == k_Immediate; }
160   bool isToken() const { return Kind == k_Token; }
161   bool isMem() const { return Kind == k_Memri; }
162   bool isMemri() const { return Kind == k_Memri; }
163 
164   StringRef getToken() const {
165     assert(Kind == k_Token && "Invalid access!");
166     return Tok;
167   }
168 
169   unsigned getReg() const {
170     assert((Kind == k_Register || Kind == k_Memri) && "Invalid access!");
171 
172     return RegImm.Reg;
173   }
174 
175   const MCExpr *getImm() const {
176     assert((Kind == k_Immediate || Kind == k_Memri) && "Invalid access!");
177     return RegImm.Imm;
178   }
179 
180   static std::unique_ptr<AVROperand> CreateToken(StringRef Str, SMLoc S) {
181     return make_unique<AVROperand>(Str, S);
182   }
183 
184   static std::unique_ptr<AVROperand> CreateReg(unsigned RegNum, SMLoc S,
185                                                SMLoc E) {
186     return make_unique<AVROperand>(RegNum, S, E);
187   }
188 
189   static std::unique_ptr<AVROperand> CreateImm(const MCExpr *Val, SMLoc S,
190                                                SMLoc E) {
191     return make_unique<AVROperand>(Val, S, E);
192   }
193 
194   static std::unique_ptr<AVROperand>
195   CreateMemri(unsigned RegNum, const MCExpr *Val, SMLoc S, SMLoc E) {
196     return make_unique<AVROperand>(RegNum, Val, S, E);
197   }
198 
199   void makeToken(StringRef Token) {
200     Kind = k_Token;
201     Tok = Token;
202   }
203 
204   void makeReg(unsigned RegNo) {
205     Kind = k_Register;
206     RegImm = {RegNo, nullptr};
207   }
208 
209   void makeImm(MCExpr const *Ex) {
210     Kind = k_Immediate;
211     RegImm = {0, Ex};
212   }
213 
214   void makeMemri(unsigned RegNo, MCExpr const *Imm) {
215     Kind = k_Memri;
216     RegImm = {RegNo, Imm};
217   }
218 
219   SMLoc getStartLoc() const { return Start; }
220   SMLoc getEndLoc() const { return End; }
221 
222   virtual void print(raw_ostream &O) const {
223     switch (Kind) {
224     case k_Token:
225       O << "Token: \"" << getToken() << "\"";
226       break;
227     case k_Register:
228       O << "Register: " << getReg();
229       break;
230     case k_Immediate:
231       O << "Immediate: \"" << *getImm() << "\"";
232       break;
233     case k_Memri: {
234       // only manually print the size for non-negative values,
235       // as the sign is inserted automatically.
236       O << "Memri: \"" << getReg() << '+' << *getImm() << "\"";
237       break;
238     }
239     }
240     O << "\n";
241   }
242 };
243 
244 // Auto-generated Match Functions
245 
246 /// Maps from the set of all register names to a register number.
247 /// \note Generated by TableGen.
248 static unsigned MatchRegisterName(StringRef Name);
249 
/// Maps from the set of all alternative register names to a register number.
251 /// \note Generated by TableGen.
252 static unsigned MatchRegisterAltName(StringRef Name);
253 
254 bool AVRAsmParser::invalidOperand(SMLoc const &Loc,
255                                   OperandVector const &Operands,
256                                   uint64_t const &ErrorInfo) {
257   SMLoc ErrorLoc = Loc;
258   char const *Diag = 0;
259 
260   if (ErrorInfo != ~0U) {
261     if (ErrorInfo >= Operands.size()) {
262       Diag = "too few operands for instruction.";
263     } else {
264       AVROperand const &Op = (AVROperand const &)*Operands[ErrorInfo];
265 
266       // TODO: See if we can do a better error than just "invalid ...".
267       if (Op.getStartLoc() != SMLoc()) {
268         ErrorLoc = Op.getStartLoc();
269       }
270     }
271   }
272 
273   if (!Diag) {
274     Diag = "invalid operand for instruction";
275   }
276 
277   return Error(ErrorLoc, Diag);
278 }
279 
280 bool AVRAsmParser::missingFeature(llvm::SMLoc const &Loc,
281                                   uint64_t const &ErrorInfo) {
282   return Error(Loc, "instruction requires a CPU feature not currently enabled");
283 }
284 
285 bool AVRAsmParser::emit(MCInst &Inst, SMLoc const &Loc, MCStreamer &Out) const {
286   Inst.setLoc(Loc);
287   Out.EmitInstruction(Inst, STI);
288 
289   return false;
290 }
291 
292 bool AVRAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
293                                            OperandVector &Operands,
294                                            MCStreamer &Out, uint64_t &ErrorInfo,
295                                            bool MatchingInlineAsm) {
296   MCInst Inst;
297   unsigned MatchResult =
298       MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
299 
300   switch (MatchResult) {
301   case Match_Success:        return emit(Inst, Loc, Out);
302   case Match_MissingFeature: return missingFeature(Loc, ErrorInfo);
303   case Match_InvalidOperand: return invalidOperand(Loc, Operands, ErrorInfo);
304   case Match_MnemonicFail:   return Error(Loc, "invalid instruction");
305   default:                   return true;
306   }
307 }
308 
309 /// Parses a register name using a given matching function.
310 /// Checks for lowercase or uppercase if necessary.
311 int AVRAsmParser::parseRegisterName(unsigned (*matchFn)(StringRef)) {
312   StringRef Name = Parser.getTok().getString();
313 
314   int RegNum = matchFn(Name);
315 
316   // GCC supports case insensitive register names. Some of the AVR registers
317   // are all lower case, some are all upper case but non are mixed. We prefer
318   // to use the original names in the register definitions. That is why we
319   // have to test both upper and lower case here.
320   if (RegNum == AVR::NoRegister) {
321     RegNum = matchFn(Name.lower());
322   }
323   if (RegNum == AVR::NoRegister) {
324     RegNum = matchFn(Name.upper());
325   }
326 
327   return RegNum;
328 }
329 
330 int AVRAsmParser::parseRegisterName() {
331   int RegNum = parseRegisterName(&MatchRegisterName);
332 
333   if (RegNum == AVR::NoRegister)
334     RegNum = parseRegisterName(&MatchRegisterAltName);
335 
336   return RegNum;
337 }
338 
339 int AVRAsmParser::parseRegister() {
340   int RegNum = AVR::NoRegister;
341 
342   if (Parser.getTok().is(AsmToken::Identifier)) {
343     // Check for register pair syntax
344     if (Parser.getLexer().peekTok().is(AsmToken::Colon)) {
345       Parser.Lex();
346       Parser.Lex(); // Eat high (odd) register and colon
347 
348       if (Parser.getTok().is(AsmToken::Identifier)) {
349         // Convert lower (even) register to DREG
350         RegNum = toDREG(parseRegisterName());
351       }
352     } else {
353       RegNum = parseRegisterName();
354     }
355   }
356   return RegNum;
357 }
358 
359 bool AVRAsmParser::tryParseRegisterOperand(OperandVector &Operands) {
360   int RegNo = parseRegister();
361 
362   if (RegNo == AVR::NoRegister)
363     return true;
364 
365   AsmToken const &T = Parser.getTok();
366   Operands.push_back(AVROperand::CreateReg(RegNo, T.getLoc(), T.getEndLoc()));
367   Parser.Lex(); // Eat register token.
368 
369   return false;
370 }
371 
372 bool AVRAsmParser::tryParseExpression(OperandVector &Operands) {
373   SMLoc S = Parser.getTok().getLoc();
374 
375   if (!tryParseRelocExpression(Operands))
376     return false;
377 
378   if ((Parser.getTok().getKind() == AsmToken::Plus ||
379        Parser.getTok().getKind() == AsmToken::Minus) &&
380       Parser.getLexer().peekTok().getKind() == AsmToken::Identifier) {
381     // Don't handle this case - it should be split into two
382     // separate tokens.
383     return true;
384   }
385 
386   // Parse (potentially inner) expression
387   MCExpr const *Expression;
388   if (getParser().parseExpression(Expression))
389     return true;
390 
391   SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
392   Operands.push_back(AVROperand::CreateImm(Expression, S, E));
393   return false;
394 }
395 
396 bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) {
397   bool isNegated = false;
398   AVRMCExpr::VariantKind ModifierKind = AVRMCExpr::VK_AVR_None;
399 
400   SMLoc S = Parser.getTok().getLoc();
401 
402   // Check for sign
403   AsmToken tokens[2];
404   size_t ReadCount = Parser.getLexer().peekTokens(tokens);
405 
406   if (ReadCount == 2) {
407     if (tokens[0].getKind() == AsmToken::Identifier &&
408         tokens[1].getKind() == AsmToken::LParen) {
409 
410       AsmToken::TokenKind CurTok = Parser.getLexer().getKind();
411       if (CurTok == AsmToken::Minus) {
412         isNegated = true;
413       } else {
414         assert(CurTok == AsmToken::Plus);
415         isNegated = false;
416       }
417 
418       // Eat the sign
419       Parser.Lex();
420     }
421   }
422 
423   // Check if we have a target specific modifier (lo8, hi8, &c)
424   if (Parser.getTok().getKind() != AsmToken::Identifier ||
425       Parser.getLexer().peekTok().getKind() != AsmToken::LParen) {
426     // Not a reloc expr
427     return true;
428   }
429   StringRef ModifierName = Parser.getTok().getString();
430   ModifierKind = AVRMCExpr::getKindByName(ModifierName.str().c_str());
431 
432   if (ModifierKind != AVRMCExpr::VK_AVR_None) {
433     Parser.Lex();
434     Parser.Lex(); // Eat modifier name and parenthesis
435   } else {
436     return Error(Parser.getTok().getLoc(), "unknown modifier");
437   }
438 
439   MCExpr const *InnerExpression;
440   if (getParser().parseExpression(InnerExpression))
441     return true;
442 
443   // If we have a modifier wrap the inner expression
444   assert(Parser.getTok().getKind() == AsmToken::RParen);
445   Parser.Lex(); // Eat closing parenthesis
446 
447   MCExpr const *Expression = AVRMCExpr::create(ModifierKind, InnerExpression,
448                                                isNegated, getContext());
449 
450   SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
451   Operands.push_back(AVROperand::CreateImm(Expression, S, E));
452 
453   return false;
454 }
455 
456 bool AVRAsmParser::parseOperand(OperandVector &Operands) {
457   DEBUG(dbgs() << "parseOperand\n");
458 
459   switch (getLexer().getKind()) {
460   default:
461     return Error(Parser.getTok().getLoc(), "unexpected token in operand");
462 
463   case AsmToken::Identifier:
464     // Try to parse a register, if it fails,
465     // fall through to the next case.
466     if (!tryParseRegisterOperand(Operands)) {
467       return false;
468     }
469   case AsmToken::LParen:
470   case AsmToken::Integer:
471   case AsmToken::Dot:
472     return tryParseExpression(Operands);
473   case AsmToken::Plus:
474   case AsmToken::Minus: {
475     // If the sign preceeds a number, parse the number,
476     // otherwise treat the sign a an independent token.
477     switch (getLexer().peekTok().getKind()) {
478     case AsmToken::Integer:
479     case AsmToken::BigNum:
480     case AsmToken::Identifier:
481     case AsmToken::Real:
482       if (!tryParseExpression(Operands))
483         return false;
484     default:
485       break;
486     }
487     // Treat the token as an independent token.
488     Operands.push_back(AVROperand::CreateToken(Parser.getTok().getString(),
489                                                Parser.getTok().getLoc()));
490     Parser.Lex(); // Eat the token.
491     return false;
492   }
493   }
494 
495   // Could not parse operand
496   return true;
497 }
498 
499 AVRAsmParser::OperandMatchResultTy
500 AVRAsmParser::parseMemriOperand(OperandVector &Operands) {
501   DEBUG(dbgs() << "parseMemriOperand()\n");
502 
503   SMLoc E, S;
504   MCExpr const *Expression;
505   int RegNo;
506 
507   // Parse register.
508   {
509     RegNo = parseRegister();
510 
511     if (RegNo == AVR::NoRegister)
512       return MatchOperand_ParseFail;
513 
514     S = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
515     Parser.Lex(); // Eat register token.
516   }
517 
518   // Parse immediate;
519   {
520     if (getParser().parseExpression(Expression))
521       return MatchOperand_ParseFail;
522 
523     E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
524   }
525 
526   Operands.push_back(AVROperand::CreateMemri(RegNo, Expression, S, E));
527 
528   return MatchOperand_Success;
529 }
530 
531 bool AVRAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
532                                  SMLoc &EndLoc) {
533   StartLoc = Parser.getTok().getLoc();
534   RegNo = parseRegister();
535   EndLoc = Parser.getTok().getLoc();
536 
537   return (RegNo == AVR::NoRegister);
538 }
539 
540 void AVRAsmParser::eatComma() {
541   if (getLexer().is(AsmToken::Comma)) {
542     Parser.Lex();
543   } else {
544     // GCC allows commas to be omitted.
545   }
546 }
547 
548 bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info,
549                                     StringRef Mnemonic, SMLoc NameLoc,
550                                     OperandVector &Operands) {
551   Operands.push_back(AVROperand::CreateToken(Mnemonic, NameLoc));
552 
553   bool first = true;
554   while (getLexer().isNot(AsmToken::EndOfStatement)) {
555     if (!first) eatComma();
556 
557     first = false;
558 
559     auto MatchResult = MatchOperandParserImpl(Operands, Mnemonic);
560 
561     if (MatchResult == MatchOperand_Success) {
562       continue;
563     }
564 
565     if (MatchResult == MatchOperand_ParseFail) {
566       SMLoc Loc = getLexer().getLoc();
567       Parser.eatToEndOfStatement();
568 
569       return Error(Loc, "failed to parse register and immediate pair");
570     }
571 
572     if (parseOperand(Operands)) {
573       SMLoc Loc = getLexer().getLoc();
574       Parser.eatToEndOfStatement();
575       return Error(Loc, "unexpected token in argument list");
576     }
577   }
578   Parser.Lex(); // Consume the EndOfStatement
579   return false;
580 }
581 
582 bool AVRAsmParser::ParseDirective(llvm::AsmToken DirectiveID) { return true; }
583 
584 extern "C" void LLVMInitializeAVRAsmParser() {
585   RegisterMCAsmParser<AVRAsmParser> X(getTheAVRTarget());
586 }
587 
588 #define GET_REGISTER_MATCHER
589 #define GET_MATCHER_IMPLEMENTATION
590 #include "AVRGenAsmMatcher.inc"
591 
592 // Uses enums defined in AVRGenAsmMatcher.inc
593 unsigned AVRAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
594                                                   unsigned ExpectedKind) {
595   AVROperand &Op = static_cast<AVROperand &>(AsmOp);
596   MatchClassKind Expected = static_cast<MatchClassKind>(ExpectedKind);
597 
598   // If need be, GCC converts bare numbers to register names
599   // It's ugly, but GCC supports it.
600   if (Op.isImm()) {
601     if (MCConstantExpr const *Const = dyn_cast<MCConstantExpr>(Op.getImm())) {
602       int64_t RegNum = Const->getValue();
603       std::ostringstream RegName;
604       RegName << "r" << RegNum;
605       RegNum = MatchRegisterName(RegName.str().c_str());
606       if (RegNum != AVR::NoRegister) {
607         Op.makeReg(RegNum);
608         if (validateOperandClass(Op, Expected) == Match_Success) {
609           return Match_Success;
610         }
611       }
612       // Let the other quirks try their magic.
613     }
614   }
615 
616   if (Op.isReg()) {
617     // If the instruction uses a register pair but we got a single, lower
618     // register we perform a "class cast".
619     if (isSubclass(Expected, MCK_DREGS)) {
620       unsigned correspondingDREG = toDREG(Op.getReg());
621 
622       if (correspondingDREG != AVR::NoRegister) {
623         Op.makeReg(correspondingDREG);
624         return validateOperandClass(Op, Expected);
625       }
626     }
627   }
628   return Match_InvalidOperand;
629 }
630 
631 } // end of namespace llvm
632