1 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/BPFMCTargetDesc.h"
10 #include "TargetInfo/BPFTargetInfo.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/MC/MCContext.h"
14 #include "llvm/MC/MCExpr.h"
15 #include "llvm/MC/MCInst.h"
16 #include "llvm/MC/MCInstrInfo.h"
17 #include "llvm/MC/MCParser/MCAsmLexer.h"
18 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
19 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
20 #include "llvm/MC/MCRegisterInfo.h"
21 #include "llvm/MC/MCStreamer.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/MC/TargetRegistry.h"
24 #include "llvm/Support/Casting.h"
25 
26 using namespace llvm;
27 
28 namespace {
29 struct BPFOperand;
30 
/// Target-specific assembly parser for BPF. It implements the
/// MCTargetAsmParser hooks declared below and pulls in the TableGen-generated
/// matcher declarations from BPFGenAsmMatcher.inc.
class BPFAsmParser : public MCTargetAsmParser {

  /// Source location of the token the generic parser is currently positioned
  /// on.
  SMLoc getLoc() const { return getParser().getTok().getLoc(); }

  /// Operand constraint check that MatchAndEmitInstruction runs before the
  /// generated matcher; a true result is reported as an error there.
  bool PreMatchCheck(OperandVector &Operands);

  /// Matches the parsed operands and, on success, emits the instruction to
  /// \p Out. Defined out of line.
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;

  /// Register-parsing hooks; ParseRegister is implemented in terms of
  /// tryParseRegister and reports an error when it does not succeed.
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;

  /// Parses one statement into \p Operands; \p Name is the leading
  /// identifier, which becomes the first operand.
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;

  /// Directive hook; the out-of-line definition unconditionally returns true.
  bool ParseDirective(AsmToken DirectiveID) override;

  // "=" is used as the assignment operator inside assembly statements, so it
  // can't be used for symbol assignment.
  bool equalIsAsmAssignment() override { return false; }
  // "*" is used for dereferencing memory, so it can be the start of a
  // statement.
  bool starIsStartOfStatement() override { return true; }

  // Declarations emitted by TableGen for the generated matcher.
#define GET_ASSEMBLER_HEADER
#include "BPFGenAsmMatcher.inc"

  /// Per-kind operand parsers used by ParseInstruction. Each returns
  /// MatchOperand_Success after appending to \p Operands, or a non-success
  /// result otherwise.
  OperandMatchResultTy parseImmediate(OperandVector &Operands);
  OperandMatchResultTy parseRegister(OperandVector &Operands);
  OperandMatchResultTy parseOperandAsOperator(OperandVector &Operands);

public:
  /// Target-specific match result codes, numbered from
  /// FIRST_TARGET_MATCH_RESULT_TY. Further enumerators come from the
  /// generated include below.
  enum BPFMatchResultTy {
    Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "BPFGenAsmMatcher.inc"
#undef GET_OPERAND_DIAGNOSTIC_TYPES
  };

  /// Forwards \p Options, \p STI and \p MII to the base class and sets the
  /// available features computed from the subtarget's feature bits.
  /// \p Parser is not used in this constructor body.
  BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
               const MCInstrInfo &MII, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII) {
    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
  }
};
79 
80 /// BPFOperand - Instances of this class represent a parsed machine
81 /// instruction
82 struct BPFOperand : public MCParsedAsmOperand {
83 
84   enum KindTy {
85     Token,
86     Register,
87     Immediate,
88   } Kind;
89 
90   struct RegOp {
91     unsigned RegNum;
92   };
93 
94   struct ImmOp {
95     const MCExpr *Val;
96   };
97 
98   SMLoc StartLoc, EndLoc;
99   union {
100     StringRef Tok;
101     RegOp Reg;
102     ImmOp Imm;
103   };
104 
BPFOperand__anonaca8c6c00111::BPFOperand105   BPFOperand(KindTy K) : Kind(K) {}
106 
107 public:
BPFOperand__anonaca8c6c00111::BPFOperand108   BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
109     Kind = o.Kind;
110     StartLoc = o.StartLoc;
111     EndLoc = o.EndLoc;
112 
113     switch (Kind) {
114     case Register:
115       Reg = o.Reg;
116       break;
117     case Immediate:
118       Imm = o.Imm;
119       break;
120     case Token:
121       Tok = o.Tok;
122       break;
123     }
124   }
125 
isToken__anonaca8c6c00111::BPFOperand126   bool isToken() const override { return Kind == Token; }
isReg__anonaca8c6c00111::BPFOperand127   bool isReg() const override { return Kind == Register; }
isImm__anonaca8c6c00111::BPFOperand128   bool isImm() const override { return Kind == Immediate; }
isMem__anonaca8c6c00111::BPFOperand129   bool isMem() const override { return false; }
130 
isConstantImm__anonaca8c6c00111::BPFOperand131   bool isConstantImm() const {
132     return isImm() && isa<MCConstantExpr>(getImm());
133   }
134 
getConstantImm__anonaca8c6c00111::BPFOperand135   int64_t getConstantImm() const {
136     const MCExpr *Val = getImm();
137     return static_cast<const MCConstantExpr *>(Val)->getValue();
138   }
139 
isSImm12__anonaca8c6c00111::BPFOperand140   bool isSImm12() const {
141     return (isConstantImm() && isInt<12>(getConstantImm()));
142   }
143 
144   /// getStartLoc - Gets location of the first token of this operand
getStartLoc__anonaca8c6c00111::BPFOperand145   SMLoc getStartLoc() const override { return StartLoc; }
146   /// getEndLoc - Gets location of the last token of this operand
getEndLoc__anonaca8c6c00111::BPFOperand147   SMLoc getEndLoc() const override { return EndLoc; }
148 
getReg__anonaca8c6c00111::BPFOperand149   unsigned getReg() const override {
150     assert(Kind == Register && "Invalid type access!");
151     return Reg.RegNum;
152   }
153 
getImm__anonaca8c6c00111::BPFOperand154   const MCExpr *getImm() const {
155     assert(Kind == Immediate && "Invalid type access!");
156     return Imm.Val;
157   }
158 
getToken__anonaca8c6c00111::BPFOperand159   StringRef getToken() const {
160     assert(Kind == Token && "Invalid type access!");
161     return Tok;
162   }
163 
print__anonaca8c6c00111::BPFOperand164   void print(raw_ostream &OS) const override {
165     switch (Kind) {
166     case Immediate:
167       OS << *getImm();
168       break;
169     case Register:
170       OS << "<register x";
171       OS << getReg() << ">";
172       break;
173     case Token:
174       OS << "'" << getToken() << "'";
175       break;
176     }
177   }
178 
addExpr__anonaca8c6c00111::BPFOperand179   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
180     assert(Expr && "Expr shouldn't be null!");
181 
182     if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
183       Inst.addOperand(MCOperand::createImm(CE->getValue()));
184     else
185       Inst.addOperand(MCOperand::createExpr(Expr));
186   }
187 
188   // Used by the TableGen Code
addRegOperands__anonaca8c6c00111::BPFOperand189   void addRegOperands(MCInst &Inst, unsigned N) const {
190     assert(N == 1 && "Invalid number of operands!");
191     Inst.addOperand(MCOperand::createReg(getReg()));
192   }
193 
addImmOperands__anonaca8c6c00111::BPFOperand194   void addImmOperands(MCInst &Inst, unsigned N) const {
195     assert(N == 1 && "Invalid number of operands!");
196     addExpr(Inst, getImm());
197   }
198 
createToken__anonaca8c6c00111::BPFOperand199   static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
200     auto Op = std::make_unique<BPFOperand>(Token);
201     Op->Tok = Str;
202     Op->StartLoc = S;
203     Op->EndLoc = S;
204     return Op;
205   }
206 
createReg__anonaca8c6c00111::BPFOperand207   static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
208                                                SMLoc E) {
209     auto Op = std::make_unique<BPFOperand>(Register);
210     Op->Reg.RegNum = RegNo;
211     Op->StartLoc = S;
212     Op->EndLoc = E;
213     return Op;
214   }
215 
createImm__anonaca8c6c00111::BPFOperand216   static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
217                                                SMLoc E) {
218     auto Op = std::make_unique<BPFOperand>(Immediate);
219     Op->Imm.Val = Val;
220     Op->StartLoc = S;
221     Op->EndLoc = E;
222     return Op;
223   }
224 
225   // Identifiers that can be used at the start of a statment.
isValidIdAtStart__anonaca8c6c00111::BPFOperand226   static bool isValidIdAtStart(StringRef Name) {
227     return StringSwitch<bool>(Name.lower())
228         .Case("if", true)
229         .Case("call", true)
230         .Case("goto", true)
231         .Case("*", true)
232         .Case("exit", true)
233         .Case("lock", true)
234         .Case("ld_pseudo", true)
235         .Default(false);
236   }
237 
238   // Identifiers that can be used in the middle of a statment.
isValidIdInMiddle__anonaca8c6c00111::BPFOperand239   static bool isValidIdInMiddle(StringRef Name) {
240     return StringSwitch<bool>(Name.lower())
241         .Case("u64", true)
242         .Case("u32", true)
243         .Case("u16", true)
244         .Case("u8", true)
245         .Case("be64", true)
246         .Case("be32", true)
247         .Case("be16", true)
248         .Case("le64", true)
249         .Case("le32", true)
250         .Case("le16", true)
251         .Case("goto", true)
252         .Case("ll", true)
253         .Case("skb", true)
254         .Case("s", true)
255         .Default(false);
256   }
257 };
258 } // end anonymous namespace.
259 
260 #define GET_REGISTER_MATCHER
261 #define GET_MATCHER_IMPLEMENTATION
262 #include "BPFGenAsmMatcher.inc"
263 
PreMatchCheck(OperandVector & Operands)264 bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
265 
266   if (Operands.size() == 4) {
267     // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
268     // reg1 must be the same as reg2
269     BPFOperand &Op0 = (BPFOperand &)*Operands[0];
270     BPFOperand &Op1 = (BPFOperand &)*Operands[1];
271     BPFOperand &Op2 = (BPFOperand &)*Operands[2];
272     BPFOperand &Op3 = (BPFOperand &)*Operands[3];
273     if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
274         && Op1.getToken() == "="
275         && (Op2.getToken() == "-" || Op2.getToken() == "be16"
276             || Op2.getToken() == "be32" || Op2.getToken() == "be64"
277             || Op2.getToken() == "le16" || Op2.getToken() == "le32"
278             || Op2.getToken() == "le64")
279         && Op0.getReg() != Op3.getReg())
280       return true;
281   }
282 
283   return false;
284 }
285 
MatchAndEmitInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)286 bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
287                                            OperandVector &Operands,
288                                            MCStreamer &Out, uint64_t &ErrorInfo,
289                                            bool MatchingInlineAsm) {
290   MCInst Inst;
291   SMLoc ErrorLoc;
292 
293   if (PreMatchCheck(Operands))
294     return Error(IDLoc, "additional inst constraint not met");
295 
296   switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
297   default:
298     break;
299   case Match_Success:
300     Inst.setLoc(IDLoc);
301     Out.emitInstruction(Inst, getSTI());
302     return false;
303   case Match_MissingFeature:
304     return Error(IDLoc, "instruction use requires an option to be enabled");
305   case Match_MnemonicFail:
306     return Error(IDLoc, "unrecognized instruction mnemonic");
307   case Match_InvalidOperand:
308     ErrorLoc = IDLoc;
309 
310     if (ErrorInfo != ~0U) {
311       if (ErrorInfo >= Operands.size())
312         return Error(ErrorLoc, "too few operands for instruction");
313 
314       ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
315 
316       if (ErrorLoc == SMLoc())
317         ErrorLoc = IDLoc;
318     }
319 
320     return Error(ErrorLoc, "invalid operand for instruction");
321   }
322 
323   llvm_unreachable("Unknown match type detected!");
324 }
325 
ParseRegister(unsigned & RegNo,SMLoc & StartLoc,SMLoc & EndLoc)326 bool BPFAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
327                                  SMLoc &EndLoc) {
328   if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
329     return Error(StartLoc, "invalid register name");
330   return false;
331 }
332 
tryParseRegister(unsigned & RegNo,SMLoc & StartLoc,SMLoc & EndLoc)333 OperandMatchResultTy BPFAsmParser::tryParseRegister(unsigned &RegNo,
334                                                     SMLoc &StartLoc,
335                                                     SMLoc &EndLoc) {
336   const AsmToken &Tok = getParser().getTok();
337   StartLoc = Tok.getLoc();
338   EndLoc = Tok.getEndLoc();
339   RegNo = 0;
340   StringRef Name = getLexer().getTok().getIdentifier();
341 
342   if (!MatchRegisterName(Name)) {
343     getParser().Lex(); // Eat identifier token.
344     return MatchOperand_Success;
345   }
346 
347   return MatchOperand_NoMatch;
348 }
349 
350 OperandMatchResultTy
parseOperandAsOperator(OperandVector & Operands)351 BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
352   SMLoc S = getLoc();
353 
354   if (getLexer().getKind() == AsmToken::Identifier) {
355     StringRef Name = getLexer().getTok().getIdentifier();
356 
357     if (BPFOperand::isValidIdInMiddle(Name)) {
358       getLexer().Lex();
359       Operands.push_back(BPFOperand::createToken(Name, S));
360       return MatchOperand_Success;
361     }
362 
363     return MatchOperand_NoMatch;
364   }
365 
366   switch (getLexer().getKind()) {
367   case AsmToken::Minus:
368   case AsmToken::Plus: {
369     if (getLexer().peekTok().is(AsmToken::Integer))
370       return MatchOperand_NoMatch;
371     LLVM_FALLTHROUGH;
372   }
373 
374   case AsmToken::Equal:
375   case AsmToken::Greater:
376   case AsmToken::Less:
377   case AsmToken::Pipe:
378   case AsmToken::Star:
379   case AsmToken::LParen:
380   case AsmToken::RParen:
381   case AsmToken::LBrac:
382   case AsmToken::RBrac:
383   case AsmToken::Slash:
384   case AsmToken::Amp:
385   case AsmToken::Percent:
386   case AsmToken::Caret: {
387     StringRef Name = getLexer().getTok().getString();
388     getLexer().Lex();
389     Operands.push_back(BPFOperand::createToken(Name, S));
390 
391     return MatchOperand_Success;
392   }
393 
394   case AsmToken::EqualEqual:
395   case AsmToken::ExclaimEqual:
396   case AsmToken::GreaterEqual:
397   case AsmToken::GreaterGreater:
398   case AsmToken::LessEqual:
399   case AsmToken::LessLess: {
400     Operands.push_back(BPFOperand::createToken(
401         getLexer().getTok().getString().substr(0, 1), S));
402     Operands.push_back(BPFOperand::createToken(
403         getLexer().getTok().getString().substr(1, 1), S));
404     getLexer().Lex();
405 
406     return MatchOperand_Success;
407   }
408 
409   default:
410     break;
411   }
412 
413   return MatchOperand_NoMatch;
414 }
415 
parseRegister(OperandVector & Operands)416 OperandMatchResultTy BPFAsmParser::parseRegister(OperandVector &Operands) {
417   SMLoc S = getLoc();
418   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
419 
420   switch (getLexer().getKind()) {
421   default:
422     return MatchOperand_NoMatch;
423   case AsmToken::Identifier:
424     StringRef Name = getLexer().getTok().getIdentifier();
425     unsigned RegNo = MatchRegisterName(Name);
426 
427     if (RegNo == 0)
428       return MatchOperand_NoMatch;
429 
430     getLexer().Lex();
431     Operands.push_back(BPFOperand::createReg(RegNo, S, E));
432   }
433   return MatchOperand_Success;
434 }
435 
parseImmediate(OperandVector & Operands)436 OperandMatchResultTy BPFAsmParser::parseImmediate(OperandVector &Operands) {
437   switch (getLexer().getKind()) {
438   default:
439     return MatchOperand_NoMatch;
440   case AsmToken::LParen:
441   case AsmToken::Minus:
442   case AsmToken::Plus:
443   case AsmToken::Integer:
444   case AsmToken::String:
445   case AsmToken::Identifier:
446     break;
447   }
448 
449   const MCExpr *IdVal;
450   SMLoc S = getLoc();
451 
452   if (getParser().parseExpression(IdVal))
453     return MatchOperand_ParseFail;
454 
455   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
456   Operands.push_back(BPFOperand::createImm(IdVal, S, E));
457 
458   return MatchOperand_Success;
459 }
460 
461 /// ParseInstruction - Parse an BPF instruction which is in BPF verifier
462 /// format.
ParseInstruction(ParseInstructionInfo & Info,StringRef Name,SMLoc NameLoc,OperandVector & Operands)463 bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
464                                     SMLoc NameLoc, OperandVector &Operands) {
465   // The first operand could be either register or actually an operator.
466   unsigned RegNo = MatchRegisterName(Name);
467 
468   if (RegNo != 0) {
469     SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
470     Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
471   } else if (BPFOperand::isValidIdAtStart (Name))
472     Operands.push_back(BPFOperand::createToken(Name, NameLoc));
473   else
474     return Error(NameLoc, "invalid register/token name");
475 
476   while (!getLexer().is(AsmToken::EndOfStatement)) {
477     // Attempt to parse token as operator
478     if (parseOperandAsOperator(Operands) == MatchOperand_Success)
479       continue;
480 
481     // Attempt to parse token as register
482     if (parseRegister(Operands) == MatchOperand_Success)
483       continue;
484 
485     // Attempt to parse token as an immediate
486     if (parseImmediate(Operands) != MatchOperand_Success) {
487       SMLoc Loc = getLexer().getLoc();
488       return Error(Loc, "unexpected token");
489     }
490   }
491 
492   if (getLexer().isNot(AsmToken::EndOfStatement)) {
493     SMLoc Loc = getLexer().getLoc();
494 
495     getParser().eatToEndOfStatement();
496 
497     return Error(Loc, "unexpected token");
498   }
499 
500   // Consume the EndOfStatement.
501   getParser().Lex();
502   return false;
503 }
504 
ParseDirective(AsmToken DirectiveID)505 bool BPFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
506 
LLVMInitializeBPFAsmParser()507 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
508   RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
509   RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
510   RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
511 }
512