1 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/BPFMCTargetDesc.h"
10 #include "TargetInfo/BPFTargetInfo.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/MC/MCContext.h"
14 #include "llvm/MC/MCExpr.h"
15 #include "llvm/MC/MCInst.h"
16 #include "llvm/MC/MCInstrInfo.h"
17 #include "llvm/MC/MCParser/MCAsmLexer.h"
18 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
19 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
20 #include "llvm/MC/MCRegisterInfo.h"
21 #include "llvm/MC/MCStreamer.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/MC/TargetRegistry.h"
24 #include "llvm/Support/Casting.h"
25 
26 using namespace llvm;
27 
28 namespace {
29 struct BPFOperand;
30 
31 class BPFAsmParser : public MCTargetAsmParser {
32 
getLoc() const33   SMLoc getLoc() const { return getParser().getTok().getLoc(); }
34 
35   bool PreMatchCheck(OperandVector &Operands);
36 
37   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
38                                OperandVector &Operands, MCStreamer &Out,
39                                uint64_t &ErrorInfo,
40                                bool MatchingInlineAsm) override;
41 
42   bool parseRegister(MCRegister &Reo, SMLoc &StartLoc, SMLoc &EndLoc) override;
43   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
44                                SMLoc &EndLoc) override;
45 
46   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
47                         SMLoc NameLoc, OperandVector &Operands) override;
48 
49   // "=" is used as assignment operator for assembly statment, so can't be used
50   // for symbol assignment.
equalIsAsmAssignment()51   bool equalIsAsmAssignment() override { return false; }
52   // "*" is used for dereferencing memory that it will be the start of
53   // statement.
starIsStartOfStatement()54   bool starIsStartOfStatement() override { return true; }
55 
56 #define GET_ASSEMBLER_HEADER
57 #include "BPFGenAsmMatcher.inc"
58 
59   ParseStatus parseImmediate(OperandVector &Operands);
60   ParseStatus parseRegister(OperandVector &Operands);
61   ParseStatus parseOperandAsOperator(OperandVector &Operands);
62 
63 public:
64   enum BPFMatchResultTy {
65     Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
66 #define GET_OPERAND_DIAGNOSTIC_TYPES
67 #include "BPFGenAsmMatcher.inc"
68 #undef GET_OPERAND_DIAGNOSTIC_TYPES
69   };
70 
BPFAsmParser(const MCSubtargetInfo & STI,MCAsmParser & Parser,const MCInstrInfo & MII,const MCTargetOptions & Options)71   BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
72                const MCInstrInfo &MII, const MCTargetOptions &Options)
73       : MCTargetAsmParser(Options, STI, MII) {
74     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
75   }
76 };
77 
78 /// BPFOperand - Instances of this class represent a parsed machine
79 /// instruction
80 struct BPFOperand : public MCParsedAsmOperand {
81 
82   enum KindTy {
83     Token,
84     Register,
85     Immediate,
86   } Kind;
87 
88   struct RegOp {
89     unsigned RegNum;
90   };
91 
92   struct ImmOp {
93     const MCExpr *Val;
94   };
95 
96   SMLoc StartLoc, EndLoc;
97   union {
98     StringRef Tok;
99     RegOp Reg;
100     ImmOp Imm;
101   };
102 
BPFOperand__anonb18517060111::BPFOperand103   BPFOperand(KindTy K) : Kind(K) {}
104 
105 public:
BPFOperand__anonb18517060111::BPFOperand106   BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
107     Kind = o.Kind;
108     StartLoc = o.StartLoc;
109     EndLoc = o.EndLoc;
110 
111     switch (Kind) {
112     case Register:
113       Reg = o.Reg;
114       break;
115     case Immediate:
116       Imm = o.Imm;
117       break;
118     case Token:
119       Tok = o.Tok;
120       break;
121     }
122   }
123 
isToken__anonb18517060111::BPFOperand124   bool isToken() const override { return Kind == Token; }
isReg__anonb18517060111::BPFOperand125   bool isReg() const override { return Kind == Register; }
isImm__anonb18517060111::BPFOperand126   bool isImm() const override { return Kind == Immediate; }
isMem__anonb18517060111::BPFOperand127   bool isMem() const override { return false; }
128 
isConstantImm__anonb18517060111::BPFOperand129   bool isConstantImm() const {
130     return isImm() && isa<MCConstantExpr>(getImm());
131   }
132 
getConstantImm__anonb18517060111::BPFOperand133   int64_t getConstantImm() const {
134     const MCExpr *Val = getImm();
135     return static_cast<const MCConstantExpr *>(Val)->getValue();
136   }
137 
isSImm16__anonb18517060111::BPFOperand138   bool isSImm16() const {
139     return (isConstantImm() && isInt<16>(getConstantImm()));
140   }
141 
isSymbolRef__anonb18517060111::BPFOperand142   bool isSymbolRef() const { return isImm() && isa<MCSymbolRefExpr>(getImm()); }
143 
isBrTarget__anonb18517060111::BPFOperand144   bool isBrTarget() const { return isSymbolRef() || isSImm16(); }
145 
146   /// getStartLoc - Gets location of the first token of this operand
getStartLoc__anonb18517060111::BPFOperand147   SMLoc getStartLoc() const override { return StartLoc; }
148   /// getEndLoc - Gets location of the last token of this operand
getEndLoc__anonb18517060111::BPFOperand149   SMLoc getEndLoc() const override { return EndLoc; }
150 
getReg__anonb18517060111::BPFOperand151   unsigned getReg() const override {
152     assert(Kind == Register && "Invalid type access!");
153     return Reg.RegNum;
154   }
155 
getImm__anonb18517060111::BPFOperand156   const MCExpr *getImm() const {
157     assert(Kind == Immediate && "Invalid type access!");
158     return Imm.Val;
159   }
160 
getToken__anonb18517060111::BPFOperand161   StringRef getToken() const {
162     assert(Kind == Token && "Invalid type access!");
163     return Tok;
164   }
165 
print__anonb18517060111::BPFOperand166   void print(raw_ostream &OS) const override {
167     switch (Kind) {
168     case Immediate:
169       OS << *getImm();
170       break;
171     case Register:
172       OS << "<register x";
173       OS << getReg() << ">";
174       break;
175     case Token:
176       OS << "'" << getToken() << "'";
177       break;
178     }
179   }
180 
addExpr__anonb18517060111::BPFOperand181   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
182     assert(Expr && "Expr shouldn't be null!");
183 
184     if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
185       Inst.addOperand(MCOperand::createImm(CE->getValue()));
186     else
187       Inst.addOperand(MCOperand::createExpr(Expr));
188   }
189 
190   // Used by the TableGen Code
addRegOperands__anonb18517060111::BPFOperand191   void addRegOperands(MCInst &Inst, unsigned N) const {
192     assert(N == 1 && "Invalid number of operands!");
193     Inst.addOperand(MCOperand::createReg(getReg()));
194   }
195 
addImmOperands__anonb18517060111::BPFOperand196   void addImmOperands(MCInst &Inst, unsigned N) const {
197     assert(N == 1 && "Invalid number of operands!");
198     addExpr(Inst, getImm());
199   }
200 
createToken__anonb18517060111::BPFOperand201   static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
202     auto Op = std::make_unique<BPFOperand>(Token);
203     Op->Tok = Str;
204     Op->StartLoc = S;
205     Op->EndLoc = S;
206     return Op;
207   }
208 
createReg__anonb18517060111::BPFOperand209   static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
210                                                SMLoc E) {
211     auto Op = std::make_unique<BPFOperand>(Register);
212     Op->Reg.RegNum = RegNo;
213     Op->StartLoc = S;
214     Op->EndLoc = E;
215     return Op;
216   }
217 
createImm__anonb18517060111::BPFOperand218   static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
219                                                SMLoc E) {
220     auto Op = std::make_unique<BPFOperand>(Immediate);
221     Op->Imm.Val = Val;
222     Op->StartLoc = S;
223     Op->EndLoc = E;
224     return Op;
225   }
226 
227   // Identifiers that can be used at the start of a statment.
isValidIdAtStart__anonb18517060111::BPFOperand228   static bool isValidIdAtStart(StringRef Name) {
229     return StringSwitch<bool>(Name.lower())
230         .Case("if", true)
231         .Case("call", true)
232         .Case("goto", true)
233         .Case("gotol", true)
234         .Case("*", true)
235         .Case("exit", true)
236         .Case("lock", true)
237         .Case("ld_pseudo", true)
238         .Default(false);
239   }
240 
241   // Identifiers that can be used in the middle of a statment.
isValidIdInMiddle__anonb18517060111::BPFOperand242   static bool isValidIdInMiddle(StringRef Name) {
243     return StringSwitch<bool>(Name.lower())
244         .Case("u64", true)
245         .Case("u32", true)
246         .Case("u16", true)
247         .Case("u8", true)
248         .Case("s32", true)
249         .Case("s16", true)
250         .Case("s8", true)
251         .Case("be64", true)
252         .Case("be32", true)
253         .Case("be16", true)
254         .Case("le64", true)
255         .Case("le32", true)
256         .Case("le16", true)
257         .Case("bswap16", true)
258         .Case("bswap32", true)
259         .Case("bswap64", true)
260         .Case("goto", true)
261         .Case("gotol", true)
262         .Case("ll", true)
263         .Case("skb", true)
264         .Case("s", true)
265         .Case("atomic_fetch_add", true)
266         .Case("atomic_fetch_and", true)
267         .Case("atomic_fetch_or", true)
268         .Case("atomic_fetch_xor", true)
269         .Case("xchg_64", true)
270         .Case("xchg32_32", true)
271         .Case("cmpxchg_64", true)
272         .Case("cmpxchg32_32", true)
273         .Default(false);
274   }
275 };
276 } // end anonymous namespace.
277 
278 #define GET_REGISTER_MATCHER
279 #define GET_MATCHER_IMPLEMENTATION
280 #include "BPFGenAsmMatcher.inc"
281 
PreMatchCheck(OperandVector & Operands)282 bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
283 
284   if (Operands.size() == 4) {
285     // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
286     // reg1 must be the same as reg2
287     BPFOperand &Op0 = (BPFOperand &)*Operands[0];
288     BPFOperand &Op1 = (BPFOperand &)*Operands[1];
289     BPFOperand &Op2 = (BPFOperand &)*Operands[2];
290     BPFOperand &Op3 = (BPFOperand &)*Operands[3];
291     if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
292         && Op1.getToken() == "="
293         && (Op2.getToken() == "-" || Op2.getToken() == "be16"
294             || Op2.getToken() == "be32" || Op2.getToken() == "be64"
295             || Op2.getToken() == "le16" || Op2.getToken() == "le32"
296             || Op2.getToken() == "le64")
297         && Op0.getReg() != Op3.getReg())
298       return true;
299   }
300 
301   return false;
302 }
303 
MatchAndEmitInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)304 bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
305                                            OperandVector &Operands,
306                                            MCStreamer &Out, uint64_t &ErrorInfo,
307                                            bool MatchingInlineAsm) {
308   MCInst Inst;
309   SMLoc ErrorLoc;
310 
311   if (PreMatchCheck(Operands))
312     return Error(IDLoc, "additional inst constraint not met");
313 
314   switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
315   default:
316     break;
317   case Match_Success:
318     Inst.setLoc(IDLoc);
319     Out.emitInstruction(Inst, getSTI());
320     return false;
321   case Match_MissingFeature:
322     return Error(IDLoc, "instruction use requires an option to be enabled");
323   case Match_MnemonicFail:
324     return Error(IDLoc, "unrecognized instruction mnemonic");
325   case Match_InvalidOperand:
326     ErrorLoc = IDLoc;
327 
328     if (ErrorInfo != ~0U) {
329       if (ErrorInfo >= Operands.size())
330         return Error(ErrorLoc, "too few operands for instruction");
331 
332       ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
333 
334       if (ErrorLoc == SMLoc())
335         ErrorLoc = IDLoc;
336     }
337 
338     return Error(ErrorLoc, "invalid operand for instruction");
339   case Match_InvalidBrTarget:
340     return Error(Operands[ErrorInfo]->getStartLoc(),
341                  "operand is not an identifier or 16-bit signed integer");
342   case Match_InvalidSImm16:
343     return Error(Operands[ErrorInfo]->getStartLoc(),
344                  "operand is not a 16-bit signed integer");
345   }
346 
347   llvm_unreachable("Unknown match type detected!");
348 }
349 
parseRegister(MCRegister & Reg,SMLoc & StartLoc,SMLoc & EndLoc)350 bool BPFAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
351                                  SMLoc &EndLoc) {
352   if (!tryParseRegister(Reg, StartLoc, EndLoc).isSuccess())
353     return Error(StartLoc, "invalid register name");
354   return false;
355 }
356 
tryParseRegister(MCRegister & Reg,SMLoc & StartLoc,SMLoc & EndLoc)357 ParseStatus BPFAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
358                                            SMLoc &EndLoc) {
359   const AsmToken &Tok = getParser().getTok();
360   StartLoc = Tok.getLoc();
361   EndLoc = Tok.getEndLoc();
362   Reg = BPF::NoRegister;
363   StringRef Name = getLexer().getTok().getIdentifier();
364 
365   if (!MatchRegisterName(Name)) {
366     getParser().Lex(); // Eat identifier token.
367     return ParseStatus::Success;
368   }
369 
370   return ParseStatus::NoMatch;
371 }
372 
parseOperandAsOperator(OperandVector & Operands)373 ParseStatus BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
374   SMLoc S = getLoc();
375 
376   if (getLexer().getKind() == AsmToken::Identifier) {
377     StringRef Name = getLexer().getTok().getIdentifier();
378 
379     if (BPFOperand::isValidIdInMiddle(Name)) {
380       getLexer().Lex();
381       Operands.push_back(BPFOperand::createToken(Name, S));
382       return ParseStatus::Success;
383     }
384 
385     return ParseStatus::NoMatch;
386   }
387 
388   switch (getLexer().getKind()) {
389   case AsmToken::Minus:
390   case AsmToken::Plus: {
391     if (getLexer().peekTok().is(AsmToken::Integer))
392       return ParseStatus::NoMatch;
393     [[fallthrough]];
394   }
395 
396   case AsmToken::Equal:
397   case AsmToken::Greater:
398   case AsmToken::Less:
399   case AsmToken::Pipe:
400   case AsmToken::Star:
401   case AsmToken::LParen:
402   case AsmToken::RParen:
403   case AsmToken::LBrac:
404   case AsmToken::RBrac:
405   case AsmToken::Slash:
406   case AsmToken::Amp:
407   case AsmToken::Percent:
408   case AsmToken::Caret: {
409     StringRef Name = getLexer().getTok().getString();
410     getLexer().Lex();
411     Operands.push_back(BPFOperand::createToken(Name, S));
412 
413     return ParseStatus::Success;
414   }
415 
416   case AsmToken::EqualEqual:
417   case AsmToken::ExclaimEqual:
418   case AsmToken::GreaterEqual:
419   case AsmToken::GreaterGreater:
420   case AsmToken::LessEqual:
421   case AsmToken::LessLess: {
422     Operands.push_back(BPFOperand::createToken(
423         getLexer().getTok().getString().substr(0, 1), S));
424     Operands.push_back(BPFOperand::createToken(
425         getLexer().getTok().getString().substr(1, 1), S));
426     getLexer().Lex();
427 
428     return ParseStatus::Success;
429   }
430 
431   default:
432     break;
433   }
434 
435   return ParseStatus::NoMatch;
436 }
437 
parseRegister(OperandVector & Operands)438 ParseStatus BPFAsmParser::parseRegister(OperandVector &Operands) {
439   SMLoc S = getLoc();
440   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
441 
442   switch (getLexer().getKind()) {
443   default:
444     return ParseStatus::NoMatch;
445   case AsmToken::Identifier:
446     StringRef Name = getLexer().getTok().getIdentifier();
447     unsigned RegNo = MatchRegisterName(Name);
448 
449     if (RegNo == 0)
450       return ParseStatus::NoMatch;
451 
452     getLexer().Lex();
453     Operands.push_back(BPFOperand::createReg(RegNo, S, E));
454   }
455   return ParseStatus::Success;
456 }
457 
parseImmediate(OperandVector & Operands)458 ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) {
459   switch (getLexer().getKind()) {
460   default:
461     return ParseStatus::NoMatch;
462   case AsmToken::LParen:
463   case AsmToken::Minus:
464   case AsmToken::Plus:
465   case AsmToken::Integer:
466   case AsmToken::String:
467   case AsmToken::Identifier:
468     break;
469   }
470 
471   const MCExpr *IdVal;
472   SMLoc S = getLoc();
473 
474   if (getParser().parseExpression(IdVal))
475     return ParseStatus::Failure;
476 
477   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
478   Operands.push_back(BPFOperand::createImm(IdVal, S, E));
479 
480   return ParseStatus::Success;
481 }
482 
483 /// ParseInstruction - Parse an BPF instruction which is in BPF verifier
484 /// format.
ParseInstruction(ParseInstructionInfo & Info,StringRef Name,SMLoc NameLoc,OperandVector & Operands)485 bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
486                                     SMLoc NameLoc, OperandVector &Operands) {
487   // The first operand could be either register or actually an operator.
488   unsigned RegNo = MatchRegisterName(Name);
489 
490   if (RegNo != 0) {
491     SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
492     Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
493   } else if (BPFOperand::isValidIdAtStart (Name))
494     Operands.push_back(BPFOperand::createToken(Name, NameLoc));
495   else
496     return Error(NameLoc, "invalid register/token name");
497 
498   while (!getLexer().is(AsmToken::EndOfStatement)) {
499     // Attempt to parse token as operator
500     if (parseOperandAsOperator(Operands).isSuccess())
501       continue;
502 
503     // Attempt to parse token as register
504     if (parseRegister(Operands).isSuccess())
505       continue;
506 
507     if (getLexer().is(AsmToken::Comma)) {
508       getLexer().Lex();
509       continue;
510     }
511 
512     // Attempt to parse token as an immediate
513     if (!parseImmediate(Operands).isSuccess()) {
514       SMLoc Loc = getLexer().getLoc();
515       return Error(Loc, "unexpected token");
516     }
517   }
518 
519   if (getLexer().isNot(AsmToken::EndOfStatement)) {
520     SMLoc Loc = getLexer().getLoc();
521 
522     getParser().eatToEndOfStatement();
523 
524     return Error(Loc, "unexpected token");
525   }
526 
527   // Consume the EndOfStatement.
528   getParser().Lex();
529   return false;
530 }
531 
LLVMInitializeBPFAsmParser()532 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
533   RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
534   RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
535   RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
536 }
537