1b257d24fSChris Lattner //===- Disassembler.cpp - Disassembler for hex strings --------------------===//
2b257d24fSChris Lattner //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b257d24fSChris Lattner //
7b257d24fSChris Lattner //===----------------------------------------------------------------------===//
8b257d24fSChris Lattner //
9b257d24fSChris Lattner // This class implements the disassembler of strings of bytes written in
10b257d24fSChris Lattner // hexadecimal, from standard input or from a file.
11b257d24fSChris Lattner //
12b257d24fSChris Lattner //===----------------------------------------------------------------------===//
13b257d24fSChris Lattner 
14b257d24fSChris Lattner #include "Disassembler.h"
154d88a1c2SChandler Carruth #include "llvm/ADT/Triple.h"
16a1bc0f56SLang Hames #include "llvm/MC/MCAsmInfo.h"
17a1bc0f56SLang Hames #include "llvm/MC/MCContext.h"
18f57c1977SBenjamin Kramer #include "llvm/MC/MCDisassembler/MCDisassembler.h"
19b257d24fSChris Lattner #include "llvm/MC/MCInst.h"
202cb27072SThomas Lively #include "llvm/MC/MCObjectFileInfo.h"
21a1bc0f56SLang Hames #include "llvm/MC/MCRegisterInfo.h"
22def81b91SRichard Barton #include "llvm/MC/MCStreamer.h"
234c493e80SJames Molloy #include "llvm/MC/MCSubtargetInfo.h"
24*89b57061SReid Kleckner #include "llvm/MC/TargetRegistry.h"
25b257d24fSChris Lattner #include "llvm/Support/MemoryBuffer.h"
26b257d24fSChris Lattner #include "llvm/Support/SourceMgr.h"
272bb40357SEvan Cheng #include "llvm/Support/raw_ostream.h"
28def81b91SRichard Barton 
29b257d24fSChris Lattner using namespace llvm;
30b257d24fSChris Lattner 
/// Bytes parsed from the textual input together with their source locations.
///
/// - \c first holds the decoded byte values, in input order.
/// - \c second holds, for the byte at the same index, a pointer to the start
///   of the token it was parsed from; it is used to attach diagnostics to the
///   right place in the input buffer.
///
/// Both vectors are always appended to and cleared together, so they have the
/// same length.
using ByteArrayTy =
    std::pair<std::vector<unsigned char>, std::vector<const char *>>;
33b257d24fSChris Lattner 
PrintInsts(const MCDisassembler & DisAsm,const ByteArrayTy & Bytes,SourceMgr & SM,raw_ostream & Out,MCStreamer & Streamer,bool InAtomicBlock,const MCSubtargetInfo & STI)34d9d5b315SDaniel Dunbar static bool PrintInsts(const MCDisassembler &DisAsm,
35def81b91SRichard Barton                        const ByteArrayTy &Bytes,
36def81b91SRichard Barton                        SourceMgr &SM, raw_ostream &Out,
37e6c13e4aSDavid Woodhouse                        MCStreamer &Streamer, bool InAtomicBlock,
38e6c13e4aSDavid Woodhouse                        const MCSubtargetInfo &STI) {
397fc5b874SRafael Espindola   ArrayRef<uint8_t> Data(Bytes.first.data(), Bytes.first.size());
40b257d24fSChris Lattner 
416a6f9cc6SSean Callanan   // Disassemble it to strings.
42b257d24fSChris Lattner   uint64_t Size;
436a6f9cc6SSean Callanan   uint64_t Index;
44b257d24fSChris Lattner 
4545a753a4SRafael Espindola   for (Index = 0; Index < Bytes.first.size(); Index += Size) {
466a6f9cc6SSean Callanan     MCInst Inst;
476a6f9cc6SSean Callanan 
48a4043c4bSOwen Anderson     MCDisassembler::DecodeStatus S;
496fdd6a7bSFangrui Song     S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, nulls());
50a4043c4bSOwen Anderson     switch (S) {
51a4043c4bSOwen Anderson     case MCDisassembler::Fail:
5245a753a4SRafael Espindola       SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
5303b80a40SChris Lattner                       SourceMgr::DK_Warning,
5403b80a40SChris Lattner                       "invalid instruction encoding");
5548cf6cc4STim Northover       // Don't try to resynchronise the stream in a block
5648cf6cc4STim Northover       if (InAtomicBlock)
5748cf6cc4STim Northover         return true;
5848cf6cc4STim Northover 
596a6f9cc6SSean Callanan       if (Size == 0)
606a6f9cc6SSean Callanan         Size = 1; // skip illegible bytes
6148cf6cc4STim Northover 
62a4043c4bSOwen Anderson       break;
63a4043c4bSOwen Anderson 
64a4043c4bSOwen Anderson     case MCDisassembler::SoftFail:
6545a753a4SRafael Espindola       SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
6603b80a40SChris Lattner                       SourceMgr::DK_Warning,
6703b80a40SChris Lattner                       "potentially undefined instruction encoding");
68cd1d5aafSJustin Bogner       LLVM_FALLTHROUGH;
69a4043c4bSOwen Anderson 
70a4043c4bSOwen Anderson     case MCDisassembler::Success:
71bcd24b2dSFangrui Song       Streamer.emitInstruction(Inst, STI);
72a4043c4bSOwen Anderson       break;
736a6f9cc6SSean Callanan     }
74b257d24fSChris Lattner   }
75b257d24fSChris Lattner 
76b257d24fSChris Lattner   return false;
77b257d24fSChris Lattner }
78b257d24fSChris Lattner 
SkipToToken(StringRef & Str)7948cf6cc4STim Northover static bool SkipToToken(StringRef &Str) {
80eb4675fbSColin LeMahieu   for (;;) {
81eb4675fbSColin LeMahieu     if (Str.empty())
82eb4675fbSColin LeMahieu       return false;
832d03d3a8SSean Callanan 
84eb4675fbSColin LeMahieu     // Strip horizontal whitespace and commas.
85eb4675fbSColin LeMahieu     if (size_t Pos = Str.find_first_not_of(" \t\r\n,")) {
86eb4675fbSColin LeMahieu       Str = Str.substr(Pos);
872d03d3a8SSean Callanan       continue;
882d03d3a8SSean Callanan     }
8948cf6cc4STim Northover 
90eb4675fbSColin LeMahieu     // If this is the start of a comment, remove the rest of the line.
91eb4675fbSColin LeMahieu     if (Str[0] == '#') {
92eb4675fbSColin LeMahieu         Str = Str.substr(Str.find_first_of('\n'));
93eb4675fbSColin LeMahieu       continue;
94eb4675fbSColin LeMahieu     }
95eb4675fbSColin LeMahieu     return true;
96eb4675fbSColin LeMahieu   }
9748cf6cc4STim Northover }
9848cf6cc4STim Northover 
9948cf6cc4STim Northover 
ByteArrayFromString(ByteArrayTy & ByteArray,StringRef & Str,SourceMgr & SM)10048cf6cc4STim Northover static bool ByteArrayFromString(ByteArrayTy &ByteArray,
10148cf6cc4STim Northover                                 StringRef &Str,
10248cf6cc4STim Northover                                 SourceMgr &SM) {
10348cf6cc4STim Northover   while (SkipToToken(Str)) {
10448cf6cc4STim Northover     // Handled by higher level
10548cf6cc4STim Northover     if (Str[0] == '[' || Str[0] == ']')
10648cf6cc4STim Northover       return false;
1072d03d3a8SSean Callanan 
1082d03d3a8SSean Callanan     // Get the current token.
10948cf6cc4STim Northover     size_t Next = Str.find_first_of(" \t\n\r,#[]");
1102d03d3a8SSean Callanan     StringRef Value = Str.substr(0, Next);
1112d03d3a8SSean Callanan 
1122d03d3a8SSean Callanan     // Convert to a byte and add to the byte vector.
1132d03d3a8SSean Callanan     unsigned ByteVal;
1142d03d3a8SSean Callanan     if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
1152d03d3a8SSean Callanan       // If we have an error, print it and skip to the end of line.
11603b80a40SChris Lattner       SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
11703b80a40SChris Lattner                       "invalid input token");
1182d03d3a8SSean Callanan       Str = Str.substr(Str.find('\n'));
11945a753a4SRafael Espindola       ByteArray.first.clear();
12045a753a4SRafael Espindola       ByteArray.second.clear();
1212d03d3a8SSean Callanan       continue;
1222d03d3a8SSean Callanan     }
1232d03d3a8SSean Callanan 
12445a753a4SRafael Espindola     ByteArray.first.push_back(ByteVal);
12545a753a4SRafael Espindola     ByteArray.second.push_back(Value.data());
1262d03d3a8SSean Callanan     Str = Str.substr(Next);
1272d03d3a8SSean Callanan   }
1282d03d3a8SSean Callanan 
1292d03d3a8SSean Callanan   return false;
1302d03d3a8SSean Callanan }
1312d03d3a8SSean Callanan 
disassemble(const Target & T,const std::string & Triple,MCSubtargetInfo & STI,MCStreamer & Streamer,MemoryBuffer & Buffer,SourceMgr & SM,MCContext & Ctx,raw_ostream & Out,const MCTargetOptions & MCOptions)1322cb27072SThomas Lively int Disassembler::disassemble(const Target &T, const std::string &Triple,
1332cb27072SThomas Lively                               MCSubtargetInfo &STI, MCStreamer &Streamer,
1342cb27072SThomas Lively                               MemoryBuffer &Buffer, SourceMgr &SM,
1354b63ca13SMirko Brkusanin                               MCContext &Ctx, raw_ostream &Out,
1364b63ca13SMirko Brkusanin                               const MCTargetOptions &MCOptions) {
137a1bc0f56SLang Hames 
138a1bc0f56SLang Hames   std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
139a1bc0f56SLang Hames   if (!MRI) {
140a1bc0f56SLang Hames     errs() << "error: no register info for target " << Triple << "\n";
141a1bc0f56SLang Hames     return -1;
142a1bc0f56SLang Hames   }
143a1bc0f56SLang Hames 
1444b63ca13SMirko Brkusanin   std::unique_ptr<const MCAsmInfo> MAI(
1454b63ca13SMirko Brkusanin       T.createMCAsmInfo(*MRI, Triple, MCOptions));
146a1bc0f56SLang Hames   if (!MAI) {
147a1bc0f56SLang Hames     errs() << "error: no assembly info for target " << Triple << "\n";
148a1bc0f56SLang Hames     return -1;
149a1bc0f56SLang Hames   }
150a1bc0f56SLang Hames 
151a1bc0f56SLang Hames   std::unique_ptr<const MCDisassembler> DisAsm(
152a1bc0f56SLang Hames     T.createMCDisassembler(STI, Ctx));
153b257d24fSChris Lattner   if (!DisAsm) {
154b257d24fSChris Lattner     errs() << "error: no disassembler for target " << Triple << "\n";
155b257d24fSChris Lattner     return -1;
156b257d24fSChris Lattner   }
157b257d24fSChris Lattner 
158def81b91SRichard Barton   // Set up initial section manually here
1595e71839fSPeter Smith   Streamer.initSections(false, STI);
160b257d24fSChris Lattner 
161b257d24fSChris Lattner   bool ErrorOccurred = false;
162b257d24fSChris Lattner 
163b257d24fSChris Lattner   // Convert the input to a vector for disassembly.
164b257d24fSChris Lattner   ByteArrayTy ByteArray;
16555ddc30fSChris Lattner   StringRef Str = Buffer.getBuffer();
16648cf6cc4STim Northover   bool InAtomicBlock = false;
16755ddc30fSChris Lattner 
16848cf6cc4STim Northover   while (SkipToToken(Str)) {
16945a753a4SRafael Espindola     ByteArray.first.clear();
17045a753a4SRafael Espindola     ByteArray.second.clear();
17148cf6cc4STim Northover 
17248cf6cc4STim Northover     if (Str[0] == '[') {
17348cf6cc4STim Northover       if (InAtomicBlock) {
17448cf6cc4STim Northover         SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
17548cf6cc4STim Northover                         "nested atomic blocks make no sense");
17648cf6cc4STim Northover         ErrorOccurred = true;
17748cf6cc4STim Northover       }
17848cf6cc4STim Northover       InAtomicBlock = true;
17948cf6cc4STim Northover       Str = Str.drop_front();
18048cf6cc4STim Northover       continue;
18148cf6cc4STim Northover     } else if (Str[0] == ']') {
18248cf6cc4STim Northover       if (!InAtomicBlock) {
18348cf6cc4STim Northover         SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
18448cf6cc4STim Northover                         "attempt to close atomic block without opening");
18548cf6cc4STim Northover         ErrorOccurred = true;
18648cf6cc4STim Northover       }
18748cf6cc4STim Northover       InAtomicBlock = false;
18848cf6cc4STim Northover       Str = Str.drop_front();
18948cf6cc4STim Northover       continue;
19048cf6cc4STim Northover     }
19148cf6cc4STim Northover 
19248cf6cc4STim Northover     // It's a real token, get the bytes and emit them
1932d03d3a8SSean Callanan     ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
194b257d24fSChris Lattner 
19545a753a4SRafael Espindola     if (!ByteArray.first.empty())
19648cf6cc4STim Northover       ErrorOccurred |= PrintInsts(*DisAsm, ByteArray, SM, Out, Streamer,
197e6c13e4aSDavid Woodhouse                                   InAtomicBlock, STI);
19848cf6cc4STim Northover   }
19948cf6cc4STim Northover 
20048cf6cc4STim Northover   if (InAtomicBlock) {
20148cf6cc4STim Northover     SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
20248cf6cc4STim Northover                     "unclosed atomic block");
20348cf6cc4STim Northover     ErrorOccurred = true;
20448cf6cc4STim Northover   }
205b257d24fSChris Lattner 
206b257d24fSChris Lattner   return ErrorOccurred;
207b257d24fSChris Lattner }
208