1b257d24fSChris Lattner //===- Disassembler.cpp - Disassembler for hex strings --------------------===//
2b257d24fSChris Lattner //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b257d24fSChris Lattner //
7b257d24fSChris Lattner //===----------------------------------------------------------------------===//
8b257d24fSChris Lattner //
9b257d24fSChris Lattner // This class implements the disassembler of strings of bytes written in
10b257d24fSChris Lattner // hexadecimal, from standard input or from a file.
11b257d24fSChris Lattner //
12b257d24fSChris Lattner //===----------------------------------------------------------------------===//
13b257d24fSChris Lattner
14b257d24fSChris Lattner #include "Disassembler.h"
154d88a1c2SChandler Carruth #include "llvm/ADT/Triple.h"
16a1bc0f56SLang Hames #include "llvm/MC/MCAsmInfo.h"
17a1bc0f56SLang Hames #include "llvm/MC/MCContext.h"
18f57c1977SBenjamin Kramer #include "llvm/MC/MCDisassembler/MCDisassembler.h"
19b257d24fSChris Lattner #include "llvm/MC/MCInst.h"
202cb27072SThomas Lively #include "llvm/MC/MCObjectFileInfo.h"
21a1bc0f56SLang Hames #include "llvm/MC/MCRegisterInfo.h"
22def81b91SRichard Barton #include "llvm/MC/MCStreamer.h"
234c493e80SJames Molloy #include "llvm/MC/MCSubtargetInfo.h"
24*89b57061SReid Kleckner #include "llvm/MC/TargetRegistry.h"
25b257d24fSChris Lattner #include "llvm/Support/MemoryBuffer.h"
26b257d24fSChris Lattner #include "llvm/Support/SourceMgr.h"
272bb40357SEvan Cheng #include "llvm/Support/raw_ostream.h"
28def81b91SRichard Barton
29b257d24fSChris Lattner using namespace llvm;
30b257d24fSChris Lattner
/// A parsed run of input bytes: `first` holds the byte values and `second`
/// holds, for each byte, a pointer to the source text it was parsed from (used
/// to attach diagnostics to the right input location).
using ByteArrayTy =
    std::pair<std::vector<unsigned char>, std::vector<const char *>>;
33b257d24fSChris Lattner
PrintInsts(const MCDisassembler & DisAsm,const ByteArrayTy & Bytes,SourceMgr & SM,raw_ostream & Out,MCStreamer & Streamer,bool InAtomicBlock,const MCSubtargetInfo & STI)34d9d5b315SDaniel Dunbar static bool PrintInsts(const MCDisassembler &DisAsm,
35def81b91SRichard Barton const ByteArrayTy &Bytes,
36def81b91SRichard Barton SourceMgr &SM, raw_ostream &Out,
37e6c13e4aSDavid Woodhouse MCStreamer &Streamer, bool InAtomicBlock,
38e6c13e4aSDavid Woodhouse const MCSubtargetInfo &STI) {
397fc5b874SRafael Espindola ArrayRef<uint8_t> Data(Bytes.first.data(), Bytes.first.size());
40b257d24fSChris Lattner
416a6f9cc6SSean Callanan // Disassemble it to strings.
42b257d24fSChris Lattner uint64_t Size;
436a6f9cc6SSean Callanan uint64_t Index;
44b257d24fSChris Lattner
4545a753a4SRafael Espindola for (Index = 0; Index < Bytes.first.size(); Index += Size) {
466a6f9cc6SSean Callanan MCInst Inst;
476a6f9cc6SSean Callanan
48a4043c4bSOwen Anderson MCDisassembler::DecodeStatus S;
496fdd6a7bSFangrui Song S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, nulls());
50a4043c4bSOwen Anderson switch (S) {
51a4043c4bSOwen Anderson case MCDisassembler::Fail:
5245a753a4SRafael Espindola SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
5303b80a40SChris Lattner SourceMgr::DK_Warning,
5403b80a40SChris Lattner "invalid instruction encoding");
5548cf6cc4STim Northover // Don't try to resynchronise the stream in a block
5648cf6cc4STim Northover if (InAtomicBlock)
5748cf6cc4STim Northover return true;
5848cf6cc4STim Northover
596a6f9cc6SSean Callanan if (Size == 0)
606a6f9cc6SSean Callanan Size = 1; // skip illegible bytes
6148cf6cc4STim Northover
62a4043c4bSOwen Anderson break;
63a4043c4bSOwen Anderson
64a4043c4bSOwen Anderson case MCDisassembler::SoftFail:
6545a753a4SRafael Espindola SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
6603b80a40SChris Lattner SourceMgr::DK_Warning,
6703b80a40SChris Lattner "potentially undefined instruction encoding");
68cd1d5aafSJustin Bogner LLVM_FALLTHROUGH;
69a4043c4bSOwen Anderson
70a4043c4bSOwen Anderson case MCDisassembler::Success:
71bcd24b2dSFangrui Song Streamer.emitInstruction(Inst, STI);
72a4043c4bSOwen Anderson break;
736a6f9cc6SSean Callanan }
74b257d24fSChris Lattner }
75b257d24fSChris Lattner
76b257d24fSChris Lattner return false;
77b257d24fSChris Lattner }
78b257d24fSChris Lattner
SkipToToken(StringRef & Str)7948cf6cc4STim Northover static bool SkipToToken(StringRef &Str) {
80eb4675fbSColin LeMahieu for (;;) {
81eb4675fbSColin LeMahieu if (Str.empty())
82eb4675fbSColin LeMahieu return false;
832d03d3a8SSean Callanan
84eb4675fbSColin LeMahieu // Strip horizontal whitespace and commas.
85eb4675fbSColin LeMahieu if (size_t Pos = Str.find_first_not_of(" \t\r\n,")) {
86eb4675fbSColin LeMahieu Str = Str.substr(Pos);
872d03d3a8SSean Callanan continue;
882d03d3a8SSean Callanan }
8948cf6cc4STim Northover
90eb4675fbSColin LeMahieu // If this is the start of a comment, remove the rest of the line.
91eb4675fbSColin LeMahieu if (Str[0] == '#') {
92eb4675fbSColin LeMahieu Str = Str.substr(Str.find_first_of('\n'));
93eb4675fbSColin LeMahieu continue;
94eb4675fbSColin LeMahieu }
95eb4675fbSColin LeMahieu return true;
96eb4675fbSColin LeMahieu }
9748cf6cc4STim Northover }
9848cf6cc4STim Northover
9948cf6cc4STim Northover
ByteArrayFromString(ByteArrayTy & ByteArray,StringRef & Str,SourceMgr & SM)10048cf6cc4STim Northover static bool ByteArrayFromString(ByteArrayTy &ByteArray,
10148cf6cc4STim Northover StringRef &Str,
10248cf6cc4STim Northover SourceMgr &SM) {
10348cf6cc4STim Northover while (SkipToToken(Str)) {
10448cf6cc4STim Northover // Handled by higher level
10548cf6cc4STim Northover if (Str[0] == '[' || Str[0] == ']')
10648cf6cc4STim Northover return false;
1072d03d3a8SSean Callanan
1082d03d3a8SSean Callanan // Get the current token.
10948cf6cc4STim Northover size_t Next = Str.find_first_of(" \t\n\r,#[]");
1102d03d3a8SSean Callanan StringRef Value = Str.substr(0, Next);
1112d03d3a8SSean Callanan
1122d03d3a8SSean Callanan // Convert to a byte and add to the byte vector.
1132d03d3a8SSean Callanan unsigned ByteVal;
1142d03d3a8SSean Callanan if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
1152d03d3a8SSean Callanan // If we have an error, print it and skip to the end of line.
11603b80a40SChris Lattner SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
11703b80a40SChris Lattner "invalid input token");
1182d03d3a8SSean Callanan Str = Str.substr(Str.find('\n'));
11945a753a4SRafael Espindola ByteArray.first.clear();
12045a753a4SRafael Espindola ByteArray.second.clear();
1212d03d3a8SSean Callanan continue;
1222d03d3a8SSean Callanan }
1232d03d3a8SSean Callanan
12445a753a4SRafael Espindola ByteArray.first.push_back(ByteVal);
12545a753a4SRafael Espindola ByteArray.second.push_back(Value.data());
1262d03d3a8SSean Callanan Str = Str.substr(Next);
1272d03d3a8SSean Callanan }
1282d03d3a8SSean Callanan
1292d03d3a8SSean Callanan return false;
1302d03d3a8SSean Callanan }
1312d03d3a8SSean Callanan
disassemble(const Target & T,const std::string & Triple,MCSubtargetInfo & STI,MCStreamer & Streamer,MemoryBuffer & Buffer,SourceMgr & SM,MCContext & Ctx,raw_ostream & Out,const MCTargetOptions & MCOptions)1322cb27072SThomas Lively int Disassembler::disassemble(const Target &T, const std::string &Triple,
1332cb27072SThomas Lively MCSubtargetInfo &STI, MCStreamer &Streamer,
1342cb27072SThomas Lively MemoryBuffer &Buffer, SourceMgr &SM,
1354b63ca13SMirko Brkusanin MCContext &Ctx, raw_ostream &Out,
1364b63ca13SMirko Brkusanin const MCTargetOptions &MCOptions) {
137a1bc0f56SLang Hames
138a1bc0f56SLang Hames std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
139a1bc0f56SLang Hames if (!MRI) {
140a1bc0f56SLang Hames errs() << "error: no register info for target " << Triple << "\n";
141a1bc0f56SLang Hames return -1;
142a1bc0f56SLang Hames }
143a1bc0f56SLang Hames
1444b63ca13SMirko Brkusanin std::unique_ptr<const MCAsmInfo> MAI(
1454b63ca13SMirko Brkusanin T.createMCAsmInfo(*MRI, Triple, MCOptions));
146a1bc0f56SLang Hames if (!MAI) {
147a1bc0f56SLang Hames errs() << "error: no assembly info for target " << Triple << "\n";
148a1bc0f56SLang Hames return -1;
149a1bc0f56SLang Hames }
150a1bc0f56SLang Hames
151a1bc0f56SLang Hames std::unique_ptr<const MCDisassembler> DisAsm(
152a1bc0f56SLang Hames T.createMCDisassembler(STI, Ctx));
153b257d24fSChris Lattner if (!DisAsm) {
154b257d24fSChris Lattner errs() << "error: no disassembler for target " << Triple << "\n";
155b257d24fSChris Lattner return -1;
156b257d24fSChris Lattner }
157b257d24fSChris Lattner
158def81b91SRichard Barton // Set up initial section manually here
1595e71839fSPeter Smith Streamer.initSections(false, STI);
160b257d24fSChris Lattner
161b257d24fSChris Lattner bool ErrorOccurred = false;
162b257d24fSChris Lattner
163b257d24fSChris Lattner // Convert the input to a vector for disassembly.
164b257d24fSChris Lattner ByteArrayTy ByteArray;
16555ddc30fSChris Lattner StringRef Str = Buffer.getBuffer();
16648cf6cc4STim Northover bool InAtomicBlock = false;
16755ddc30fSChris Lattner
16848cf6cc4STim Northover while (SkipToToken(Str)) {
16945a753a4SRafael Espindola ByteArray.first.clear();
17045a753a4SRafael Espindola ByteArray.second.clear();
17148cf6cc4STim Northover
17248cf6cc4STim Northover if (Str[0] == '[') {
17348cf6cc4STim Northover if (InAtomicBlock) {
17448cf6cc4STim Northover SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
17548cf6cc4STim Northover "nested atomic blocks make no sense");
17648cf6cc4STim Northover ErrorOccurred = true;
17748cf6cc4STim Northover }
17848cf6cc4STim Northover InAtomicBlock = true;
17948cf6cc4STim Northover Str = Str.drop_front();
18048cf6cc4STim Northover continue;
18148cf6cc4STim Northover } else if (Str[0] == ']') {
18248cf6cc4STim Northover if (!InAtomicBlock) {
18348cf6cc4STim Northover SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
18448cf6cc4STim Northover "attempt to close atomic block without opening");
18548cf6cc4STim Northover ErrorOccurred = true;
18648cf6cc4STim Northover }
18748cf6cc4STim Northover InAtomicBlock = false;
18848cf6cc4STim Northover Str = Str.drop_front();
18948cf6cc4STim Northover continue;
19048cf6cc4STim Northover }
19148cf6cc4STim Northover
19248cf6cc4STim Northover // It's a real token, get the bytes and emit them
1932d03d3a8SSean Callanan ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
194b257d24fSChris Lattner
19545a753a4SRafael Espindola if (!ByteArray.first.empty())
19648cf6cc4STim Northover ErrorOccurred |= PrintInsts(*DisAsm, ByteArray, SM, Out, Streamer,
197e6c13e4aSDavid Woodhouse InAtomicBlock, STI);
19848cf6cc4STim Northover }
19948cf6cc4STim Northover
20048cf6cc4STim Northover if (InAtomicBlock) {
20148cf6cc4STim Northover SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error,
20248cf6cc4STim Northover "unclosed atomic block");
20348cf6cc4STim Northover ErrorOccurred = true;
20448cf6cc4STim Northover }
205b257d24fSChris Lattner
206b257d24fSChris Lattner return ErrorOccurred;
207b257d24fSChris Lattner }
208