1 //===- X86DisassemblerTables.cpp - Disassembler tables ----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file is part of the X86 Disassembler Emitter.
11 // It contains the implementation of the disassembler tables.
12 // Documentation for the disassembler emitter in general can be found in
//  X86DisassemblerEmitter.h.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "X86DisassemblerShared.h"
18 #include "X86DisassemblerTables.h"
19 
20 #include "llvm/TableGen/TableGenBackend.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/Format.h"
24 
25 using namespace llvm;
26 using namespace X86Disassembler;
27 
28 /// inheritsFrom - Indicates whether all instructions in one class also belong
29 ///   to another class.
30 ///
31 /// @param child  - The class that may be the subset
32 /// @param parent - The class that may be the superset
33 /// @return       - True if child is a subset of parent, false otherwise.
34 static inline bool inheritsFrom(InstructionContext child,
35                                 InstructionContext parent) {
36   if (child == parent)
37     return true;
38 
39   switch (parent) {
40   case IC:
41     return(inheritsFrom(child, IC_64BIT) ||
42            inheritsFrom(child, IC_OPSIZE) ||
43            inheritsFrom(child, IC_XD) ||
44            inheritsFrom(child, IC_XS));
45   case IC_64BIT:
46     return(inheritsFrom(child, IC_64BIT_REXW)   ||
47            inheritsFrom(child, IC_64BIT_OPSIZE) ||
48            inheritsFrom(child, IC_64BIT_XD)     ||
49            inheritsFrom(child, IC_64BIT_XS));
50   case IC_OPSIZE:
51     return inheritsFrom(child, IC_64BIT_OPSIZE);
52   case IC_XD:
53     return inheritsFrom(child, IC_64BIT_XD);
54   case IC_XS:
55     return inheritsFrom(child, IC_64BIT_XS);
56   case IC_XD_OPSIZE:
57     return inheritsFrom(child, IC_64BIT_XD_OPSIZE);
58   case IC_64BIT_REXW:
59     return(inheritsFrom(child, IC_64BIT_REXW_XS) ||
60            inheritsFrom(child, IC_64BIT_REXW_XD) ||
61            inheritsFrom(child, IC_64BIT_REXW_OPSIZE));
62   case IC_64BIT_OPSIZE:
63     return(inheritsFrom(child, IC_64BIT_REXW_OPSIZE));
64   case IC_64BIT_XD:
65     return(inheritsFrom(child, IC_64BIT_REXW_XD));
66   case IC_64BIT_XS:
67     return(inheritsFrom(child, IC_64BIT_REXW_XS));
68   case IC_64BIT_XD_OPSIZE:
69     return false;
70   case IC_64BIT_REXW_XD:
71     return false;
72   case IC_64BIT_REXW_XS:
73     return false;
74   case IC_64BIT_REXW_OPSIZE:
75     return false;
76   case IC_VEX:
77     return inheritsFrom(child, IC_VEX_W);
78   case IC_VEX_XS:
79     return inheritsFrom(child, IC_VEX_W_XS);
80   case IC_VEX_XD:
81     return inheritsFrom(child, IC_VEX_W_XD);
82   case IC_VEX_OPSIZE:
83     return inheritsFrom(child, IC_VEX_W_OPSIZE);
84   case IC_VEX_W:
85     return false;
86   case IC_VEX_W_XS:
87     return false;
88   case IC_VEX_W_XD:
89     return false;
90   case IC_VEX_W_OPSIZE:
91     return false;
92   case IC_VEX_L:
93     return false;
94   case IC_VEX_L_XS:
95     return false;
96   case IC_VEX_L_XD:
97     return false;
98   case IC_VEX_L_OPSIZE:
99     return false;
100   default:
101     llvm_unreachable("Unknown instruction class");
102     return false;
103   }
104 }
105 
106 /// outranks - Indicates whether, if an instruction has two different applicable
107 ///   classes, which class should be preferred when performing decode.  This
108 ///   imposes a total ordering (ties are resolved toward "lower")
109 ///
110 /// @param upper  - The class that may be preferable
111 /// @param lower  - The class that may be less preferable
112 /// @return       - True if upper is to be preferred, false otherwise.
113 static inline bool outranks(InstructionContext upper,
114                             InstructionContext lower) {
115   assert(upper < IC_max);
116   assert(lower < IC_max);
117 
118 #define ENUM_ENTRY(n, r, d) r,
119   static int ranks[IC_max] = {
120     INSTRUCTION_CONTEXTS
121   };
122 #undef ENUM_ENTRY
123 
124   return (ranks[upper] > ranks[lower]);
125 }
126 
127 /// stringForContext - Returns a string containing the name of a particular
128 ///   InstructionContext, usually for diagnostic purposes.
129 ///
130 /// @param insnContext  - The instruction class to transform to a string.
131 /// @return           - A statically-allocated string constant that contains the
132 ///                     name of the instruction class.
133 static inline const char* stringForContext(InstructionContext insnContext) {
134   switch (insnContext) {
135   default:
136     llvm_unreachable("Unhandled instruction class");
137 #define ENUM_ENTRY(n, r, d)   case n: return #n; break;
138   INSTRUCTION_CONTEXTS
139 #undef ENUM_ENTRY
140   }
141 
142   return 0;
143 }
144 
145 /// stringForOperandType - Like stringForContext, but for OperandTypes.
146 static inline const char* stringForOperandType(OperandType type) {
147   switch (type) {
148   default:
149     llvm_unreachable("Unhandled type");
150 #define ENUM_ENTRY(i, d) case i: return #i;
151   TYPES
152 #undef ENUM_ENTRY
153   }
154 }
155 
156 /// stringForOperandEncoding - like stringForContext, but for
157 ///   OperandEncodings.
158 static inline const char* stringForOperandEncoding(OperandEncoding encoding) {
159   switch (encoding) {
160   default:
161     llvm_unreachable("Unhandled encoding");
162 #define ENUM_ENTRY(i, d) case i: return #i;
163   ENCODINGS
164 #undef ENUM_ENTRY
165   }
166 }
167 
168 void DisassemblerTables::emitOneID(raw_ostream &o,
169                                    uint32_t &i,
170                                    InstrUID id,
171                                    bool addComma) const {
172   if (id)
173     o.indent(i * 2) << format("0x%hx", id);
174   else
175     o.indent(i * 2) << 0;
176 
177   if (addComma)
178     o << ", ";
179   else
180     o << "  ";
181 
182   o << "/* ";
183   o << InstructionSpecifiers[id].name;
184   o << "*/";
185 
186   o << "\n";
187 }
188 
189 /// emitEmptyTable - Emits the modRMEmptyTable, which is used as a ID table by
190 ///   all ModR/M decisions for instructions that are invalid for all possible
191 ///   ModR/M byte values.
192 ///
193 /// @param o        - The output stream on which to emit the table.
194 /// @param i        - The indentation level for that output stream.
195 static void emitEmptyTable(raw_ostream &o, uint32_t &i)
196 {
197   o.indent(i * 2) << "static const InstrUID modRMEmptyTable[1] = { 0 };\n";
198   o << "\n";
199 }
200 
201 /// getDecisionType - Determines whether a ModRM decision with 255 entries can
202 ///   be compacted by eliminating redundant information.
203 ///
204 /// @param decision - The decision to be compacted.
205 /// @return         - The compactest available representation for the decision.
206 static ModRMDecisionType getDecisionType(ModRMDecision &decision)
207 {
208   bool satisfiesOneEntry = true;
209   bool satisfiesSplitRM = true;
210 
211   uint16_t index;
212 
213   for (index = 0; index < 256; ++index) {
214     if (decision.instructionIDs[index] != decision.instructionIDs[0])
215       satisfiesOneEntry = false;
216 
217     if (((index & 0xc0) == 0xc0) &&
218        (decision.instructionIDs[index] != decision.instructionIDs[0xc0]))
219       satisfiesSplitRM = false;
220 
221     if (((index & 0xc0) != 0xc0) &&
222        (decision.instructionIDs[index] != decision.instructionIDs[0x00]))
223       satisfiesSplitRM = false;
224   }
225 
226   if (satisfiesOneEntry)
227     return MODRM_ONEENTRY;
228 
229   if (satisfiesSplitRM)
230     return MODRM_SPLITRM;
231 
232   return MODRM_FULL;
233 }
234 
235 /// stringForDecisionType - Returns a statically-allocated string corresponding
236 ///   to a particular decision type.
237 ///
238 /// @param dt - The decision type.
239 /// @return   - A pointer to the statically-allocated string (e.g.,
240 ///             "MODRM_ONEENTRY" for MODRM_ONEENTRY).
241 static const char* stringForDecisionType(ModRMDecisionType dt)
242 {
243 #define ENUM_ENTRY(n) case n: return #n;
244   switch (dt) {
245     default:
246       llvm_unreachable("Unknown decision type");
247     MODRMTYPES
248   };
249 #undef ENUM_ENTRY
250 }
251 
252 /// stringForModifierType - Returns a statically-allocated string corresponding
253 ///   to an opcode modifier type.
254 ///
255 /// @param mt - The modifier type.
256 /// @return   - A pointer to the statically-allocated string (e.g.,
257 ///             "MODIFIER_NONE" for MODIFIER_NONE).
258 static const char* stringForModifierType(ModifierType mt)
259 {
260 #define ENUM_ENTRY(n) case n: return #n;
261   switch(mt) {
262     default:
263       llvm_unreachable("Unknown modifier type");
264     MODIFIER_TYPES
265   };
266 #undef ENUM_ENTRY
267 }
268 
269 DisassemblerTables::DisassemblerTables() {
270   unsigned i;
271 
272   for (i = 0; i < array_lengthof(Tables); i++) {
273     Tables[i] = new ContextDecision;
274     memset(Tables[i], 0, sizeof(ContextDecision));
275   }
276 
277   HasConflicts = false;
278 }
279 
280 DisassemblerTables::~DisassemblerTables() {
281   unsigned i;
282 
283   for (i = 0; i < array_lengthof(Tables); i++)
284     delete Tables[i];
285 }
286 
287 void DisassemblerTables::emitModRMDecision(raw_ostream &o1,
288                                            raw_ostream &o2,
289                                            uint32_t &i1,
290                                            uint32_t &i2,
291                                            ModRMDecision &decision)
292   const {
293   static uint64_t sTableNumber = 0;
294   uint64_t thisTableNumber = sTableNumber;
295   ModRMDecisionType dt = getDecisionType(decision);
296   uint16_t index;
297 
298   if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0)
299   {
300     o2.indent(i2) << "{ /* ModRMDecision */" << "\n";
301     i2++;
302 
303     o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
304     o2.indent(i2) << "modRMEmptyTable";
305 
306     i2--;
307     o2.indent(i2) << "}";
308     return;
309   }
310 
311   o1.indent(i1) << "static const InstrUID modRMTable" << thisTableNumber;
312 
313   switch (dt) {
314     default:
315       llvm_unreachable("Unknown decision type");
316     case MODRM_ONEENTRY:
317       o1 << "[1]";
318       break;
319     case MODRM_SPLITRM:
320       o1 << "[2]";
321       break;
322     case MODRM_FULL:
323       o1 << "[256]";
324       break;
325   }
326 
327   o1 << " = {" << "\n";
328   i1++;
329 
330   switch (dt) {
331     default:
332       llvm_unreachable("Unknown decision type");
333     case MODRM_ONEENTRY:
334       emitOneID(o1, i1, decision.instructionIDs[0], false);
335       break;
336     case MODRM_SPLITRM:
337       emitOneID(o1, i1, decision.instructionIDs[0x00], true); // mod = 0b00
338       emitOneID(o1, i1, decision.instructionIDs[0xc0], false); // mod = 0b11
339       break;
340     case MODRM_FULL:
341       for (index = 0; index < 256; ++index)
342         emitOneID(o1, i1, decision.instructionIDs[index], index < 255);
343       break;
344   }
345 
346   i1--;
347   o1.indent(i1) << "};" << "\n";
348   o1 << "\n";
349 
350   o2.indent(i2) << "{ /* struct ModRMDecision */" << "\n";
351   i2++;
352 
353   o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
354   o2.indent(i2) << "modRMTable" << sTableNumber << "\n";
355 
356   i2--;
357   o2.indent(i2) << "}";
358 
359   ++sTableNumber;
360 }
361 
362 void DisassemblerTables::emitOpcodeDecision(
363   raw_ostream &o1,
364   raw_ostream &o2,
365   uint32_t &i1,
366   uint32_t &i2,
367   OpcodeDecision &decision) const {
368   uint16_t index;
369 
370   o2.indent(i2) << "{ /* struct OpcodeDecision */" << "\n";
371   i2++;
372   o2.indent(i2) << "{" << "\n";
373   i2++;
374 
375   for (index = 0; index < 256; ++index) {
376     o2.indent(i2);
377 
378     o2 << "/* 0x" << format("%02hhx", index) << " */" << "\n";
379 
380     emitModRMDecision(o1, o2, i1, i2, decision.modRMDecisions[index]);
381 
382     if (index <  255)
383       o2 << ",";
384 
385     o2 << "\n";
386   }
387 
388   i2--;
389   o2.indent(i2) << "}" << "\n";
390   i2--;
391   o2.indent(i2) << "}" << "\n";
392 }
393 
394 void DisassemblerTables::emitContextDecision(
395   raw_ostream &o1,
396   raw_ostream &o2,
397   uint32_t &i1,
398   uint32_t &i2,
399   ContextDecision &decision,
400   const char* name) const {
401   o2.indent(i2) << "static const struct ContextDecision " << name << " = {\n";
402   i2++;
403   o2.indent(i2) << "{ /* opcodeDecisions */" << "\n";
404   i2++;
405 
406   unsigned index;
407 
408   for (index = 0; index < IC_max; ++index) {
409     o2.indent(i2) << "/* ";
410     o2 << stringForContext((InstructionContext)index);
411     o2 << " */";
412     o2 << "\n";
413 
414     emitOpcodeDecision(o1, o2, i1, i2, decision.opcodeDecisions[index]);
415 
416     if (index + 1 < IC_max)
417       o2 << ", ";
418   }
419 
420   i2--;
421   o2.indent(i2) << "}" << "\n";
422   i2--;
423   o2.indent(i2) << "};" << "\n";
424 }
425 
426 void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i)
427   const {
428   o.indent(i * 2) << "static const struct InstructionSpecifier ";
429   o << INSTRUCTIONS_STR "[" << InstructionSpecifiers.size() << "] = {\n";
430 
431   i++;
432 
433   uint16_t numInstructions = InstructionSpecifiers.size();
434   uint16_t index, operandIndex;
435 
436   for (index = 0; index < numInstructions; ++index) {
437     o.indent(i * 2) << "{ /* " << index << " */" << "\n";
438     i++;
439 
440     o.indent(i * 2) <<
441       stringForModifierType(InstructionSpecifiers[index].modifierType);
442     o << "," << "\n";
443 
444     o.indent(i * 2) << "0x";
445     o << format("%02hhx", (uint16_t)InstructionSpecifiers[index].modifierBase);
446     o << "," << "\n";
447 
448     o.indent(i * 2) << "{" << "\n";
449     i++;
450 
451     for (operandIndex = 0; operandIndex < X86_MAX_OPERANDS; ++operandIndex) {
452       o.indent(i * 2) << "{ ";
453       o << stringForOperandEncoding(InstructionSpecifiers[index]
454                                     .operands[operandIndex]
455                                     .encoding);
456       o << ", ";
457       o << stringForOperandType(InstructionSpecifiers[index]
458                                 .operands[operandIndex]
459                                 .type);
460       o << " }";
461 
462       if (operandIndex < X86_MAX_OPERANDS - 1)
463         o << ",";
464 
465       o << "\n";
466     }
467 
468     i--;
469     o.indent(i * 2) << "}," << "\n";
470 
471     o.indent(i * 2) << "\"" << InstructionSpecifiers[index].name << "\"";
472     o << "\n";
473 
474     i--;
475     o.indent(i * 2) << "}";
476 
477     if (index + 1 < numInstructions)
478       o << ",";
479 
480     o << "\n";
481   }
482 
483   i--;
484   o.indent(i * 2) << "};" << "\n";
485 }
486 
487 void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
488   uint16_t index;
489 
490   o.indent(i * 2) << "static const InstructionContext " CONTEXTS_STR
491                      "[256] = {\n";
492   i++;
493 
494   for (index = 0; index < 256; ++index) {
495     o.indent(i * 2);
496 
497     if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE))
498       o << "IC_VEX_L_OPSIZE";
499     else if ((index & ATTR_VEXL) && (index & ATTR_XD))
500       o << "IC_VEX_L_XD";
501     else if ((index & ATTR_VEXL) && (index & ATTR_XS))
502       o << "IC_VEX_L_XS";
503     else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_OPSIZE))
504       o << "IC_VEX_W_OPSIZE";
505     else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XD))
506       o << "IC_VEX_W_XD";
507     else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XS))
508       o << "IC_VEX_W_XS";
509     else if (index & ATTR_VEXL)
510       o << "IC_VEX_L";
511     else if ((index & ATTR_VEX) && (index & ATTR_REXW))
512       o << "IC_VEX_W";
513     else if ((index & ATTR_VEX) && (index & ATTR_OPSIZE))
514       o << "IC_VEX_OPSIZE";
515     else if ((index & ATTR_VEX) && (index & ATTR_XD))
516       o << "IC_VEX_XD";
517     else if ((index & ATTR_VEX) && (index & ATTR_XS))
518       o << "IC_VEX_XS";
519     else if (index & ATTR_VEX)
520       o << "IC_VEX";
521     else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
522       o << "IC_64BIT_REXW_XS";
523     else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD))
524       o << "IC_64BIT_REXW_XD";
525     else if ((index & ATTR_64BIT) && (index & ATTR_REXW) &&
526              (index & ATTR_OPSIZE))
527       o << "IC_64BIT_REXW_OPSIZE";
528     else if ((index & ATTR_64BIT) && (index & ATTR_XD) && (index & ATTR_OPSIZE))
529       o << "IC_64BIT_XD_OPSIZE";
530     else if ((index & ATTR_64BIT) && (index & ATTR_XS))
531       o << "IC_64BIT_XS";
532     else if ((index & ATTR_64BIT) && (index & ATTR_XD))
533       o << "IC_64BIT_XD";
534     else if ((index & ATTR_64BIT) && (index & ATTR_OPSIZE))
535       o << "IC_64BIT_OPSIZE";
536     else if ((index & ATTR_64BIT) && (index & ATTR_REXW))
537       o << "IC_64BIT_REXW";
538     else if ((index & ATTR_64BIT))
539       o << "IC_64BIT";
540     else if ((index & ATTR_XD) && (index & ATTR_OPSIZE))
541       o << "IC_XD_OPSIZE";
542     else if (index & ATTR_XS)
543       o << "IC_XS";
544     else if (index & ATTR_XD)
545       o << "IC_XD";
546     else if (index & ATTR_OPSIZE)
547       o << "IC_OPSIZE";
548     else
549       o << "IC";
550 
551     if (index < 255)
552       o << ",";
553     else
554       o << " ";
555 
556     o << " /* " << index << " */";
557 
558     o << "\n";
559   }
560 
561   i--;
562   o.indent(i * 2) << "};" << "\n";
563 }
564 
565 void DisassemblerTables::emitContextDecisions(raw_ostream &o1,
566                                             raw_ostream &o2,
567                                             uint32_t &i1,
568                                             uint32_t &i2)
569   const {
570   emitContextDecision(o1, o2, i1, i2, *Tables[0], ONEBYTE_STR);
571   emitContextDecision(o1, o2, i1, i2, *Tables[1], TWOBYTE_STR);
572   emitContextDecision(o1, o2, i1, i2, *Tables[2], THREEBYTE38_STR);
573   emitContextDecision(o1, o2, i1, i2, *Tables[3], THREEBYTE3A_STR);
574   emitContextDecision(o1, o2, i1, i2, *Tables[4], THREEBYTEA6_STR);
575   emitContextDecision(o1, o2, i1, i2, *Tables[5], THREEBYTEA7_STR);
576 }
577 
578 void DisassemblerTables::emit(raw_ostream &o) const {
579   uint32_t i1 = 0;
580   uint32_t i2 = 0;
581 
582   std::string s1;
583   std::string s2;
584 
585   raw_string_ostream o1(s1);
586   raw_string_ostream o2(s2);
587 
588   emitInstructionInfo(o, i2);
589   o << "\n";
590 
591   emitContextTable(o, i2);
592   o << "\n";
593 
594   emitEmptyTable(o1, i1);
595   emitContextDecisions(o1, o2, i1, i2);
596 
597   o << o1.str();
598   o << "\n";
599   o << o2.str();
600   o << "\n";
601   o << "\n";
602 }
603 
604 void DisassemblerTables::setTableFields(ModRMDecision     &decision,
605                                         const ModRMFilter &filter,
606                                         InstrUID          uid,
607                                         uint8_t           opcode) {
608   unsigned index;
609 
610   for (index = 0; index < 256; ++index) {
611     if (filter.accepts(index)) {
612       if (decision.instructionIDs[index] == uid)
613         continue;
614 
615       if (decision.instructionIDs[index] != 0) {
616         InstructionSpecifier &newInfo =
617           InstructionSpecifiers[uid];
618         InstructionSpecifier &previousInfo =
619           InstructionSpecifiers[decision.instructionIDs[index]];
620 
621         if(newInfo.filtered)
622           continue; // filtered instructions get lowest priority
623 
624         if(previousInfo.name == "NOOP" && (newInfo.name == "XCHG16ar" ||
625                                            newInfo.name == "XCHG32ar" ||
626                                            newInfo.name == "XCHG64ar"))
627           continue; // special case for XCHG*ar and NOOP
628 
629         if (outranks(previousInfo.insnContext, newInfo.insnContext))
630           continue;
631 
632         if (previousInfo.insnContext == newInfo.insnContext &&
633             !previousInfo.filtered) {
634           errs() << "Error: Primary decode conflict: ";
635           errs() << newInfo.name << " would overwrite " << previousInfo.name;
636           errs() << "\n";
637           errs() << "ModRM   " << index << "\n";
638           errs() << "Opcode  " << (uint16_t)opcode << "\n";
639           errs() << "Context " << stringForContext(newInfo.insnContext) << "\n";
640           HasConflicts = true;
641         }
642       }
643 
644       decision.instructionIDs[index] = uid;
645     }
646   }
647 }
648 
649 void DisassemblerTables::setTableFields(OpcodeType          type,
650                                         InstructionContext  insnContext,
651                                         uint8_t             opcode,
652                                         const ModRMFilter   &filter,
653                                         InstrUID            uid,
654                                         bool                is32bit) {
655   unsigned index;
656 
657   ContextDecision &decision = *Tables[type];
658 
659   for (index = 0; index < IC_max; ++index) {
660     if (is32bit && inheritsFrom((InstructionContext)index, IC_64BIT))
661       continue;
662 
663     if (inheritsFrom((InstructionContext)index,
664                      InstructionSpecifiers[uid].insnContext))
665       setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode],
666                      filter,
667                      uid,
668                      opcode);
669   }
670 }
671