1ac66b4a1SChris Lattner //===-- PerfectShuffle.cpp - Perfect Shuffle Generator --------------------===//
22951955dSChris Lattner //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
62951955dSChris Lattner //
72951955dSChris Lattner //===----------------------------------------------------------------------===//
82951955dSChris Lattner //
// This file computes an optimal sequence of instructions for doing all shuffles
// of two 4-element vectors.  With a release build and when configured to emit
// an altivec instruction table, this takes about 30s to run on a 2.7GHz
// PowerPC G5.
132951955dSChris Lattner //
142951955dSChris Lattner //===----------------------------------------------------------------------===//
152951955dSChris Lattner 
163db20563SChris Lattner #include <cassert>
17579f0713SAnton Korobeynikov #include <cstdlib>
1891d19d8eSChandler Carruth #include <iomanip>
1991d19d8eSChandler Carruth #include <iostream>
2091d19d8eSChandler Carruth #include <vector>
2173dc9964SDavid Green 
2273dc9964SDavid Green #define GENERATE_NEON
2373dc9964SDavid Green #define GENERATE_NEON_INS
2473dc9964SDavid Green 
252951955dSChris Lattner struct Operator;
262951955dSChris Lattner 
// Masks are 4-nibble hex numbers.  A value of 0-7 in a nibble means that the
// element is taken from that position of the two concatenated input vectors
// (0-3 select from the LHS, 4-7 select from the RHS).  A value of 8 means the
// entry is undefined.
302951955dSChris Lattner 
312951955dSChris Lattner // Mask manipulation functions.
/// MakeMask - Pack four element selectors into one 16-bit mask.  V0 ends up in
/// the most significant nibble and V3 in the least significant one.
static inline unsigned short MakeMask(unsigned V0, unsigned V1,
                                      unsigned V2, unsigned V3) {
  const unsigned UpperHalf = (V0 << 12) | (V1 << 8);
  const unsigned LowerHalf = (V2 << 4) | V3;
  return UpperHalf | LowerHalf;
}
362951955dSChris Lattner 
/// getMaskElt - Return element Elt of the specified mask.  Element 0 is stored
/// in the most significant nibble, element 3 in the least significant one.
static unsigned getMaskElt(unsigned Mask, unsigned Elt) {
  const unsigned NibbleShift = 4 * (3 - Elt);
  const unsigned Shifted = Mask >> NibbleShift;
  return Shifted & 0xF;
}
412951955dSChris Lattner 
/// setMaskElt - Return a copy of Mask in which element Elt has been replaced
/// by NewVal.  The other three elements are left untouched.
static unsigned setMaskElt(unsigned Mask, unsigned Elt, unsigned NewVal) {
  const unsigned NibbleShift = 4 * (3 - Elt);
  // Blank out the old nibble first, then drop the new value into the hole.
  const unsigned Cleared = Mask & ~(0xF << NibbleShift);
  const unsigned Inserted = NewVal << NibbleShift;
  return Cleared | Inserted;
}
462951955dSChris Lattner 
// Reject masks in which any element has a value of 9-15.  Only 0-7 (an input
// element) and 8 (undef) are meaningful.
static bool isValidMask(unsigned short Mask) {
  for (unsigned Shift = 0; Shift != 16; Shift += 4) {
    const unsigned Elt = (Mask >> Shift) & 0xF;
    if (Elt > 8)
      return false;
  }
  return true;
}
522951955dSChris Lattner 
/// hasUndefElements - Return true if any of the elements in the mask are
/// undefs, i.e. if the top bit of any nibble is set.
static bool hasUndefElements(unsigned short Mask) {
  for (unsigned TopBit = 3; TopBit < 16; TopBit += 4)
    if ((Mask >> TopBit) & 1)
      return true;
  return false;
}
582951955dSChris Lattner 
/// isOnlyLHSMask - Return true if this mask only refers to its LHS.  Undef
/// elements do not count as a reference to either side.
static bool isOnlyLHSMask(unsigned short Mask) {
  // Bit 2 of a nibble is set exactly for the RHS selectors 4-7; it is clear
  // for the LHS selectors 0-3 and for the undef value 8.
  const unsigned RHSSelectBits = Mask & 0x4444;
  return !RHSSelectBits;
}
642951955dSChris Lattner 
/// getLHSOnlyMask - Given a mask that refers to its LHS and RHS, modify it to
/// refer to the LHS only (for when one argument value is passed into the same
/// function twice).
///
/// This helper is currently compiled out by the surrounding "#if 0".
#if 0
static unsigned short getLHSOnlyMask(unsigned short Mask) {
  // 0xB is 1011 in binary: clearing bit 2 of every nibble maps the RHS
  // selectors 4-7 onto 0-3 and leaves 0-3 and the undef value 8 untouched.
  return Mask & 0xBBBB;  // Keep only LHS and Undefs.
}
#endif
732951955dSChris Lattner 
742951955dSChris Lattner /// getCompressedMask - Turn a 16-bit uncompressed mask (where each elt uses 4
752951955dSChris Lattner /// bits) into a compressed 13-bit mask, where each elt is multiplied by 9.
getCompressedMask(unsigned short Mask)762951955dSChris Lattner static unsigned getCompressedMask(unsigned short Mask) {
772951955dSChris Lattner   return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 +
782951955dSChris Lattner          getMaskElt(Mask, 2)*9     + getMaskElt(Mask, 3);
792951955dSChris Lattner }
802951955dSChris Lattner 
PrintMask(unsigned i,std::ostream & OS)812951955dSChris Lattner static void PrintMask(unsigned i, std::ostream &OS) {
822951955dSChris Lattner   OS << "<" << (char)(getMaskElt(i, 0) == 8 ? 'u' : ('0'+getMaskElt(i, 0)))
832951955dSChris Lattner      << "," << (char)(getMaskElt(i, 1) == 8 ? 'u' : ('0'+getMaskElt(i, 1)))
842951955dSChris Lattner      << "," << (char)(getMaskElt(i, 2) == 8 ? 'u' : ('0'+getMaskElt(i, 2)))
852951955dSChris Lattner      << "," << (char)(getMaskElt(i, 3) == 8 ? 'u' : ('0'+getMaskElt(i, 3)))
862951955dSChris Lattner      << ">";
872951955dSChris Lattner }
882951955dSChris Lattner 
/// ShuffleVal - This represents a shufflevector operation.
///
/// A default-constructed value describes a shuffle that has not been reached
/// yet: it has a very large cost, no operation and zeroed operands.  All
/// members are initialized so that an instance is well defined no matter where
/// it is created, rather than only when it happens to have static storage.
struct ShuffleVal {
  // The elaborated specifier keeps this declaration valid even if no prior
  // declaration of Operator is visible; it names the namespace-scope Operator.
  struct Operator *Op;        // The Operation used to generate this value.
  unsigned Cost;              // Number of instrs used to generate this value.
  unsigned short Arg0, Arg1;  // Input operands for this value.

  ShuffleVal() : Op(nullptr), Cost(1000000), Arg0(0), Arg1(0) {}
};
972951955dSChris Lattner 
982951955dSChris Lattner 
/// ShufTab - This is the actual shuffle table that we are trying to generate.
/// It is indexed directly by the uncompressed 16-bit mask, so it has a slot for
/// every 16-bit value, including the ones isValidMask rejects.
static ShuffleVal ShufTab[65536];

/// TheOperators - All of the operators that this target supports.  Each
/// Operator appends itself here in its constructor and removes itself again in
/// its destructor.
static std::vector<Operator*> TheOperators;
1052951955dSChris Lattner 
1062951955dSChris Lattner /// Operator - This is a vector operation that is available for use.
1072951955dSChris Lattner struct Operator {
10846642ffeSBen Craig   const char *Name;
1092951955dSChris Lattner   unsigned short ShuffleMask;
1102951955dSChris Lattner   unsigned short OpNum;
11184e24388SAnton Korobeynikov   unsigned Cost;
1122951955dSChris Lattner 
OperatorOperator11384e24388SAnton Korobeynikov   Operator(unsigned short shufflemask, const char *name, unsigned opnum,
11484e24388SAnton Korobeynikov            unsigned cost = 1)
11546642ffeSBen Craig     :  Name(name), ShuffleMask(shufflemask), OpNum(opnum),Cost(cost) {
1162951955dSChris Lattner     TheOperators.push_back(this);
1172951955dSChris Lattner   }
~OperatorOperator1182951955dSChris Lattner   ~Operator() {
1192951955dSChris Lattner     assert(TheOperators.back() == this);
1202951955dSChris Lattner     TheOperators.pop_back();
1212951955dSChris Lattner   }
1222951955dSChris Lattner 
isOnlyLHSOperatorOperator1232951955dSChris Lattner   bool isOnlyLHSOperator() const {
1242951955dSChris Lattner     return isOnlyLHSMask(ShuffleMask);
1252951955dSChris Lattner   }
1262951955dSChris Lattner 
getNameOperator1272951955dSChris Lattner   const char *getName() const { return Name; }
getCostOperator12884e24388SAnton Korobeynikov   unsigned getCost() const { return Cost; }
1292951955dSChris Lattner 
getTransformedMaskOperator1302951955dSChris Lattner   unsigned short getTransformedMask(unsigned short LHSMask, unsigned RHSMask) {
1312951955dSChris Lattner     // Extract the elements from LHSMask and RHSMask, as appropriate.
1322951955dSChris Lattner     unsigned Result = 0;
1332951955dSChris Lattner     for (unsigned i = 0; i != 4; ++i) {
1342951955dSChris Lattner       unsigned SrcElt = (ShuffleMask >> (4*i)) & 0xF;
1352951955dSChris Lattner       unsigned ResElt;
1362951955dSChris Lattner       if (SrcElt < 4)
1372951955dSChris Lattner         ResElt = getMaskElt(LHSMask, SrcElt);
1382951955dSChris Lattner       else if (SrcElt < 8)
1392951955dSChris Lattner         ResElt = getMaskElt(RHSMask, SrcElt-4);
1402951955dSChris Lattner       else {
1412951955dSChris Lattner         assert(SrcElt == 8 && "Bad src elt!");
1422951955dSChris Lattner         ResElt = 8;
1432951955dSChris Lattner       }
1442951955dSChris Lattner       Result |= ResElt << (4*i);
1452951955dSChris Lattner     }
1462951955dSChris Lattner     return Result;
1472951955dSChris Lattner   }
1482951955dSChris Lattner };
1492951955dSChris Lattner 
#ifdef GENERATE_NEON_INS
// Special case "insert" op identifier used below.  It is created with a
// shuffle mask of 0, opcode number 15 and a cost of 1.  main() skips it when
// iterating over TheOperators and instead records &InsOp directly in table
// entries, storing a lane selector (not a mask) in Arg1.
static Operator InsOp(0, "ins", 15, 1);
#endif
15473dc9964SDavid Green 
getZeroCostOpName(unsigned short Op)1552951955dSChris Lattner static const char *getZeroCostOpName(unsigned short Op) {
1562951955dSChris Lattner   if (ShufTab[Op].Arg0 == 0x0123)
1572951955dSChris Lattner     return "LHS";
1582951955dSChris Lattner   else if (ShufTab[Op].Arg0 == 0x4567)
1592951955dSChris Lattner     return "RHS";
1602951955dSChris Lattner   else {
1612951955dSChris Lattner     assert(0 && "bad zero cost operation");
1622951955dSChris Lattner     abort();
1632951955dSChris Lattner   }
1642951955dSChris Lattner }
1652951955dSChris Lattner 
PrintOperation(unsigned ValNo,unsigned short Vals[])1662951955dSChris Lattner static void PrintOperation(unsigned ValNo, unsigned short Vals[]) {
1672951955dSChris Lattner   unsigned short ThisOp = Vals[ValNo];
1682951955dSChris Lattner   std::cerr << "t" << ValNo;
1692951955dSChris Lattner   PrintMask(ThisOp, std::cerr);
1702951955dSChris Lattner   std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "(";
1712951955dSChris Lattner 
1722951955dSChris Lattner   if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) {
1732951955dSChris Lattner     std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg0);
1742951955dSChris Lattner     PrintMask(ShufTab[ThisOp].Arg0, std::cerr);
1752951955dSChris Lattner   } else {
1762951955dSChris Lattner     // Figure out what tmp # it is.
1772951955dSChris Lattner     for (unsigned i = 0; ; ++i)
1782951955dSChris Lattner       if (Vals[i] == ShufTab[ThisOp].Arg0) {
1792951955dSChris Lattner         std::cerr << "t" << i;
1802951955dSChris Lattner         break;
1812951955dSChris Lattner       }
1822951955dSChris Lattner   }
1832951955dSChris Lattner 
18473dc9964SDavid Green #ifdef GENERATE_NEON_INS
18573dc9964SDavid Green   if (ShufTab[ThisOp].Op == &InsOp) {
18673dc9964SDavid Green     std::cerr << ", lane " << ShufTab[ThisOp].Arg1;
18773dc9964SDavid Green   } else
18873dc9964SDavid Green #endif
1892951955dSChris Lattner   if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) {
1902951955dSChris Lattner     std::cerr << ", ";
1912951955dSChris Lattner     if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) {
1922951955dSChris Lattner       std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg1);
1932951955dSChris Lattner       PrintMask(ShufTab[ThisOp].Arg1, std::cerr);
1942951955dSChris Lattner     } else {
1952951955dSChris Lattner       // Figure out what tmp # it is.
1962951955dSChris Lattner       for (unsigned i = 0; ; ++i)
1972951955dSChris Lattner         if (Vals[i] == ShufTab[ThisOp].Arg1) {
1982951955dSChris Lattner           std::cerr << "t" << i;
1992951955dSChris Lattner           break;
2002951955dSChris Lattner         }
2012951955dSChris Lattner     }
2022951955dSChris Lattner   }
2032951955dSChris Lattner   std::cerr << ")  ";
2042951955dSChris Lattner }
2052951955dSChris Lattner 
getNumEntered()2062951955dSChris Lattner static unsigned getNumEntered() {
2072951955dSChris Lattner   unsigned Count = 0;
2082951955dSChris Lattner   for (unsigned i = 0; i != 65536; ++i)
2092951955dSChris Lattner     Count += ShufTab[i].Cost < 100;
2102951955dSChris Lattner   return Count;
2112951955dSChris Lattner }
2122951955dSChris Lattner 
EvaluateOps(unsigned short Elt,unsigned short Vals[],unsigned & NumVals)2132951955dSChris Lattner static void EvaluateOps(unsigned short Elt, unsigned short Vals[],
2142951955dSChris Lattner                         unsigned &NumVals) {
2152951955dSChris Lattner   if (ShufTab[Elt].Cost == 0) return;
21673dc9964SDavid Green #ifdef GENERATE_NEON_INS
21773dc9964SDavid Green   if (ShufTab[Elt].Op == &InsOp) {
21873dc9964SDavid Green     EvaluateOps(ShufTab[Elt].Arg0, Vals, NumVals);
21973dc9964SDavid Green     Vals[NumVals++] = Elt;
22073dc9964SDavid Green     return;
22173dc9964SDavid Green   }
22273dc9964SDavid Green #endif
2232951955dSChris Lattner 
2242951955dSChris Lattner   // If this value has already been evaluated, it is free.  FIXME: match undefs.
2252951955dSChris Lattner   for (unsigned i = 0, e = NumVals; i != e; ++i)
2262951955dSChris Lattner     if (Vals[i] == Elt) return;
2272951955dSChris Lattner 
2282951955dSChris Lattner   // Otherwise, get the operands of the value, then add it.
2292951955dSChris Lattner   unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1;
2302951955dSChris Lattner   if (ShufTab[Arg0].Cost)
2312951955dSChris Lattner     EvaluateOps(Arg0, Vals, NumVals);
2322951955dSChris Lattner   if (Arg0 != Arg1 && ShufTab[Arg1].Cost)
2332951955dSChris Lattner     EvaluateOps(Arg1, Vals, NumVals);
2342951955dSChris Lattner 
2352951955dSChris Lattner   Vals[NumVals++] = Elt;
2362951955dSChris Lattner }
2372951955dSChris Lattner 
2382951955dSChris Lattner 
/// main - Compute the cheapest known instruction sequence for every valid
/// mask and print the result to stdout as the C array PerfectShuffleTable,
/// preceded by a cost histogram.  Progress messages are written to stderr.
///
/// The table is filled in by repeated relaxation: each pass first lets masks
/// with undef elements inherit the cheapest entry they match (and, for NEON,
/// tries lane inserts), then applies every operator to every pair of already
/// reachable masks.  The passes stop once one of them changes nothing.
int main() {
  // Seed the table with accesses to the LHS and RHS.
  ShufTab[0x0123].Cost = 0;
  ShufTab[0x0123].Op = nullptr;
  ShufTab[0x0123].Arg0 = 0x0123;
  ShufTab[0x4567].Cost = 0;
  ShufTab[0x4567].Op = nullptr;
  ShufTab[0x4567].Arg0 = 0x4567;

  // Seed the first-level of shuffles, shuffles whose inputs are the input to
  // the vectorshuffle operation.
  bool MadeChange = true;
  unsigned OpCount = 0;
  while (MadeChange) {
    MadeChange = false;
    ++OpCount;
    std::cerr << "Starting iteration #" << OpCount << " with "
              << getNumEntered() << " entries established.\n";

    // Scan the table for two reasons: First, compute the maximum cost of any
    // operation left in the table.  Second, make sure that values with undefs
    // have the cheapest alternative that they match.
    //
    // 0x8888 is the largest mask isValidMask accepts, so every scan over the
    // table stops at 0x8889.
    unsigned MaxCost = ShufTab[0].Cost;
    for (unsigned i = 1; i != 0x8889; ++i) {
      if (!isValidMask(i)) continue;
      if (ShufTab[i].Cost > MaxCost)
        MaxCost = ShufTab[i].Cost;

      // If this value has an undef, make it be computed the cheapest possible
      // way of any of the things that it matches.
      if (hasUndefElements(i)) {
        // This code is a little bit tricky, so here's the idea: consider some
        // permutation, like 7u4u.  To compute the lowest cost for 7u4u, we
        // need to take the minimum cost of all of 7[0-8]4[0-8], 81 entries.  If
        // there are 3 undefs, the number rises to 729 entries we have to scan,
        // and for the 4 undef case, we have to scan the whole table.
        //
        // Instead of doing this huge amount of scanning, we process the table
        // entries *in order*, and use the fact that 'u' is 8, larger than any
        // valid index.  Given an entry like 7u4u then, we only need to scan
        // 7[0-7]4u - 8 entries.  We can get away with this, because we already
        // know that each of 704u, 714u, 724u, etc contain the minimum value of
        // all of the 704[0-8], 714[0-8] and 724[0-8] entries respectively.

        // Pick the first (most significant) undef element of the mask.
        unsigned UndefIdx;
        if (i & 0x8000)
          UndefIdx = 0;
        else if (i & 0x0800)
          UndefIdx = 1;
        else if (i & 0x0080)
          UndefIdx = 2;
        else if (i & 0x0008)
          UndefIdx = 3;
        else
          abort();

        unsigned MinVal  = i;
        unsigned MinCost = ShufTab[i].Cost;

        // Scan the 8 entries.
        for (unsigned j = 0; j != 8; ++j) {
          unsigned NewElt = setMaskElt(i, UndefIdx, j);
          if (ShufTab[NewElt].Cost < MinCost) {
            MinCost = ShufTab[NewElt].Cost;
            MinVal = NewElt;
          }
        }

        // If we found something cheaper than what was here before, use it.
        // The whole entry (operation and operands included) is copied.
        if (i != MinVal) {
          MadeChange = true;
          ShufTab[i] = ShufTab[MinVal];
        }
      }
#ifdef GENERATE_NEON_INS
      else {
        // Similarly, if we take the mask (eg 3,6,1,0) and take the cost with
        // undef for each lane (eg u,6,1,0 or 3,u,1,0 etc), we can use a single
        // lane insert to fixup the result.
        for (unsigned LaneIdx = 0; LaneIdx < 4; LaneIdx++) {
          if (getMaskElt(i, LaneIdx) == 8)
            continue;
          unsigned NewElt = setMaskElt(i, LaneIdx, 8);
          if (ShufTab[NewElt].Cost + 1 < ShufTab[i].Cost) {
            MadeChange = true;
            ShufTab[i].Cost = ShufTab[NewElt].Cost + 1;
            ShufTab[i].Op = &InsOp;
            ShufTab[i].Arg0 = NewElt;
            // For an insert, Arg1 holds the lane number instead of a mask.
            ShufTab[i].Arg1 = LaneIdx;
          }
        }

        // Similar idea for using a D register mov, masking out 2 lanes to undef
        for (unsigned LaneIdx = 0; LaneIdx < 4; LaneIdx += 2) {
          unsigned Ln0 = getMaskElt(i, LaneIdx);
          unsigned Ln1 = getMaskElt(i, LaneIdx + 1);
          // Only an aligned pair of consecutive source elements qualifies.
          if ((Ln0 == 0 && Ln1 == 1) || (Ln0 == 2 && Ln1 == 3) ||
              (Ln0 == 4 && Ln1 == 5) || (Ln0 == 6 && Ln1 == 7)) {
            unsigned NewElt = setMaskElt(i, LaneIdx, 8);
            NewElt = setMaskElt(NewElt, LaneIdx + 1, 8);
            if (ShufTab[NewElt].Cost + 1 < ShufTab[i].Cost) {
              MadeChange = true;
              ShufTab[i].Cost = ShufTab[NewElt].Cost + 1;
              ShufTab[i].Op = &InsOp;
              ShufTab[i].Arg0 = NewElt;
              // Bit 2 marks the two-lane form; the low bit says which half of
              // the result (0 = lanes 0-1, 1 = lanes 2-3) is written.
              ShufTab[i].Arg1 = (LaneIdx >> 1) | 0x4;
            }
          }
        }
      }
#endif
    }

    // Try to reach new masks by applying each operator to the masks that are
    // already reachable.
    for (unsigned LHS = 0; LHS != 0x8889; ++LHS) {
      if (!isValidMask(LHS)) continue;
      // Entries still at the ShuffleVal default cost have not been reached.
      if (ShufTab[LHS].Cost > 1000) continue;

      // If nothing involving this operand could possibly be cheaper than what
      // we already have, don't consider it.
      if (ShufTab[LHS].Cost + 1 >= MaxCost)
        continue;

      for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) {
        Operator *Op = TheOperators[opnum];
#ifdef GENERATE_NEON_INS
        // The insert op is handled by the dedicated code in the scan above.
        if (Op == &InsOp)
          continue;
#endif

        // Evaluate op(LHS,LHS)
        unsigned ResultMask = Op->getTransformedMask(LHS, LHS);

        unsigned Cost = ShufTab[LHS].Cost + Op->getCost();
        if (Cost < ShufTab[ResultMask].Cost) {
          ShufTab[ResultMask].Cost = Cost;
          ShufTab[ResultMask].Op = Op;
          ShufTab[ResultMask].Arg0 = LHS;
          ShufTab[ResultMask].Arg1 = LHS;
          MadeChange = true;
        }

        // If this is a two input instruction, include the op(x,y) cases.  If
        // this is a one input instruction, skip this.
        if (Op->isOnlyLHSOperator()) continue;

        for (unsigned RHS = 0; RHS != 0x8889; ++RHS) {
          if (!isValidMask(RHS)) continue;
          if (ShufTab[RHS].Cost > 1000) continue;

          // If nothing involving this operand could possibly be cheaper than
          // what we already have, don't consider it.
          if (ShufTab[RHS].Cost + 1 >= MaxCost)
            continue;


          // Evaluate op(LHS,RHS)
          unsigned ResultMask = Op->getTransformedMask(LHS, RHS);

          // Skip results that are already at least as cheap as the current
          // iteration number or as either operand.
          if (ShufTab[ResultMask].Cost <= OpCount ||
              ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost ||
              ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost)
            continue;

          // Figure out the cost to evaluate this, knowing that CSE's only need
          // to be evaluated once.
          unsigned short Vals[30];
          unsigned NumVals = 0;
          EvaluateOps(LHS, Vals, NumVals);
          EvaluateOps(RHS, Vals, NumVals);

          // NumVals is the number of distinct temporaries both operands need.
          unsigned Cost = NumVals + Op->getCost();
          if (Cost < ShufTab[ResultMask].Cost) {
            ShufTab[ResultMask].Cost = Cost;
            ShufTab[ResultMask].Op = Op;
            ShufTab[ResultMask].Arg0 = LHS;
            ShufTab[ResultMask].Arg1 = RHS;
            MadeChange = true;
          }
        }
      }
    }
  }

  std::cerr << "Finished Table has " << getNumEntered()
            << " entries established.\n";

  // Buckets 0-8 count masks with exactly that cost; bucket 9 collects every
  // mask that costs more than 9.
  unsigned CostArray[10] = { 0 };

  // Compute a cost histogram.
  for (unsigned i = 0; i != 65536; ++i) {
    if (!isValidMask(i)) continue;
    if (ShufTab[i].Cost > 9)
      ++CostArray[9];
    else
      ++CostArray[ShufTab[i].Cost];
  }

  for (unsigned i = 0; i != 9; ++i)
    if (CostArray[i])
      std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n";
  if (CostArray[9])
    std::cout << "// " << CostArray[9] << " entries have higher cost!\n";


  // Build up the table to emit.  One line is printed per valid mask, followed
  // by a final 0 entry, which is why the array is declared with 6561+1 slots.
  std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n";
  std::cout << "static const unsigned PerfectShuffleTable[6561+1] = {\n";

  for (unsigned i = 0; i != 0x8889; ++i) {
    if (!isValidMask(i)) continue;

    // CostSat - The cost of this operation saturated to two bits.  Costs 0 and
    // 1 both encode as 0, and every cost of 4 or more encodes as 3.
    unsigned CostSat = ShufTab[i].Cost;
    if (CostSat > 4) CostSat = 4;
    if (CostSat == 0) CostSat = 1;
    --CostSat;  // Cost is now between 0-3.

    // Entries without an operation (the free inputs) are emitted as opcode 0.
    unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0;
    assert(OpNum < 16 && "Too few bits to encode operation!");

    unsigned LHS = getCompressedMask(ShufTab[i].Arg0);
    unsigned RHS = getCompressedMask(ShufTab[i].Arg1);

    // Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of
    // LHS, and 13 bits of RHS = 32 bits.
    unsigned Val = (CostSat << 30) | (OpNum << 26) | (LHS << 13) | RHS;

    // The trailing comment on each line describes the entry in readable form.
    std::cout << "  " << std::setw(10) << Val << "U, // ";
    PrintMask(i, std::cout);
    std::cout << ": Cost " << ShufTab[i].Cost;
    std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy");
    std::cout << " ";
    if (ShufTab[ShufTab[i].Arg0].Cost == 0) {
      std::cout << getZeroCostOpName(ShufTab[i].Arg0);
    } else {
      PrintMask(ShufTab[i].Arg0, std::cout);
    }

    if (ShufTab[i].Op && !ShufTab[i].Op->isOnlyLHSOperator()) {
      std::cout << ", ";
      if (ShufTab[ShufTab[i].Arg1].Cost == 0) {
        std::cout << getZeroCostOpName(ShufTab[i].Arg1);
      } else {
        PrintMask(ShufTab[i].Arg1, std::cout);
      }
    }
#ifdef GENERATE_NEON_INS
    else if (ShufTab[i].Op == &InsOp) {
      std::cout << ", lane " << ShufTab[i].Arg1;
    }
#endif

    std::cout << "\n";
  }
  std::cout << "  0\n};\n";

  // Debugging aid, disabled by default: dump the full instruction sequence of
  // every reachable mask to stderr.
  if (false) {
    // Print out the table.
    for (unsigned i = 0; i != 0x8889; ++i) {
      if (!isValidMask(i)) continue;
      if (ShufTab[i].Cost < 1000) {
        PrintMask(i, std::cerr);
        std::cerr << " - Cost " << ShufTab[i].Cost << " - ";

        unsigned short Vals[30];
        unsigned NumVals = 0;
        EvaluateOps(i, Vals, NumVals);

        for (unsigned j = 0, e = NumVals; j != e; ++j)
          PrintOperation(j, Vals);
        std::cerr << "\n";
      }
    }
  }
}
5132951955dSChris Lattner 
5142951955dSChris Lattner 
515895dba97SChris Lattner #ifdef GENERATE_ALTIVEC
5162951955dSChris Lattner 
5172951955dSChris Lattner ///===---------------------------------------------------------------------===//
5182951955dSChris Lattner /// The altivec instruction definitions.  This is the altivec-specific part of
5192951955dSChris Lattner /// this file.
5202951955dSChris Lattner ///===---------------------------------------------------------------------===//
5212951955dSChris Lattner 
// Note that the opcode numbers here must match those in the PPC backend.
// The enumerators are numbered consecutively starting at 0; each value is
// passed as the opcode number of the operator defined for it further down.
enum {
  OP_COPY = 0,   // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
  OP_VMRGHW,
  OP_VMRGLW,
  OP_VSPLTISW0,
  OP_VSPLTISW1,
  OP_VSPLTISW2,
  OP_VSPLTISW3,
  OP_VSLDOI4,
  OP_VSLDOI8,
  OP_VSLDOI12
};
535895dba97SChris Lattner 
5362951955dSChris Lattner struct vmrghw : public Operator {
vmrghwvmrghw537895dba97SChris Lattner   vmrghw() : Operator(0x0415, "vmrghw", OP_VMRGHW) {}
5382951955dSChris Lattner } the_vmrghw;
5392951955dSChris Lattner 
5402951955dSChris Lattner struct vmrglw : public Operator {
vmrglwvmrglw541895dba97SChris Lattner   vmrglw() : Operator(0x2637, "vmrglw", OP_VMRGLW) {}
5422951955dSChris Lattner } the_vmrglw;
5432951955dSChris Lattner 
5442951955dSChris Lattner template<unsigned Elt>
5452951955dSChris Lattner struct vspltisw : public Operator {
vspltiswvspltisw546895dba97SChris Lattner   vspltisw(const char *N, unsigned Opc)
547895dba97SChris Lattner     : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
5482951955dSChris Lattner };
5492951955dSChris Lattner 
550895dba97SChris Lattner vspltisw<0> the_vspltisw0("vspltisw0", OP_VSPLTISW0);
551895dba97SChris Lattner vspltisw<1> the_vspltisw1("vspltisw1", OP_VSPLTISW1);
552895dba97SChris Lattner vspltisw<2> the_vspltisw2("vspltisw2", OP_VSPLTISW2);
553895dba97SChris Lattner vspltisw<3> the_vspltisw3("vspltisw3", OP_VSPLTISW3);
5542951955dSChris Lattner 
5552951955dSChris Lattner template<unsigned N>
5562951955dSChris Lattner struct vsldoi : public Operator {
vsldoivsldoi557895dba97SChris Lattner   vsldoi(const char *Name, unsigned Opc)
558895dba97SChris Lattner     : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {
5592951955dSChris Lattner   }
5602951955dSChris Lattner };
5612951955dSChris Lattner 
562895dba97SChris Lattner vsldoi<1> the_vsldoi1("vsldoi4" , OP_VSLDOI4);
563895dba97SChris Lattner vsldoi<2> the_vsldoi2("vsldoi8" , OP_VSLDOI8);
564895dba97SChris Lattner vsldoi<3> the_vsldoi3("vsldoi12", OP_VSLDOI12);
5652951955dSChris Lattner 
#endif

#ifdef GENERATE_NEON
// Opcode identifiers passed to the NEON Operator instances defined below.
// Enumerators after OP_COPY take consecutive values, so their order matters.
enum {
  OP_COPY = 0,   // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
  OP_VREV,
  OP_VDUP0,
  OP_VDUP1,
  OP_VDUP2,
  OP_VDUP3,
  OP_VEXT1,
  OP_VEXT2,
  OP_VEXT3,
  OP_VUZPL, // VUZP, left result
  OP_VUZPR, // VUZP, right result
  OP_VZIPL, // VZIP, left result
  OP_VZIPR, // VZIP, right result
  OP_VTRNL, // VTRN, left result
  OP_VTRNR  // VTRN, right result
};
5869a232f46SAnton Korobeynikov 
5879a232f46SAnton Korobeynikov struct vrev : public Operator {
vrevvrev58848b182c3STanya Lattner   vrev() : Operator(0x1032, "vrev", OP_VREV) {}
5899a232f46SAnton Korobeynikov } the_vrev;
5909a232f46SAnton Korobeynikov 
5919a232f46SAnton Korobeynikov template<unsigned Elt>
5929a232f46SAnton Korobeynikov struct vdup : public Operator {
vdupvdup5939a232f46SAnton Korobeynikov   vdup(const char *N, unsigned Opc)
5949a232f46SAnton Korobeynikov     : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
5959a232f46SAnton Korobeynikov };
5969a232f46SAnton Korobeynikov 
5979a232f46SAnton Korobeynikov vdup<0> the_vdup0("vdup0", OP_VDUP0);
5989a232f46SAnton Korobeynikov vdup<1> the_vdup1("vdup1", OP_VDUP1);
5999a232f46SAnton Korobeynikov vdup<2> the_vdup2("vdup2", OP_VDUP2);
6009a232f46SAnton Korobeynikov vdup<3> the_vdup3("vdup3", OP_VDUP3);
6019a232f46SAnton Korobeynikov 
6029a232f46SAnton Korobeynikov template<unsigned N>
6039a232f46SAnton Korobeynikov struct vext : public Operator {
vextvext6049a232f46SAnton Korobeynikov   vext(const char *Name, unsigned Opc)
6059a232f46SAnton Korobeynikov     : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {
6069a232f46SAnton Korobeynikov   }
6079a232f46SAnton Korobeynikov };
6089a232f46SAnton Korobeynikov 
6099a232f46SAnton Korobeynikov vext<1> the_vext1("vext1", OP_VEXT1);
6109a232f46SAnton Korobeynikov vext<2> the_vext2("vext2", OP_VEXT2);
6119a232f46SAnton Korobeynikov vext<3> the_vext3("vext3", OP_VEXT3);
6129a232f46SAnton Korobeynikov 
6139a232f46SAnton Korobeynikov struct vuzpl : public Operator {
vuzplvuzpl61450af8270SDavid Green   vuzpl() : Operator(0x0246, "vuzpl", OP_VUZPL, 1) {}
6159a232f46SAnton Korobeynikov } the_vuzpl;
6169a232f46SAnton Korobeynikov 
6179a232f46SAnton Korobeynikov struct vuzpr : public Operator {
vuzprvuzpr61850af8270SDavid Green   vuzpr() : Operator(0x1357, "vuzpr", OP_VUZPR, 1) {}
6199a232f46SAnton Korobeynikov } the_vuzpr;
6209a232f46SAnton Korobeynikov 
6219a232f46SAnton Korobeynikov struct vzipl : public Operator {
vziplvzipl62250af8270SDavid Green   vzipl() : Operator(0x0415, "vzipl", OP_VZIPL, 1) {}
6239a232f46SAnton Korobeynikov } the_vzipl;
6249a232f46SAnton Korobeynikov 
6259a232f46SAnton Korobeynikov struct vzipr : public Operator {
vziprvzipr62650af8270SDavid Green   vzipr() : Operator(0x2637, "vzipr", OP_VZIPR, 1) {}
6279a232f46SAnton Korobeynikov } the_vzipr;
6289a232f46SAnton Korobeynikov 
6299a232f46SAnton Korobeynikov struct vtrnl : public Operator {
vtrnlvtrnl63050af8270SDavid Green   vtrnl() : Operator(0x0426, "vtrnl", OP_VTRNL, 1) {}
6319a232f46SAnton Korobeynikov } the_vtrnl;
6329a232f46SAnton Korobeynikov 
6339a232f46SAnton Korobeynikov struct vtrnr : public Operator {
vtrnrvtrnr63450af8270SDavid Green   vtrnr() : Operator(0x1537, "vtrnr", OP_VTRNR, 1) {}
6359a232f46SAnton Korobeynikov } the_vtrnr;
6369a232f46SAnton Korobeynikov 
#endif
638