1 //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the SelectionDAG::LegalizeVectors method.
11 //
12 // The vector legalizer looks for vector operations which might need to be
13 // scalarized and legalizes them. This is a separate step from Legalize because
14 // scalarizing can introduce illegal types.  For example, suppose we have an
15 // ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
16 // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
17 // operation, which introduces nodes with the illegal type i64 which must be
18 // expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
19 // the operation must be unrolled, which introduces nodes with the illegal
20 // type i8 which must be promoted.
21 //
22 // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
23 // or operations that happen to take a vector which are custom-lowered;
24 // the legalization for such operations never produces nodes
25 // with illegal types, so it's okay to put off legalizing them until
26 // SelectionDAG::Legalize runs.
27 //
28 //===----------------------------------------------------------------------===//
29 
30 #include "llvm/ADT/APInt.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/CodeGen/ISDOpcodes.h"
34 #include "llvm/CodeGen/MachineMemOperand.h"
35 #include "llvm/CodeGen/SelectionDAG.h"
36 #include "llvm/CodeGen/SelectionDAGNodes.h"
37 #include "llvm/CodeGen/TargetLowering.h"
38 #include "llvm/CodeGen/ValueTypes.h"
39 #include "llvm/IR/DataLayout.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/Compiler.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/MachineValueType.h"
44 #include "llvm/Support/MathExtras.h"
45 #include <cassert>
46 #include <cstdint>
47 #include <iterator>
48 #include <utility>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "legalizevectorops"
53 
54 namespace {
55 
56 class VectorLegalizer {
57   SelectionDAG& DAG;
58   const TargetLowering &TLI;
59   bool Changed = false; // Keep track of whether anything changed
60 
61   /// For nodes that are of legal width, and that have more than one use, this
62   /// map indicates what regularized operand to use.  This allows us to avoid
63   /// legalizing the same thing more than once.
64   SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
65 
66   /// Adds a node to the translation cache.
67   void AddLegalizedOperand(SDValue From, SDValue To) {
68     LegalizedNodes.insert(std::make_pair(From, To));
69     // If someone requests legalization of the new node, return itself.
70     if (From != To)
71       LegalizedNodes.insert(std::make_pair(To, To));
72   }
73 
74   /// Legalizes the given node.
75   SDValue LegalizeOp(SDValue Op);
76 
77   /// Assuming the node is legal, "legalize" the results.
78   SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
79 
80   /// Implements unrolling a VSETCC.
81   SDValue UnrollVSETCC(SDValue Op);
82 
83   /// Implement expand-based legalization of vector operations.
84   ///
85   /// This is just a high-level routine to dispatch to specific code paths for
86   /// operations to legalize them.
87   SDValue Expand(SDValue Op);
88 
89   /// Implements expansion for FNEG; falls back to UnrollVectorOp if
90   /// FSUB isn't legal.
91   ///
92   /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
93   /// SINT_TO_FLOAT and SHR on vectors isn't legal.
94   SDValue ExpandUINT_TO_FLOAT(SDValue Op);
95 
96   /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
97   SDValue ExpandSEXTINREG(SDValue Op);
98 
99   /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
100   ///
101   /// Shuffles the low lanes of the operand into place and bitcasts to the proper
102   /// type. The contents of the bits in the extended part of each element are
103   /// undef.
104   SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
105 
106   /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
107   ///
108   /// Shuffles the low lanes of the operand into place, bitcasts to the proper
109   /// type, then shifts left and arithmetic shifts right to introduce a sign
110   /// extension.
111   SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
112 
113   /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
114   ///
115   /// Shuffles the low lanes of the operand into place and blends zeros into
116   /// the remaining lanes, finally bitcasting to the proper type.
117   SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
118 
119   /// Expand bswap of vectors into a shuffle if legal.
120   SDValue ExpandBSWAP(SDValue Op);
121 
122   /// Implement vselect in terms of XOR, AND, OR when blend is not
123   /// supported by the target.
124   SDValue ExpandVSELECT(SDValue Op);
125   SDValue ExpandSELECT(SDValue Op);
126   SDValue ExpandLoad(SDValue Op);
127   SDValue ExpandStore(SDValue Op);
128   SDValue ExpandFNEG(SDValue Op);
129   SDValue ExpandFSUB(SDValue Op);
130   SDValue ExpandBITREVERSE(SDValue Op);
131   SDValue ExpandCTLZ(SDValue Op);
132   SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
133   SDValue ExpandStrictFPOp(SDValue Op);
134 
135   /// Implements vector promotion.
136   ///
137   /// This is essentially just bitcasting the operands to a different type and
138   /// bitcasting the result back to the original type.
139   SDValue Promote(SDValue Op);
140 
141   /// Implements [SU]INT_TO_FP vector promotion.
142   ///
143   /// This is a [zs]ext of the input operand to a larger integer type.
144   SDValue PromoteINT_TO_FP(SDValue Op);
145 
146   /// Implements FP_TO_[SU]INT vector promotion of the result type.
147   ///
148   /// It is promoted to a larger integer type.  The result is then
149   /// truncated back to the original type.
150   SDValue PromoteFP_TO_INT(SDValue Op);
151 
152 public:
153   VectorLegalizer(SelectionDAG& dag) :
154       DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
155 
156   /// Begin legalizer the vector operations in the DAG.
157   bool Run();
158 };
159 
160 } // end anonymous namespace
161 
162 bool VectorLegalizer::Run() {
163   // Before we start legalizing vector nodes, check if there are any vectors.
164   bool HasVectors = false;
165   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
166        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
167     // Check if the values of the nodes contain vectors. We don't need to check
168     // the operands because we are going to check their values at some point.
169     for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
170          J != E; ++J)
171       HasVectors |= J->isVector();
172 
173     // If we found a vector node we can start the legalization.
174     if (HasVectors)
175       break;
176   }
177 
178   // If this basic block has no vectors then no need to legalize vectors.
179   if (!HasVectors)
180     return false;
181 
182   // The legalize process is inherently a bottom-up recursive process (users
183   // legalize their uses before themselves).  Given infinite stack space, we
184   // could just start legalizing on the root and traverse the whole graph.  In
185   // practice however, this causes us to run out of stack space on large basic
186   // blocks.  To avoid this problem, compute an ordering of the nodes where each
187   // node is only legalized after all of its operands are legalized.
188   DAG.AssignTopologicalOrder();
189   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
190        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
191     LegalizeOp(SDValue(&*I, 0));
192 
193   // Finally, it's possible the root changed.  Get the new root.
194   SDValue OldRoot = DAG.getRoot();
195   assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
196   DAG.setRoot(LegalizedNodes[OldRoot]);
197 
198   LegalizedNodes.clear();
199 
200   // Remove dead nodes now.
201   DAG.RemoveDeadNodes();
202 
203   return Changed;
204 }
205 
206 SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
207   // Generic legalization: just pass the operand through.
208   for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
209     AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
210   return Result.getValue(Op.getResNo());
211 }
212 
213 SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
214   // Note that LegalizeOp may be reentered even from single-use nodes, which
215   // means that we always must cache transformed nodes.
216   DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
217   if (I != LegalizedNodes.end()) return I->second;
218 
219   SDNode* Node = Op.getNode();
220 
221   // Legalize the operands
222   SmallVector<SDValue, 8> Ops;
223   for (const SDValue &Op : Node->op_values())
224     Ops.push_back(LegalizeOp(Op));
225 
226   SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0);
227 
228   bool HasVectorValue = false;
229   if (Op.getOpcode() == ISD::LOAD) {
230     LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
231     ISD::LoadExtType ExtType = LD->getExtensionType();
232     if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
233       LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
234                  Node->dump(&DAG));
235       switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
236                                    LD->getMemoryVT())) {
237       default: llvm_unreachable("This action is not supported yet!");
238       case TargetLowering::Legal:
239         return TranslateLegalizeResults(Op, Result);
240       case TargetLowering::Custom:
241         if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
242           if (Lowered == Result)
243             return TranslateLegalizeResults(Op, Lowered);
244           Changed = true;
245           if (Lowered->getNumValues() != Op->getNumValues()) {
246             // This expanded to something other than the load. Assume the
247             // lowering code took care of any chain values, and just handle the
248             // returned value.
249             assert(Result.getValue(1).use_empty() &&
250                    "There are still live users of the old chain!");
251             return LegalizeOp(Lowered);
252           }
253           return TranslateLegalizeResults(Op, Lowered);
254         }
255         LLVM_FALLTHROUGH;
256       case TargetLowering::Expand:
257         Changed = true;
258         return LegalizeOp(ExpandLoad(Op));
259       }
260     }
261   } else if (Op.getOpcode() == ISD::STORE) {
262     StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
263     EVT StVT = ST->getMemoryVT();
264     MVT ValVT = ST->getValue().getSimpleValueType();
265     if (StVT.isVector() && ST->isTruncatingStore()) {
266       LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
267                  Node->dump(&DAG));
268       switch (TLI.getTruncStoreAction(ValVT, StVT)) {
269       default: llvm_unreachable("This action is not supported yet!");
270       case TargetLowering::Legal:
271         return TranslateLegalizeResults(Op, Result);
272       case TargetLowering::Custom: {
273         SDValue Lowered = TLI.LowerOperation(Result, DAG);
274         Changed = Lowered != Result;
275         return TranslateLegalizeResults(Op, Lowered);
276       }
277       case TargetLowering::Expand:
278         Changed = true;
279         return LegalizeOp(ExpandStore(Op));
280       }
281     }
282   } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
283     HasVectorValue = true;
284 
285   for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
286        J != E;
287        ++J)
288     HasVectorValue |= J->isVector();
289   if (!HasVectorValue)
290     return TranslateLegalizeResults(Op, Result);
291 
292   TargetLowering::LegalizeAction Action = TargetLowering::Legal;
293   switch (Op.getOpcode()) {
294   default:
295     return TranslateLegalizeResults(Op, Result);
296   case ISD::STRICT_FADD:
297   case ISD::STRICT_FSUB:
298   case ISD::STRICT_FMUL:
299   case ISD::STRICT_FDIV:
300   case ISD::STRICT_FSQRT:
301   case ISD::STRICT_FMA:
302   case ISD::STRICT_FPOW:
303   case ISD::STRICT_FPOWI:
304   case ISD::STRICT_FSIN:
305   case ISD::STRICT_FCOS:
306   case ISD::STRICT_FEXP:
307   case ISD::STRICT_FEXP2:
308   case ISD::STRICT_FLOG:
309   case ISD::STRICT_FLOG10:
310   case ISD::STRICT_FLOG2:
311   case ISD::STRICT_FRINT:
312   case ISD::STRICT_FNEARBYINT:
313     // These pseudo-ops get legalized as if they were their non-strict
314     // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
315     // is also legal, but if ISD::FSQRT requires expansion then so does
316     // ISD::STRICT_FSQRT.
317     Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
318                                             Node->getValueType(0));
319     break;
320   case ISD::ADD:
321   case ISD::SUB:
322   case ISD::MUL:
323   case ISD::SDIV:
324   case ISD::UDIV:
325   case ISD::SREM:
326   case ISD::UREM:
327   case ISD::SDIVREM:
328   case ISD::UDIVREM:
329   case ISD::FADD:
330   case ISD::FSUB:
331   case ISD::FMUL:
332   case ISD::FDIV:
333   case ISD::FREM:
334   case ISD::AND:
335   case ISD::OR:
336   case ISD::XOR:
337   case ISD::SHL:
338   case ISD::SRA:
339   case ISD::SRL:
340   case ISD::ROTL:
341   case ISD::ROTR:
342   case ISD::BSWAP:
343   case ISD::BITREVERSE:
344   case ISD::CTLZ:
345   case ISD::CTTZ:
346   case ISD::CTLZ_ZERO_UNDEF:
347   case ISD::CTTZ_ZERO_UNDEF:
348   case ISD::CTPOP:
349   case ISD::SELECT:
350   case ISD::VSELECT:
351   case ISD::SELECT_CC:
352   case ISD::SETCC:
353   case ISD::ZERO_EXTEND:
354   case ISD::ANY_EXTEND:
355   case ISD::TRUNCATE:
356   case ISD::SIGN_EXTEND:
357   case ISD::FP_TO_SINT:
358   case ISD::FP_TO_UINT:
359   case ISD::FNEG:
360   case ISD::FABS:
361   case ISD::FMINNUM:
362   case ISD::FMAXNUM:
363   case ISD::FMINNAN:
364   case ISD::FMAXNAN:
365   case ISD::FCOPYSIGN:
366   case ISD::FSQRT:
367   case ISD::FSIN:
368   case ISD::FCOS:
369   case ISD::FPOWI:
370   case ISD::FPOW:
371   case ISD::FLOG:
372   case ISD::FLOG2:
373   case ISD::FLOG10:
374   case ISD::FEXP:
375   case ISD::FEXP2:
376   case ISD::FCEIL:
377   case ISD::FTRUNC:
378   case ISD::FRINT:
379   case ISD::FNEARBYINT:
380   case ISD::FROUND:
381   case ISD::FFLOOR:
382   case ISD::FP_ROUND:
383   case ISD::FP_EXTEND:
384   case ISD::FMA:
385   case ISD::SIGN_EXTEND_INREG:
386   case ISD::ANY_EXTEND_VECTOR_INREG:
387   case ISD::SIGN_EXTEND_VECTOR_INREG:
388   case ISD::ZERO_EXTEND_VECTOR_INREG:
389   case ISD::SMIN:
390   case ISD::SMAX:
391   case ISD::UMIN:
392   case ISD::UMAX:
393   case ISD::SMUL_LOHI:
394   case ISD::UMUL_LOHI:
395   case ISD::FCANONICALIZE:
396     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
397     break;
398   case ISD::FP_ROUND_INREG:
399     Action = TLI.getOperationAction(Node->getOpcode(),
400                cast<VTSDNode>(Node->getOperand(1))->getVT());
401     break;
402   case ISD::SINT_TO_FP:
403   case ISD::UINT_TO_FP:
404     Action = TLI.getOperationAction(Node->getOpcode(),
405                                     Node->getOperand(0).getValueType());
406     break;
407   case ISD::MSCATTER:
408     Action = TLI.getOperationAction(Node->getOpcode(),
409                cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
410     break;
411   case ISD::MSTORE:
412     Action = TLI.getOperationAction(Node->getOpcode(),
413                cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
414     break;
415   }
416 
417   LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
418 
419   switch (Action) {
420   default: llvm_unreachable("This action is not supported yet!");
421   case TargetLowering::Promote:
422     Result = Promote(Op);
423     Changed = true;
424     break;
425   case TargetLowering::Legal:
426     LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
427     break;
428   case TargetLowering::Custom: {
429     LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
430     if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
431       LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
432       Result = Tmp1;
433       break;
434     }
435     LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
436     LLVM_FALLTHROUGH;
437   }
438   case TargetLowering::Expand:
439     Result = Expand(Op);
440   }
441 
442   // Make sure that the generated code is itself legal.
443   if (Result != Op) {
444     Result = LegalizeOp(Result);
445     Changed = true;
446   }
447 
448   // Note that LegalizeOp may be reentered even from single-use nodes, which
449   // means that we always must cache transformed nodes.
450   AddLegalizedOperand(Op, Result);
451   return Result;
452 }
453 
454 SDValue VectorLegalizer::Promote(SDValue Op) {
455   // For a few operations there is a specific concept for promotion based on
456   // the operand's type.
457   switch (Op.getOpcode()) {
458   case ISD::SINT_TO_FP:
459   case ISD::UINT_TO_FP:
460     // "Promote" the operation by extending the operand.
461     return PromoteINT_TO_FP(Op);
462   case ISD::FP_TO_UINT:
463   case ISD::FP_TO_SINT:
464     // Promote the operation by extending the operand.
465     return PromoteFP_TO_INT(Op);
466   }
467 
468   // There are currently two cases of vector promotion:
469   // 1) Bitcasting a vector of integers to a different type to a vector of the
470   //    same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
471   // 2) Extending a vector of floats to a vector of the same number of larger
472   //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
473   MVT VT = Op.getSimpleValueType();
474   assert(Op.getNode()->getNumValues() == 1 &&
475          "Can't promote a vector with multiple results!");
476   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
477   SDLoc dl(Op);
478   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
479 
480   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
481     if (Op.getOperand(j).getValueType().isVector())
482       if (Op.getOperand(j)
483               .getValueType()
484               .getVectorElementType()
485               .isFloatingPoint() &&
486           NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
487         Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
488       else
489         Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
490     else
491       Operands[j] = Op.getOperand(j);
492   }
493 
494   Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
495   if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
496       (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
497        NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
498     return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl));
499   else
500     return DAG.getNode(ISD::BITCAST, dl, VT, Op);
501 }
502 
503 SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
504   // INT_TO_FP operations may require the input operand be promoted even
505   // when the type is otherwise legal.
506   MVT VT = Op.getOperand(0).getSimpleValueType();
507   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
508   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
509          "Vectors have different number of elements!");
510 
511   SDLoc dl(Op);
512   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
513 
514   unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
515     ISD::SIGN_EXTEND;
516   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
517     if (Op.getOperand(j).getValueType().isVector())
518       Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
519     else
520       Operands[j] = Op.getOperand(j);
521   }
522 
523   return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
524 }
525 
526 // For FP_TO_INT we promote the result type to a vector type with wider
527 // elements and then truncate the result.  This is different from the default
528 // PromoteVector which uses bitcast to promote thus assumning that the
529 // promoted vector type has the same overall size.
530 SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
531   MVT VT = Op.getSimpleValueType();
532   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
533   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
534          "Vectors have different number of elements!");
535 
536   unsigned NewOpc = Op->getOpcode();
537   // Change FP_TO_UINT to FP_TO_SINT if possible.
538   // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
539   if (NewOpc == ISD::FP_TO_UINT &&
540       TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
541     NewOpc = ISD::FP_TO_SINT;
542 
543   SDLoc dl(Op);
544   SDValue Promoted  = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
545 
546   // Assert that the converted value fits in the original type.  If it doesn't
547   // (eg: because the value being converted is too big), then the result of the
548   // original operation was undefined anyway, so the assert is still correct.
549   Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
550                                                             : ISD::AssertSext,
551                          dl, NVT, Promoted,
552                          DAG.getValueType(VT.getScalarType()));
553   return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
554 }
555 
556 SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
557   LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
558 
559   EVT SrcVT = LD->getMemoryVT();
560   EVT SrcEltVT = SrcVT.getScalarType();
561   unsigned NumElem = SrcVT.getVectorNumElements();
562 
563   SDValue NewChain;
564   SDValue Value;
565   if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
566     SDLoc dl(Op);
567 
568     SmallVector<SDValue, 8> Vals;
569     SmallVector<SDValue, 8> LoadChains;
570 
571     EVT DstEltVT = LD->getValueType(0).getScalarType();
572     SDValue Chain = LD->getChain();
573     SDValue BasePTR = LD->getBasePtr();
574     ISD::LoadExtType ExtType = LD->getExtensionType();
575 
576     // When elements in a vector is not byte-addressable, we cannot directly
577     // load each element by advancing pointer, which could only address bytes.
578     // Instead, we load all significant words, mask bits off, and concatenate
579     // them to form each element. Finally, they are extended to destination
580     // scalar type to build the destination vector.
581     EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
582 
583     assert(WideVT.isRound() &&
584            "Could not handle the sophisticated case when the widest integer is"
585            " not power of 2.");
586     assert(WideVT.bitsGE(SrcEltVT) &&
587            "Type is not legalized?");
588 
589     unsigned WideBytes = WideVT.getStoreSize();
590     unsigned Offset = 0;
591     unsigned RemainingBytes = SrcVT.getStoreSize();
592     SmallVector<SDValue, 8> LoadVals;
593     while (RemainingBytes > 0) {
594       SDValue ScalarLoad;
595       unsigned LoadBytes = WideBytes;
596 
597       if (RemainingBytes >= LoadBytes) {
598         ScalarLoad =
599             DAG.getLoad(WideVT, dl, Chain, BasePTR,
600                         LD->getPointerInfo().getWithOffset(Offset),
601                         MinAlign(LD->getAlignment(), Offset),
602                         LD->getMemOperand()->getFlags(), LD->getAAInfo());
603       } else {
604         EVT LoadVT = WideVT;
605         while (RemainingBytes < LoadBytes) {
606           LoadBytes >>= 1; // Reduce the load size by half.
607           LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
608         }
609         ScalarLoad =
610             DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
611                            LD->getPointerInfo().getWithOffset(Offset), LoadVT,
612                            MinAlign(LD->getAlignment(), Offset),
613                            LD->getMemOperand()->getFlags(), LD->getAAInfo());
614       }
615 
616       RemainingBytes -= LoadBytes;
617       Offset += LoadBytes;
618 
619       BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
620 
621       LoadVals.push_back(ScalarLoad.getValue(0));
622       LoadChains.push_back(ScalarLoad.getValue(1));
623     }
624 
625     // Extract bits, pack and extend/trunc them into destination type.
626     unsigned SrcEltBits = SrcEltVT.getSizeInBits();
627     SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT);
628 
629     unsigned BitOffset = 0;
630     unsigned WideIdx = 0;
631     unsigned WideBits = WideVT.getSizeInBits();
632 
633     for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
634       SDValue Lo, Hi, ShAmt;
635 
636       if (BitOffset < WideBits) {
637         ShAmt = DAG.getConstant(
638             BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
639         Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
640         Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
641       }
642 
643       BitOffset += SrcEltBits;
644       if (BitOffset >= WideBits) {
645         WideIdx++;
646         BitOffset -= WideBits;
647         if (BitOffset > 0) {
648           ShAmt = DAG.getConstant(
649               SrcEltBits - BitOffset, dl,
650               TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
651           Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
652           Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
653         }
654       }
655 
656       if (Hi.getNode())
657         Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
658 
659       switch (ExtType) {
660       default: llvm_unreachable("Unknown extended-load op!");
661       case ISD::EXTLOAD:
662         Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
663         break;
664       case ISD::ZEXTLOAD:
665         Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
666         break;
667       case ISD::SEXTLOAD:
668         ShAmt =
669             DAG.getConstant(WideBits - SrcEltBits, dl,
670                             TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
671         Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
672         Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
673         Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
674         break;
675       }
676       Vals.push_back(Lo);
677     }
678 
679     NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
680     Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
681   } else {
682     SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
683 
684     NewChain = Scalarized.getValue(1);
685     Value = Scalarized.getValue(0);
686   }
687 
688   AddLegalizedOperand(Op.getValue(0), Value);
689   AddLegalizedOperand(Op.getValue(1), NewChain);
690 
691   return (Op.getResNo() ? NewChain : Value);
692 }
693 
694 SDValue VectorLegalizer::ExpandStore(SDValue Op) {
695   StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
696   SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
697   AddLegalizedOperand(Op, TF);
698   return TF;
699 }
700 
701 SDValue VectorLegalizer::Expand(SDValue Op) {
702   switch (Op->getOpcode()) {
703   case ISD::SIGN_EXTEND_INREG:
704     return ExpandSEXTINREG(Op);
705   case ISD::ANY_EXTEND_VECTOR_INREG:
706     return ExpandANY_EXTEND_VECTOR_INREG(Op);
707   case ISD::SIGN_EXTEND_VECTOR_INREG:
708     return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
709   case ISD::ZERO_EXTEND_VECTOR_INREG:
710     return ExpandZERO_EXTEND_VECTOR_INREG(Op);
711   case ISD::BSWAP:
712     return ExpandBSWAP(Op);
713   case ISD::VSELECT:
714     return ExpandVSELECT(Op);
715   case ISD::SELECT:
716     return ExpandSELECT(Op);
717   case ISD::UINT_TO_FP:
718     return ExpandUINT_TO_FLOAT(Op);
719   case ISD::FNEG:
720     return ExpandFNEG(Op);
721   case ISD::FSUB:
722     return ExpandFSUB(Op);
723   case ISD::SETCC:
724     return UnrollVSETCC(Op);
725   case ISD::BITREVERSE:
726     return ExpandBITREVERSE(Op);
727   case ISD::CTLZ:
728   case ISD::CTLZ_ZERO_UNDEF:
729     return ExpandCTLZ(Op);
730   case ISD::CTTZ_ZERO_UNDEF:
731     return ExpandCTTZ_ZERO_UNDEF(Op);
732   case ISD::STRICT_FADD:
733   case ISD::STRICT_FSUB:
734   case ISD::STRICT_FMUL:
735   case ISD::STRICT_FDIV:
736   case ISD::STRICT_FSQRT:
737   case ISD::STRICT_FMA:
738   case ISD::STRICT_FPOW:
739   case ISD::STRICT_FPOWI:
740   case ISD::STRICT_FSIN:
741   case ISD::STRICT_FCOS:
742   case ISD::STRICT_FEXP:
743   case ISD::STRICT_FEXP2:
744   case ISD::STRICT_FLOG:
745   case ISD::STRICT_FLOG10:
746   case ISD::STRICT_FLOG2:
747   case ISD::STRICT_FRINT:
748   case ISD::STRICT_FNEARBYINT:
749     return ExpandStrictFPOp(Op);
750   default:
751     return DAG.UnrollVectorOp(Op.getNode());
752   }
753 }
754 
755 SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
756   // Lower a select instruction where the condition is a scalar and the
757   // operands are vectors. Lower this select to VSELECT and implement it
758   // using XOR AND OR. The selector bit is broadcasted.
759   EVT VT = Op.getValueType();
760   SDLoc DL(Op);
761 
762   SDValue Mask = Op.getOperand(0);
763   SDValue Op1 = Op.getOperand(1);
764   SDValue Op2 = Op.getOperand(2);
765 
766   assert(VT.isVector() && !Mask.getValueType().isVector()
767          && Op1.getValueType() == Op2.getValueType() && "Invalid type");
768 
769   // If we can't even use the basic vector operations of
770   // AND,OR,XOR, we will have to scalarize the op.
771   // Notice that the operation may be 'promoted' which means that it is
772   // 'bitcasted' to another type which is handled.
773   // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
774   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
775       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
776       TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand ||
777       TLI.getOperationAction(ISD::BUILD_VECTOR,  VT) == TargetLowering::Expand)
778     return DAG.UnrollVectorOp(Op.getNode());
779 
780   // Generate a mask operand.
781   EVT MaskTy = VT.changeVectorElementTypeToInteger();
782 
783   // What is the size of each element in the vector mask.
784   EVT BitTy = MaskTy.getScalarType();
785 
786   Mask = DAG.getSelect(DL, BitTy, Mask,
787           DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
788                           BitTy),
789           DAG.getConstant(0, DL, BitTy));
790 
791   // Broadcast the mask so that the entire vector is all-one or all zero.
792   Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
793 
794   // Bitcast the operands to be the same type as the mask.
795   // This is needed when we select between FP types because
796   // the mask is a vector of integers.
797   Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
798   Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
799 
800   SDValue AllOnes = DAG.getConstant(
801             APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
802   SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
803 
804   Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
805   Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
806   SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
807   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
808 }
809 
810 SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
811   EVT VT = Op.getValueType();
812 
813   // Make sure that the SRA and SHL instructions are available.
814   if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
815       TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
816     return DAG.UnrollVectorOp(Op.getNode());
817 
818   SDLoc DL(Op);
819   EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
820 
821   unsigned BW = VT.getScalarSizeInBits();
822   unsigned OrigBW = OrigTy.getScalarSizeInBits();
823   SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
824 
825   Op = Op.getOperand(0);
826   Op =   DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
827   return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
828 }
829 
830 // Generically expand a vector anyext in register to a shuffle of the relevant
831 // lanes into the appropriate locations, with other lanes left undef.
832 SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
833   SDLoc DL(Op);
834   EVT VT = Op.getValueType();
835   int NumElements = VT.getVectorNumElements();
836   SDValue Src = Op.getOperand(0);
837   EVT SrcVT = Src.getValueType();
838   int NumSrcElements = SrcVT.getVectorNumElements();
839 
840   // Build a base mask of undef shuffles.
841   SmallVector<int, 16> ShuffleMask;
842   ShuffleMask.resize(NumSrcElements, -1);
843 
844   // Place the extended lanes into the correct locations.
845   int ExtLaneScale = NumSrcElements / NumElements;
846   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
847   for (int i = 0; i < NumElements; ++i)
848     ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
849 
850   return DAG.getNode(
851       ISD::BITCAST, DL, VT,
852       DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
853 }
854 
855 SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
856   SDLoc DL(Op);
857   EVT VT = Op.getValueType();
858   SDValue Src = Op.getOperand(0);
859   EVT SrcVT = Src.getValueType();
860 
861   // First build an any-extend node which can be legalized above when we
862   // recurse through it.
863   Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
864 
865   // Now we need sign extend. Do this by shifting the elements. Even if these
866   // aren't legal operations, they have a better chance of being legalized
867   // without full scalarization than the sign extension does.
868   unsigned EltWidth = VT.getScalarSizeInBits();
869   unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
870   SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
871   return DAG.getNode(ISD::SRA, DL, VT,
872                      DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
873                      ShiftAmount);
874 }
875 
876 // Generically expand a vector zext in register to a shuffle of the relevant
877 // lanes into the appropriate locations, a blend of zero into the high bits,
878 // and a bitcast to the wider element type.
879 SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
880   SDLoc DL(Op);
881   EVT VT = Op.getValueType();
882   int NumElements = VT.getVectorNumElements();
883   SDValue Src = Op.getOperand(0);
884   EVT SrcVT = Src.getValueType();
885   int NumSrcElements = SrcVT.getVectorNumElements();
886 
887   // Build up a zero vector to blend into this one.
888   SDValue Zero = DAG.getConstant(0, DL, SrcVT);
889 
890   // Shuffle the incoming lanes into the correct position, and pull all other
891   // lanes from the zero vector.
892   SmallVector<int, 16> ShuffleMask;
893   ShuffleMask.reserve(NumSrcElements);
894   for (int i = 0; i < NumSrcElements; ++i)
895     ShuffleMask.push_back(i);
896 
897   int ExtLaneScale = NumSrcElements / NumElements;
898   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
899   for (int i = 0; i < NumElements; ++i)
900     ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
901 
902   return DAG.getNode(ISD::BITCAST, DL, VT,
903                      DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
904 }
905 
906 static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
907   int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
908   for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
909     for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
910       ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
911 }
912 
913 SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
914   EVT VT = Op.getValueType();
915 
916   // Generate a byte wise shuffle mask for the BSWAP.
917   SmallVector<int, 16> ShuffleMask;
918   createBSWAPShuffleMask(VT, ShuffleMask);
919   EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
920 
921   // Only emit a shuffle if the mask is legal.
922   if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
923     return DAG.UnrollVectorOp(Op.getNode());
924 
925   SDLoc DL(Op);
926   Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
927   Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
928   return DAG.getNode(ISD::BITCAST, DL, VT, Op);
929 }
930 
931 SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
932   EVT VT = Op.getValueType();
933 
934   // If we have the scalar operation, it's probably cheaper to unroll it.
935   if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
936     return DAG.UnrollVectorOp(Op.getNode());
937 
938   // If the vector element width is a whole number of bytes, test if its legal
939   // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
940   // vector. This greatly reduces the number of bit shifts necessary.
941   unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
942   if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
943     SmallVector<int, 16> BSWAPMask;
944     createBSWAPShuffleMask(VT, BSWAPMask);
945 
946     EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
947     if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
948         (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
949          (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
950           TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
951           TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
952           TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
953       SDLoc DL(Op);
954       Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
955       Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
956                                 BSWAPMask);
957       Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
958       return DAG.getNode(ISD::BITCAST, DL, VT, Op);
959     }
960   }
961 
962   // If we have the appropriate vector bit operations, it is better to use them
963   // than unrolling and expanding each component.
964   if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
965       !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
966       !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
967       !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
968     return DAG.UnrollVectorOp(Op.getNode());
969 
970   // Let LegalizeDAG handle this later.
971   return Op;
972 }
973 
974 SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
975   // Implement VSELECT in terms of XOR, AND, OR
976   // on platforms which do not support blend natively.
977   SDLoc DL(Op);
978 
979   SDValue Mask = Op.getOperand(0);
980   SDValue Op1 = Op.getOperand(1);
981   SDValue Op2 = Op.getOperand(2);
982 
983   EVT VT = Mask.getValueType();
984 
985   // If we can't even use the basic vector operations of
986   // AND,OR,XOR, we will have to scalarize the op.
987   // Notice that the operation may be 'promoted' which means that it is
988   // 'bitcasted' to another type which is handled.
989   // This operation also isn't safe with AND, OR, XOR when the boolean
990   // type is 0/1 as we need an all ones vector constant to mask with.
991   // FIXME: Sign extend 1 to all ones if thats legal on the target.
992   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
993       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
994       TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
995       TLI.getBooleanContents(Op1.getValueType()) !=
996           TargetLowering::ZeroOrNegativeOneBooleanContent)
997     return DAG.UnrollVectorOp(Op.getNode());
998 
999   // If the mask and the type are different sizes, unroll the vector op. This
1000   // can occur when getSetCCResultType returns something that is different in
1001   // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
1002   if (VT.getSizeInBits() != Op1.getValueSizeInBits())
1003     return DAG.UnrollVectorOp(Op.getNode());
1004 
1005   // Bitcast the operands to be the same type as the mask.
1006   // This is needed when we select between FP types because
1007   // the mask is a vector of integers.
1008   Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
1009   Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
1010 
1011   SDValue AllOnes = DAG.getConstant(
1012     APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
1013   SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
1014 
1015   Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
1016   Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
1017   SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
1018   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
1019 }
1020 
1021 SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
1022   EVT VT = Op.getOperand(0).getValueType();
1023   SDLoc DL(Op);
1024 
1025   // Make sure that the SINT_TO_FP and SRL instructions are available.
1026   if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
1027       TLI.getOperationAction(ISD::SRL,        VT) == TargetLowering::Expand)
1028     return DAG.UnrollVectorOp(Op.getNode());
1029 
1030   unsigned BW = VT.getScalarSizeInBits();
1031   assert((BW == 64 || BW == 32) &&
1032          "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
1033 
1034   SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
1035 
1036   // Constants to clear the upper part of the word.
1037   // Notice that we can also use SHL+SHR, but using a constant is slightly
1038   // faster on x86.
1039   uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
1040   SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
1041 
1042   // Two to the power of half-word-size.
1043   SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
1044 
1045   // Clear upper part of LO, lower HI
1046   SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
1047   SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
1048 
1049   // Convert hi and lo to floats
1050   // Convert the hi part back to the upper values
1051   // TODO: Can any fast-math-flags be set on these nodes?
1052   SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
1053           fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
1054   SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
1055 
1056   // Add the two halves
1057   return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
1058 }
1059 
1060 SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
1061   if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
1062     SDLoc DL(Op);
1063     SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
1064     // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
1065     return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
1066                        Zero, Op.getOperand(0));
1067   }
1068   return DAG.UnrollVectorOp(Op.getNode());
1069 }
1070 
1071 SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
1072   // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
1073   // we can defer this to operation legalization where it will be lowered as
1074   // a+(-b).
1075   EVT VT = Op.getValueType();
1076   if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
1077       TLI.isOperationLegalOrCustom(ISD::FADD, VT))
1078     return Op; // Defer to LegalizeDAG
1079 
1080   return DAG.UnrollVectorOp(Op.getNode());
1081 }
1082 
1083 SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
1084   EVT VT = Op.getValueType();
1085   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
1086 
1087   // If the non-ZERO_UNDEF version is supported we can use that instead.
1088   if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
1089       TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
1090     SDLoc DL(Op);
1091     return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0));
1092   }
1093 
1094   // If CTPOP is available we can lower with a CTPOP based method:
1095   // u16 ctlz(u16 x) {
1096   //   x |= (x >> 1);
1097   //   x |= (x >> 2);
1098   //   x |= (x >> 4);
1099   //   x |= (x >> 8);
1100   //   return ctpop(~x);
1101   // }
1102   // Ref: "Hacker's Delight" by Henry Warren
1103   if (isPowerOf2_32(NumBitsPerElt) &&
1104       TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
1105       TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1106       TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) &&
1107       TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) {
1108     SDLoc DL(Op);
1109     SDValue Res = Op.getOperand(0);
1110     EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
1111 
1112     for (unsigned i = 1; i != NumBitsPerElt; i *= 2)
1113       Res = DAG.getNode(
1114           ISD::OR, DL, VT, Res,
1115           DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy)));
1116 
1117     Res = DAG.getNOT(DL, Res, VT);
1118     return DAG.getNode(ISD::CTPOP, DL, VT, Res);
1119   }
1120 
1121   // Otherwise go ahead and unroll.
1122   return DAG.UnrollVectorOp(Op.getNode());
1123 }
1124 
1125 SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {
1126   // If the non-ZERO_UNDEF version is supported we can use that instead.
1127   if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) {
1128     SDLoc DL(Op);
1129     return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0));
1130   }
1131 
1132   // Otherwise go ahead and unroll.
1133   return DAG.UnrollVectorOp(Op.getNode());
1134 }
1135 
1136 SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
1137   EVT VT = Op.getValueType();
1138   EVT EltVT = VT.getVectorElementType();
1139   unsigned NumElems = VT.getVectorNumElements();
1140   unsigned NumOpers = Op.getNumOperands();
1141   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1142   EVT ValueVTs[] = {EltVT, MVT::Other};
1143   SDValue Chain = Op.getOperand(0);
1144   SDLoc dl(Op);
1145 
1146   SmallVector<SDValue, 32> OpValues;
1147   SmallVector<SDValue, 32> OpChains;
1148   for (unsigned i = 0; i < NumElems; ++i) {
1149     SmallVector<SDValue, 4> Opers;
1150     SDValue Idx = DAG.getConstant(i, dl,
1151                                   TLI.getVectorIdxTy(DAG.getDataLayout()));
1152 
1153     // The Chain is the first operand.
1154     Opers.push_back(Chain);
1155 
1156     // Now process the remaining operands.
1157     for (unsigned j = 1; j < NumOpers; ++j) {
1158       SDValue Oper = Op.getOperand(j);
1159       EVT OperVT = Oper.getValueType();
1160 
1161       if (OperVT.isVector())
1162         Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
1163                            EltVT, Oper, Idx);
1164 
1165       Opers.push_back(Oper);
1166     }
1167 
1168     SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
1169 
1170     OpValues.push_back(ScalarOp.getValue(0));
1171     OpChains.push_back(ScalarOp.getValue(1));
1172   }
1173 
1174   SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
1175   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
1176 
1177   AddLegalizedOperand(Op.getValue(0), Result);
1178   AddLegalizedOperand(Op.getValue(1), NewChain);
1179 
1180   return NewChain;
1181 }
1182 
1183 SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
1184   EVT VT = Op.getValueType();
1185   unsigned NumElems = VT.getVectorNumElements();
1186   EVT EltVT = VT.getVectorElementType();
1187   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
1188   EVT TmpEltVT = LHS.getValueType().getVectorElementType();
1189   SDLoc dl(Op);
1190   SmallVector<SDValue, 8> Ops(NumElems);
1191   for (unsigned i = 0; i < NumElems; ++i) {
1192     SDValue LHSElem = DAG.getNode(
1193         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
1194         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
1195     SDValue RHSElem = DAG.getNode(
1196         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
1197         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
1198     Ops[i] = DAG.getNode(ISD::SETCC, dl,
1199                          TLI.getSetCCResultType(DAG.getDataLayout(),
1200                                                 *DAG.getContext(), TmpEltVT),
1201                          LHSElem, RHSElem, CC);
1202     Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
1203                            DAG.getConstant(APInt::getAllOnesValue
1204                                            (EltVT.getSizeInBits()), dl, EltVT),
1205                            DAG.getConstant(0, dl, EltVT));
1206   }
1207   return DAG.getBuildVector(VT, dl, Ops);
1208 }
1209 
1210 bool SelectionDAG::LegalizeVectors() {
1211   return VectorLegalizer(*this).Run();
1212 }
1213