1 //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the SelectionDAG::LegalizeVectors method.
11 //
12 // The vector legalizer looks for vector operations which might need to be
13 // scalarized and legalizes them. This is a separate step from Legalize because
14 // scalarizing can introduce illegal types.  For example, suppose we have an
15 // ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
16 // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
17 // operation, which introduces nodes with the illegal type i64 which must be
18 // expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
19 // the operation must be unrolled, which introduces nodes with the illegal
20 // type i8 which must be promoted.
21 //
22 // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
23 // or operations that happen to take a vector which are custom-lowered;
24 // the legalization for such operations never produces nodes
25 // with illegal types, so it's okay to put off legalizing them until
26 // SelectionDAG::Legalize runs.
27 //
28 //===----------------------------------------------------------------------===//
29 
30 #include "llvm/ADT/APInt.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/CodeGen/ISDOpcodes.h"
34 #include "llvm/CodeGen/MachineMemOperand.h"
35 #include "llvm/CodeGen/SelectionDAG.h"
36 #include "llvm/CodeGen/SelectionDAGNodes.h"
37 #include "llvm/CodeGen/TargetLowering.h"
38 #include "llvm/CodeGen/ValueTypes.h"
39 #include "llvm/IR/DataLayout.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/Compiler.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/MachineValueType.h"
44 #include "llvm/Support/MathExtras.h"
45 #include <cassert>
46 #include <cstdint>
47 #include <iterator>
48 #include <utility>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "legalizevectorops"
53 
54 namespace {
55 
56 class VectorLegalizer {
57   SelectionDAG& DAG;
58   const TargetLowering &TLI;
59   bool Changed = false; // Keep track of whether anything changed
60 
61   /// For nodes that are of legal width, and that have more than one use, this
62   /// map indicates what regularized operand to use.  This allows us to avoid
63   /// legalizing the same thing more than once.
64   SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
65 
66   /// Adds a node to the translation cache.
67   void AddLegalizedOperand(SDValue From, SDValue To) {
68     LegalizedNodes.insert(std::make_pair(From, To));
69     // If someone requests legalization of the new node, return itself.
70     if (From != To)
71       LegalizedNodes.insert(std::make_pair(To, To));
72   }
73 
74   /// Legalizes the given node.
75   SDValue LegalizeOp(SDValue Op);
76 
77   /// Assuming the node is legal, "legalize" the results.
78   SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
79 
80   /// Implements unrolling a VSETCC.
81   SDValue UnrollVSETCC(SDValue Op);
82 
83   /// Implement expand-based legalization of vector operations.
84   ///
85   /// This is just a high-level routine to dispatch to specific code paths for
86   /// operations to legalize them.
87   SDValue Expand(SDValue Op);
88 
89   /// Implements expansion for FNEG; falls back to UnrollVectorOp if
90   /// FSUB isn't legal.
91   ///
92   /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
93   /// SINT_TO_FLOAT and SHR on vectors isn't legal.
94   SDValue ExpandUINT_TO_FLOAT(SDValue Op);
95 
96   /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
97   SDValue ExpandSEXTINREG(SDValue Op);
98 
99   /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
100   ///
101   /// Shuffles the low lanes of the operand into place and bitcasts to the proper
102   /// type. The contents of the bits in the extended part of each element are
103   /// undef.
104   SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
105 
106   /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
107   ///
108   /// Shuffles the low lanes of the operand into place, bitcasts to the proper
109   /// type, then shifts left and arithmetic shifts right to introduce a sign
110   /// extension.
111   SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
112 
113   /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
114   ///
115   /// Shuffles the low lanes of the operand into place and blends zeros into
116   /// the remaining lanes, finally bitcasting to the proper type.
117   SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
118 
119   /// Expand bswap of vectors into a shuffle if legal.
120   SDValue ExpandBSWAP(SDValue Op);
121 
122   /// Implement vselect in terms of XOR, AND, OR when blend is not
123   /// supported by the target.
124   SDValue ExpandVSELECT(SDValue Op);
125   SDValue ExpandSELECT(SDValue Op);
126   SDValue ExpandLoad(SDValue Op);
127   SDValue ExpandStore(SDValue Op);
128   SDValue ExpandFNEG(SDValue Op);
129   SDValue ExpandFSUB(SDValue Op);
130   SDValue ExpandBITREVERSE(SDValue Op);
131   SDValue ExpandCTLZ(SDValue Op);
132   SDValue ExpandCTTZ(SDValue Op);
133   SDValue ExpandStrictFPOp(SDValue Op);
134 
135   /// Implements vector promotion.
136   ///
137   /// This is essentially just bitcasting the operands to a different type and
138   /// bitcasting the result back to the original type.
139   SDValue Promote(SDValue Op);
140 
141   /// Implements [SU]INT_TO_FP vector promotion.
142   ///
143   /// This is a [zs]ext of the input operand to a larger integer type.
144   SDValue PromoteINT_TO_FP(SDValue Op);
145 
146   /// Implements FP_TO_[SU]INT vector promotion of the result type.
147   ///
148   /// It is promoted to a larger integer type.  The result is then
149   /// truncated back to the original type.
150   SDValue PromoteFP_TO_INT(SDValue Op);
151 
152 public:
153   VectorLegalizer(SelectionDAG& dag) :
154       DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
155 
156   /// Begin legalizer the vector operations in the DAG.
157   bool Run();
158 };
159 
160 } // end anonymous namespace
161 
162 bool VectorLegalizer::Run() {
163   // Before we start legalizing vector nodes, check if there are any vectors.
164   bool HasVectors = false;
165   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
166        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
167     // Check if the values of the nodes contain vectors. We don't need to check
168     // the operands because we are going to check their values at some point.
169     for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
170          J != E; ++J)
171       HasVectors |= J->isVector();
172 
173     // If we found a vector node we can start the legalization.
174     if (HasVectors)
175       break;
176   }
177 
178   // If this basic block has no vectors then no need to legalize vectors.
179   if (!HasVectors)
180     return false;
181 
182   // The legalize process is inherently a bottom-up recursive process (users
183   // legalize their uses before themselves).  Given infinite stack space, we
184   // could just start legalizing on the root and traverse the whole graph.  In
185   // practice however, this causes us to run out of stack space on large basic
186   // blocks.  To avoid this problem, compute an ordering of the nodes where each
187   // node is only legalized after all of its operands are legalized.
188   DAG.AssignTopologicalOrder();
189   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
190        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
191     LegalizeOp(SDValue(&*I, 0));
192 
193   // Finally, it's possible the root changed.  Get the new root.
194   SDValue OldRoot = DAG.getRoot();
195   assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
196   DAG.setRoot(LegalizedNodes[OldRoot]);
197 
198   LegalizedNodes.clear();
199 
200   // Remove dead nodes now.
201   DAG.RemoveDeadNodes();
202 
203   return Changed;
204 }
205 
206 SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
207   // Generic legalization: just pass the operand through.
208   for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
209     AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
210   return Result.getValue(Op.getResNo());
211 }
212 
213 SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
214   // Note that LegalizeOp may be reentered even from single-use nodes, which
215   // means that we always must cache transformed nodes.
216   DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
217   if (I != LegalizedNodes.end()) return I->second;
218 
219   SDNode* Node = Op.getNode();
220 
221   // Legalize the operands
222   SmallVector<SDValue, 8> Ops;
223   for (const SDValue &Op : Node->op_values())
224     Ops.push_back(LegalizeOp(Op));
225 
226   SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
227                            Op.getResNo());
228 
229   if (Op.getOpcode() == ISD::LOAD) {
230     LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
231     ISD::LoadExtType ExtType = LD->getExtensionType();
232     if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
233       LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
234                  Node->dump(&DAG));
235       switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
236                                    LD->getMemoryVT())) {
237       default: llvm_unreachable("This action is not supported yet!");
238       case TargetLowering::Legal:
239         return TranslateLegalizeResults(Op, Result);
240       case TargetLowering::Custom:
241         if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
242           assert(Lowered->getNumValues() == Op->getNumValues() &&
243                  "Unexpected number of results");
244           Changed = Lowered != Result;
245           return TranslateLegalizeResults(Op, Lowered);
246         }
247         LLVM_FALLTHROUGH;
248       case TargetLowering::Expand:
249         Changed = true;
250         return LegalizeOp(ExpandLoad(Op));
251       }
252     }
253   } else if (Op.getOpcode() == ISD::STORE) {
254     StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
255     EVT StVT = ST->getMemoryVT();
256     MVT ValVT = ST->getValue().getSimpleValueType();
257     if (StVT.isVector() && ST->isTruncatingStore()) {
258       LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
259                  Node->dump(&DAG));
260       switch (TLI.getTruncStoreAction(ValVT, StVT)) {
261       default: llvm_unreachable("This action is not supported yet!");
262       case TargetLowering::Legal:
263         return TranslateLegalizeResults(Op, Result);
264       case TargetLowering::Custom: {
265         SDValue Lowered = TLI.LowerOperation(Result, DAG);
266         Changed = Lowered != Result;
267         return TranslateLegalizeResults(Op, Lowered);
268       }
269       case TargetLowering::Expand:
270         Changed = true;
271         return LegalizeOp(ExpandStore(Op));
272       }
273     }
274   }
275 
276   bool HasVectorValue = false;
277   for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
278        J != E;
279        ++J)
280     HasVectorValue |= J->isVector();
281   if (!HasVectorValue)
282     return TranslateLegalizeResults(Op, Result);
283 
284   TargetLowering::LegalizeAction Action = TargetLowering::Legal;
285   switch (Op.getOpcode()) {
286   default:
287     return TranslateLegalizeResults(Op, Result);
288   case ISD::STRICT_FADD:
289   case ISD::STRICT_FSUB:
290   case ISD::STRICT_FMUL:
291   case ISD::STRICT_FDIV:
292   case ISD::STRICT_FREM:
293   case ISD::STRICT_FSQRT:
294   case ISD::STRICT_FMA:
295   case ISD::STRICT_FPOW:
296   case ISD::STRICT_FPOWI:
297   case ISD::STRICT_FSIN:
298   case ISD::STRICT_FCOS:
299   case ISD::STRICT_FEXP:
300   case ISD::STRICT_FEXP2:
301   case ISD::STRICT_FLOG:
302   case ISD::STRICT_FLOG10:
303   case ISD::STRICT_FLOG2:
304   case ISD::STRICT_FRINT:
305   case ISD::STRICT_FNEARBYINT:
306     // These pseudo-ops get legalized as if they were their non-strict
307     // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
308     // is also legal, but if ISD::FSQRT requires expansion then so does
309     // ISD::STRICT_FSQRT.
310     Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
311                                             Node->getValueType(0));
312     break;
313   case ISD::ADD:
314   case ISD::SUB:
315   case ISD::MUL:
316   case ISD::SDIV:
317   case ISD::UDIV:
318   case ISD::SREM:
319   case ISD::UREM:
320   case ISD::SDIVREM:
321   case ISD::UDIVREM:
322   case ISD::FADD:
323   case ISD::FSUB:
324   case ISD::FMUL:
325   case ISD::FDIV:
326   case ISD::FREM:
327   case ISD::AND:
328   case ISD::OR:
329   case ISD::XOR:
330   case ISD::SHL:
331   case ISD::SRA:
332   case ISD::SRL:
333   case ISD::ROTL:
334   case ISD::ROTR:
335   case ISD::BSWAP:
336   case ISD::BITREVERSE:
337   case ISD::CTLZ:
338   case ISD::CTTZ:
339   case ISD::CTLZ_ZERO_UNDEF:
340   case ISD::CTTZ_ZERO_UNDEF:
341   case ISD::CTPOP:
342   case ISD::SELECT:
343   case ISD::VSELECT:
344   case ISD::SELECT_CC:
345   case ISD::SETCC:
346   case ISD::ZERO_EXTEND:
347   case ISD::ANY_EXTEND:
348   case ISD::TRUNCATE:
349   case ISD::SIGN_EXTEND:
350   case ISD::FP_TO_SINT:
351   case ISD::FP_TO_UINT:
352   case ISD::FNEG:
353   case ISD::FABS:
354   case ISD::FMINNUM:
355   case ISD::FMAXNUM:
356   case ISD::FMINNAN:
357   case ISD::FMAXNAN:
358   case ISD::FCOPYSIGN:
359   case ISD::FSQRT:
360   case ISD::FSIN:
361   case ISD::FCOS:
362   case ISD::FPOWI:
363   case ISD::FPOW:
364   case ISD::FLOG:
365   case ISD::FLOG2:
366   case ISD::FLOG10:
367   case ISD::FEXP:
368   case ISD::FEXP2:
369   case ISD::FCEIL:
370   case ISD::FTRUNC:
371   case ISD::FRINT:
372   case ISD::FNEARBYINT:
373   case ISD::FROUND:
374   case ISD::FFLOOR:
375   case ISD::FP_ROUND:
376   case ISD::FP_EXTEND:
377   case ISD::FMA:
378   case ISD::SIGN_EXTEND_INREG:
379   case ISD::ANY_EXTEND_VECTOR_INREG:
380   case ISD::SIGN_EXTEND_VECTOR_INREG:
381   case ISD::ZERO_EXTEND_VECTOR_INREG:
382   case ISD::SMIN:
383   case ISD::SMAX:
384   case ISD::UMIN:
385   case ISD::UMAX:
386   case ISD::SMUL_LOHI:
387   case ISD::UMUL_LOHI:
388   case ISD::FCANONICALIZE:
389     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
390     break;
391   case ISD::FP_ROUND_INREG:
392     Action = TLI.getOperationAction(Node->getOpcode(),
393                cast<VTSDNode>(Node->getOperand(1))->getVT());
394     break;
395   case ISD::SINT_TO_FP:
396   case ISD::UINT_TO_FP:
397     Action = TLI.getOperationAction(Node->getOpcode(),
398                                     Node->getOperand(0).getValueType());
399     break;
400   }
401 
402   LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
403 
404   switch (Action) {
405   default: llvm_unreachable("This action is not supported yet!");
406   case TargetLowering::Promote:
407     Result = Promote(Op);
408     Changed = true;
409     break;
410   case TargetLowering::Legal:
411     LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
412     break;
413   case TargetLowering::Custom: {
414     LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
415     if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
416       LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
417       Result = Tmp1;
418       break;
419     }
420     LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
421     LLVM_FALLTHROUGH;
422   }
423   case TargetLowering::Expand:
424     Result = Expand(Op);
425   }
426 
427   // Make sure that the generated code is itself legal.
428   if (Result != Op) {
429     Result = LegalizeOp(Result);
430     Changed = true;
431   }
432 
433   // Note that LegalizeOp may be reentered even from single-use nodes, which
434   // means that we always must cache transformed nodes.
435   AddLegalizedOperand(Op, Result);
436   return Result;
437 }
438 
439 SDValue VectorLegalizer::Promote(SDValue Op) {
440   // For a few operations there is a specific concept for promotion based on
441   // the operand's type.
442   switch (Op.getOpcode()) {
443   case ISD::SINT_TO_FP:
444   case ISD::UINT_TO_FP:
445     // "Promote" the operation by extending the operand.
446     return PromoteINT_TO_FP(Op);
447   case ISD::FP_TO_UINT:
448   case ISD::FP_TO_SINT:
449     // Promote the operation by extending the operand.
450     return PromoteFP_TO_INT(Op);
451   }
452 
453   // There are currently two cases of vector promotion:
454   // 1) Bitcasting a vector of integers to a different type to a vector of the
455   //    same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
456   // 2) Extending a vector of floats to a vector of the same number of larger
457   //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
458   MVT VT = Op.getSimpleValueType();
459   assert(Op.getNode()->getNumValues() == 1 &&
460          "Can't promote a vector with multiple results!");
461   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
462   SDLoc dl(Op);
463   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
464 
465   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
466     if (Op.getOperand(j).getValueType().isVector())
467       if (Op.getOperand(j)
468               .getValueType()
469               .getVectorElementType()
470               .isFloatingPoint() &&
471           NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
472         Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
473       else
474         Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
475     else
476       Operands[j] = Op.getOperand(j);
477   }
478 
479   Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
480   if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
481       (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
482        NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
483     return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl));
484   else
485     return DAG.getNode(ISD::BITCAST, dl, VT, Op);
486 }
487 
488 SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
489   // INT_TO_FP operations may require the input operand be promoted even
490   // when the type is otherwise legal.
491   MVT VT = Op.getOperand(0).getSimpleValueType();
492   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
493   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
494          "Vectors have different number of elements!");
495 
496   SDLoc dl(Op);
497   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
498 
499   unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
500     ISD::SIGN_EXTEND;
501   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
502     if (Op.getOperand(j).getValueType().isVector())
503       Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
504     else
505       Operands[j] = Op.getOperand(j);
506   }
507 
508   return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
509 }
510 
511 // For FP_TO_INT we promote the result type to a vector type with wider
512 // elements and then truncate the result.  This is different from the default
513 // PromoteVector which uses bitcast to promote thus assumning that the
514 // promoted vector type has the same overall size.
515 SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
516   MVT VT = Op.getSimpleValueType();
517   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
518   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
519          "Vectors have different number of elements!");
520 
521   unsigned NewOpc = Op->getOpcode();
522   // Change FP_TO_UINT to FP_TO_SINT if possible.
523   // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
524   if (NewOpc == ISD::FP_TO_UINT &&
525       TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
526     NewOpc = ISD::FP_TO_SINT;
527 
528   SDLoc dl(Op);
529   SDValue Promoted  = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
530 
531   // Assert that the converted value fits in the original type.  If it doesn't
532   // (eg: because the value being converted is too big), then the result of the
533   // original operation was undefined anyway, so the assert is still correct.
534   Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
535                                                             : ISD::AssertSext,
536                          dl, NVT, Promoted,
537                          DAG.getValueType(VT.getScalarType()));
538   return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
539 }
540 
541 SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
542   LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
543 
544   EVT SrcVT = LD->getMemoryVT();
545   EVT SrcEltVT = SrcVT.getScalarType();
546   unsigned NumElem = SrcVT.getVectorNumElements();
547 
548   SDValue NewChain;
549   SDValue Value;
550   if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
551     SDLoc dl(Op);
552 
553     SmallVector<SDValue, 8> Vals;
554     SmallVector<SDValue, 8> LoadChains;
555 
556     EVT DstEltVT = LD->getValueType(0).getScalarType();
557     SDValue Chain = LD->getChain();
558     SDValue BasePTR = LD->getBasePtr();
559     ISD::LoadExtType ExtType = LD->getExtensionType();
560 
561     // When elements in a vector is not byte-addressable, we cannot directly
562     // load each element by advancing pointer, which could only address bytes.
563     // Instead, we load all significant words, mask bits off, and concatenate
564     // them to form each element. Finally, they are extended to destination
565     // scalar type to build the destination vector.
566     EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
567 
568     assert(WideVT.isRound() &&
569            "Could not handle the sophisticated case when the widest integer is"
570            " not power of 2.");
571     assert(WideVT.bitsGE(SrcEltVT) &&
572            "Type is not legalized?");
573 
574     unsigned WideBytes = WideVT.getStoreSize();
575     unsigned Offset = 0;
576     unsigned RemainingBytes = SrcVT.getStoreSize();
577     SmallVector<SDValue, 8> LoadVals;
578     while (RemainingBytes > 0) {
579       SDValue ScalarLoad;
580       unsigned LoadBytes = WideBytes;
581 
582       if (RemainingBytes >= LoadBytes) {
583         ScalarLoad =
584             DAG.getLoad(WideVT, dl, Chain, BasePTR,
585                         LD->getPointerInfo().getWithOffset(Offset),
586                         MinAlign(LD->getAlignment(), Offset),
587                         LD->getMemOperand()->getFlags(), LD->getAAInfo());
588       } else {
589         EVT LoadVT = WideVT;
590         while (RemainingBytes < LoadBytes) {
591           LoadBytes >>= 1; // Reduce the load size by half.
592           LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
593         }
594         ScalarLoad =
595             DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
596                            LD->getPointerInfo().getWithOffset(Offset), LoadVT,
597                            MinAlign(LD->getAlignment(), Offset),
598                            LD->getMemOperand()->getFlags(), LD->getAAInfo());
599       }
600 
601       RemainingBytes -= LoadBytes;
602       Offset += LoadBytes;
603 
604       BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
605 
606       LoadVals.push_back(ScalarLoad.getValue(0));
607       LoadChains.push_back(ScalarLoad.getValue(1));
608     }
609 
610     // Extract bits, pack and extend/trunc them into destination type.
611     unsigned SrcEltBits = SrcEltVT.getSizeInBits();
612     SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT);
613 
614     unsigned BitOffset = 0;
615     unsigned WideIdx = 0;
616     unsigned WideBits = WideVT.getSizeInBits();
617 
618     for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
619       SDValue Lo, Hi, ShAmt;
620 
621       if (BitOffset < WideBits) {
622         ShAmt = DAG.getConstant(
623             BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
624         Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
625         Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
626       }
627 
628       BitOffset += SrcEltBits;
629       if (BitOffset >= WideBits) {
630         WideIdx++;
631         BitOffset -= WideBits;
632         if (BitOffset > 0) {
633           ShAmt = DAG.getConstant(
634               SrcEltBits - BitOffset, dl,
635               TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
636           Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
637           Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
638         }
639       }
640 
641       if (Hi.getNode())
642         Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
643 
644       switch (ExtType) {
645       default: llvm_unreachable("Unknown extended-load op!");
646       case ISD::EXTLOAD:
647         Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
648         break;
649       case ISD::ZEXTLOAD:
650         Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
651         break;
652       case ISD::SEXTLOAD:
653         ShAmt =
654             DAG.getConstant(WideBits - SrcEltBits, dl,
655                             TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
656         Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
657         Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
658         Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
659         break;
660       }
661       Vals.push_back(Lo);
662     }
663 
664     NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
665     Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
666   } else {
667     SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
668     // Skip past MERGE_VALUE node if known.
669     if (Scalarized->getOpcode() == ISD::MERGE_VALUES) {
670       NewChain = Scalarized.getOperand(1);
671       Value = Scalarized.getOperand(0);
672     } else {
673       NewChain = Scalarized.getValue(1);
674       Value = Scalarized.getValue(0);
675     }
676   }
677 
678   AddLegalizedOperand(Op.getValue(0), Value);
679   AddLegalizedOperand(Op.getValue(1), NewChain);
680 
681   return (Op.getResNo() ? NewChain : Value);
682 }
683 
684 SDValue VectorLegalizer::ExpandStore(SDValue Op) {
685   StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
686   SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
687   AddLegalizedOperand(Op, TF);
688   return TF;
689 }
690 
691 SDValue VectorLegalizer::Expand(SDValue Op) {
692   switch (Op->getOpcode()) {
693   case ISD::SIGN_EXTEND_INREG:
694     return ExpandSEXTINREG(Op);
695   case ISD::ANY_EXTEND_VECTOR_INREG:
696     return ExpandANY_EXTEND_VECTOR_INREG(Op);
697   case ISD::SIGN_EXTEND_VECTOR_INREG:
698     return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
699   case ISD::ZERO_EXTEND_VECTOR_INREG:
700     return ExpandZERO_EXTEND_VECTOR_INREG(Op);
701   case ISD::BSWAP:
702     return ExpandBSWAP(Op);
703   case ISD::VSELECT:
704     return ExpandVSELECT(Op);
705   case ISD::SELECT:
706     return ExpandSELECT(Op);
707   case ISD::UINT_TO_FP:
708     return ExpandUINT_TO_FLOAT(Op);
709   case ISD::FNEG:
710     return ExpandFNEG(Op);
711   case ISD::FSUB:
712     return ExpandFSUB(Op);
713   case ISD::SETCC:
714     return UnrollVSETCC(Op);
715   case ISD::BITREVERSE:
716     return ExpandBITREVERSE(Op);
717   case ISD::CTLZ:
718   case ISD::CTLZ_ZERO_UNDEF:
719     return ExpandCTLZ(Op);
720   case ISD::CTTZ:
721   case ISD::CTTZ_ZERO_UNDEF:
722     return ExpandCTTZ(Op);
723   case ISD::STRICT_FADD:
724   case ISD::STRICT_FSUB:
725   case ISD::STRICT_FMUL:
726   case ISD::STRICT_FDIV:
727   case ISD::STRICT_FREM:
728   case ISD::STRICT_FSQRT:
729   case ISD::STRICT_FMA:
730   case ISD::STRICT_FPOW:
731   case ISD::STRICT_FPOWI:
732   case ISD::STRICT_FSIN:
733   case ISD::STRICT_FCOS:
734   case ISD::STRICT_FEXP:
735   case ISD::STRICT_FEXP2:
736   case ISD::STRICT_FLOG:
737   case ISD::STRICT_FLOG10:
738   case ISD::STRICT_FLOG2:
739   case ISD::STRICT_FRINT:
740   case ISD::STRICT_FNEARBYINT:
741     return ExpandStrictFPOp(Op);
742   default:
743     return DAG.UnrollVectorOp(Op.getNode());
744   }
745 }
746 
747 SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
748   // Lower a select instruction where the condition is a scalar and the
749   // operands are vectors. Lower this select to VSELECT and implement it
750   // using XOR AND OR. The selector bit is broadcasted.
751   EVT VT = Op.getValueType();
752   SDLoc DL(Op);
753 
754   SDValue Mask = Op.getOperand(0);
755   SDValue Op1 = Op.getOperand(1);
756   SDValue Op2 = Op.getOperand(2);
757 
758   assert(VT.isVector() && !Mask.getValueType().isVector()
759          && Op1.getValueType() == Op2.getValueType() && "Invalid type");
760 
761   // If we can't even use the basic vector operations of
762   // AND,OR,XOR, we will have to scalarize the op.
763   // Notice that the operation may be 'promoted' which means that it is
764   // 'bitcasted' to another type which is handled.
765   // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
766   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
767       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
768       TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand ||
769       TLI.getOperationAction(ISD::BUILD_VECTOR,  VT) == TargetLowering::Expand)
770     return DAG.UnrollVectorOp(Op.getNode());
771 
772   // Generate a mask operand.
773   EVT MaskTy = VT.changeVectorElementTypeToInteger();
774 
775   // What is the size of each element in the vector mask.
776   EVT BitTy = MaskTy.getScalarType();
777 
778   Mask = DAG.getSelect(DL, BitTy, Mask,
779           DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
780                           BitTy),
781           DAG.getConstant(0, DL, BitTy));
782 
783   // Broadcast the mask so that the entire vector is all-one or all zero.
784   Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
785 
786   // Bitcast the operands to be the same type as the mask.
787   // This is needed when we select between FP types because
788   // the mask is a vector of integers.
789   Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
790   Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
791 
792   SDValue AllOnes = DAG.getConstant(
793             APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
794   SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
795 
796   Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
797   Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
798   SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
799   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
800 }
801 
802 SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
803   EVT VT = Op.getValueType();
804 
805   // Make sure that the SRA and SHL instructions are available.
806   if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
807       TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
808     return DAG.UnrollVectorOp(Op.getNode());
809 
810   SDLoc DL(Op);
811   EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
812 
813   unsigned BW = VT.getScalarSizeInBits();
814   unsigned OrigBW = OrigTy.getScalarSizeInBits();
815   SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
816 
817   Op = Op.getOperand(0);
818   Op =   DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
819   return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
820 }
821 
822 // Generically expand a vector anyext in register to a shuffle of the relevant
823 // lanes into the appropriate locations, with other lanes left undef.
824 SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
825   SDLoc DL(Op);
826   EVT VT = Op.getValueType();
827   int NumElements = VT.getVectorNumElements();
828   SDValue Src = Op.getOperand(0);
829   EVT SrcVT = Src.getValueType();
830   int NumSrcElements = SrcVT.getVectorNumElements();
831 
832   // Build a base mask of undef shuffles.
833   SmallVector<int, 16> ShuffleMask;
834   ShuffleMask.resize(NumSrcElements, -1);
835 
836   // Place the extended lanes into the correct locations.
837   int ExtLaneScale = NumSrcElements / NumElements;
838   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
839   for (int i = 0; i < NumElements; ++i)
840     ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
841 
842   return DAG.getNode(
843       ISD::BITCAST, DL, VT,
844       DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
845 }
846 
847 SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
848   SDLoc DL(Op);
849   EVT VT = Op.getValueType();
850   SDValue Src = Op.getOperand(0);
851   EVT SrcVT = Src.getValueType();
852 
853   // First build an any-extend node which can be legalized above when we
854   // recurse through it.
855   Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
856 
857   // Now we need sign extend. Do this by shifting the elements. Even if these
858   // aren't legal operations, they have a better chance of being legalized
859   // without full scalarization than the sign extension does.
860   unsigned EltWidth = VT.getScalarSizeInBits();
861   unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
862   SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
863   return DAG.getNode(ISD::SRA, DL, VT,
864                      DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
865                      ShiftAmount);
866 }
867 
868 // Generically expand a vector zext in register to a shuffle of the relevant
869 // lanes into the appropriate locations, a blend of zero into the high bits,
870 // and a bitcast to the wider element type.
871 SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
872   SDLoc DL(Op);
873   EVT VT = Op.getValueType();
874   int NumElements = VT.getVectorNumElements();
875   SDValue Src = Op.getOperand(0);
876   EVT SrcVT = Src.getValueType();
877   int NumSrcElements = SrcVT.getVectorNumElements();
878 
879   // Build up a zero vector to blend into this one.
880   SDValue Zero = DAG.getConstant(0, DL, SrcVT);
881 
882   // Shuffle the incoming lanes into the correct position, and pull all other
883   // lanes from the zero vector.
884   SmallVector<int, 16> ShuffleMask;
885   ShuffleMask.reserve(NumSrcElements);
886   for (int i = 0; i < NumSrcElements; ++i)
887     ShuffleMask.push_back(i);
888 
889   int ExtLaneScale = NumSrcElements / NumElements;
890   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
891   for (int i = 0; i < NumElements; ++i)
892     ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
893 
894   return DAG.getNode(ISD::BITCAST, DL, VT,
895                      DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
896 }
897 
898 static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
899   int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
900   for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
901     for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
902       ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
903 }
904 
905 SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
906   EVT VT = Op.getValueType();
907 
908   // Generate a byte wise shuffle mask for the BSWAP.
909   SmallVector<int, 16> ShuffleMask;
910   createBSWAPShuffleMask(VT, ShuffleMask);
911   EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
912 
913   // Only emit a shuffle if the mask is legal.
914   if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
915     return DAG.UnrollVectorOp(Op.getNode());
916 
917   SDLoc DL(Op);
918   Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
919   Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
920   return DAG.getNode(ISD::BITCAST, DL, VT, Op);
921 }
922 
923 SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
924   EVT VT = Op.getValueType();
925 
926   // If we have the scalar operation, it's probably cheaper to unroll it.
927   if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
928     return DAG.UnrollVectorOp(Op.getNode());
929 
930   // If the vector element width is a whole number of bytes, test if its legal
931   // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
932   // vector. This greatly reduces the number of bit shifts necessary.
933   unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
934   if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
935     SmallVector<int, 16> BSWAPMask;
936     createBSWAPShuffleMask(VT, BSWAPMask);
937 
938     EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
939     if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
940         (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
941          (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
942           TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
943           TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
944           TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
945       SDLoc DL(Op);
946       Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
947       Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
948                                 BSWAPMask);
949       Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
950       return DAG.getNode(ISD::BITCAST, DL, VT, Op);
951     }
952   }
953 
954   // If we have the appropriate vector bit operations, it is better to use them
955   // than unrolling and expanding each component.
956   if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
957       !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
958       !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
959       !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
960     return DAG.UnrollVectorOp(Op.getNode());
961 
962   // Let LegalizeDAG handle this later.
963   return Op;
964 }
965 
966 SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
967   // Implement VSELECT in terms of XOR, AND, OR
968   // on platforms which do not support blend natively.
969   SDLoc DL(Op);
970 
971   SDValue Mask = Op.getOperand(0);
972   SDValue Op1 = Op.getOperand(1);
973   SDValue Op2 = Op.getOperand(2);
974 
975   EVT VT = Mask.getValueType();
976 
977   // If we can't even use the basic vector operations of
978   // AND,OR,XOR, we will have to scalarize the op.
979   // Notice that the operation may be 'promoted' which means that it is
980   // 'bitcasted' to another type which is handled.
981   // This operation also isn't safe with AND, OR, XOR when the boolean
982   // type is 0/1 as we need an all ones vector constant to mask with.
983   // FIXME: Sign extend 1 to all ones if thats legal on the target.
984   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
985       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
986       TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
987       TLI.getBooleanContents(Op1.getValueType()) !=
988           TargetLowering::ZeroOrNegativeOneBooleanContent)
989     return DAG.UnrollVectorOp(Op.getNode());
990 
991   // If the mask and the type are different sizes, unroll the vector op. This
992   // can occur when getSetCCResultType returns something that is different in
993   // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
994   if (VT.getSizeInBits() != Op1.getValueSizeInBits())
995     return DAG.UnrollVectorOp(Op.getNode());
996 
997   // Bitcast the operands to be the same type as the mask.
998   // This is needed when we select between FP types because
999   // the mask is a vector of integers.
1000   Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
1001   Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
1002 
1003   SDValue AllOnes = DAG.getConstant(
1004     APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
1005   SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
1006 
1007   Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
1008   Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
1009   SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
1010   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
1011 }
1012 
1013 SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
1014   EVT VT = Op.getOperand(0).getValueType();
1015   SDLoc DL(Op);
1016 
1017   // Make sure that the SINT_TO_FP and SRL instructions are available.
1018   if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
1019       TLI.getOperationAction(ISD::SRL,        VT) == TargetLowering::Expand)
1020     return DAG.UnrollVectorOp(Op.getNode());
1021 
1022   unsigned BW = VT.getScalarSizeInBits();
1023   assert((BW == 64 || BW == 32) &&
1024          "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
1025 
1026   SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
1027 
1028   // Constants to clear the upper part of the word.
1029   // Notice that we can also use SHL+SHR, but using a constant is slightly
1030   // faster on x86.
1031   uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
1032   SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
1033 
1034   // Two to the power of half-word-size.
1035   SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
1036 
1037   // Clear upper part of LO, lower HI
1038   SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
1039   SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
1040 
1041   // Convert hi and lo to floats
1042   // Convert the hi part back to the upper values
1043   // TODO: Can any fast-math-flags be set on these nodes?
1044   SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
1045           fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
1046   SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
1047 
1048   // Add the two halves
1049   return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
1050 }
1051 
1052 SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
1053   if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
1054     SDLoc DL(Op);
1055     SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
1056     // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
1057     return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
1058                        Zero, Op.getOperand(0));
1059   }
1060   return DAG.UnrollVectorOp(Op.getNode());
1061 }
1062 
1063 SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
1064   // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
1065   // we can defer this to operation legalization where it will be lowered as
1066   // a+(-b).
1067   EVT VT = Op.getValueType();
1068   if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
1069       TLI.isOperationLegalOrCustom(ISD::FADD, VT))
1070     return Op; // Defer to LegalizeDAG
1071 
1072   return DAG.UnrollVectorOp(Op.getNode());
1073 }
1074 
1075 SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
1076   EVT VT = Op.getValueType();
1077   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
1078 
1079   // If the non-ZERO_UNDEF version is supported we can use that instead.
1080   if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
1081       TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
1082     SDLoc DL(Op);
1083     return DAG.getNode(ISD::CTLZ, DL, VT, Op.getOperand(0));
1084   }
1085 
1086   // If we have the appropriate vector bit operations, it is better to use them
1087   // than unrolling and expanding each component.
1088   if (isPowerOf2_32(NumBitsPerElt) &&
1089       TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
1090       TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1091       TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
1092     return Op;
1093 
1094   // Otherwise go ahead and unroll.
1095   return DAG.UnrollVectorOp(Op.getNode());
1096 }
1097 
1098 SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) {
1099   EVT VT = Op.getValueType();
1100   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
1101 
1102   // If the non-ZERO_UNDEF version is supported we can use that instead.
1103   if (TLI.isOperationLegalOrCustom(ISD::CTTZ, VT)) {
1104     SDLoc DL(Op);
1105     return DAG.getNode(ISD::CTTZ, DL, VT, Op.getOperand(0));
1106   }
1107 
1108   // If we have the appropriate vector bit operations, it is better to use them
1109   // than unrolling and expanding each component.
1110   if (isPowerOf2_32(NumBitsPerElt) &&
1111       (TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) ||
1112        TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) &&
1113       TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
1114       TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
1115       TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT))
1116     return Op;
1117 
1118   // Otherwise go ahead and unroll.
1119   return DAG.UnrollVectorOp(Op.getNode());
1120 }
1121 
1122 SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
1123   EVT VT = Op.getValueType();
1124   EVT EltVT = VT.getVectorElementType();
1125   unsigned NumElems = VT.getVectorNumElements();
1126   unsigned NumOpers = Op.getNumOperands();
1127   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1128   EVT ValueVTs[] = {EltVT, MVT::Other};
1129   SDValue Chain = Op.getOperand(0);
1130   SDLoc dl(Op);
1131 
1132   SmallVector<SDValue, 32> OpValues;
1133   SmallVector<SDValue, 32> OpChains;
1134   for (unsigned i = 0; i < NumElems; ++i) {
1135     SmallVector<SDValue, 4> Opers;
1136     SDValue Idx = DAG.getConstant(i, dl,
1137                                   TLI.getVectorIdxTy(DAG.getDataLayout()));
1138 
1139     // The Chain is the first operand.
1140     Opers.push_back(Chain);
1141 
1142     // Now process the remaining operands.
1143     for (unsigned j = 1; j < NumOpers; ++j) {
1144       SDValue Oper = Op.getOperand(j);
1145       EVT OperVT = Oper.getValueType();
1146 
1147       if (OperVT.isVector())
1148         Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
1149                            EltVT, Oper, Idx);
1150 
1151       Opers.push_back(Oper);
1152     }
1153 
1154     SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
1155 
1156     OpValues.push_back(ScalarOp.getValue(0));
1157     OpChains.push_back(ScalarOp.getValue(1));
1158   }
1159 
1160   SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
1161   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
1162 
1163   AddLegalizedOperand(Op.getValue(0), Result);
1164   AddLegalizedOperand(Op.getValue(1), NewChain);
1165 
1166   return Op.getResNo() ? NewChain : Result;
1167 }
1168 
1169 SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
1170   EVT VT = Op.getValueType();
1171   unsigned NumElems = VT.getVectorNumElements();
1172   EVT EltVT = VT.getVectorElementType();
1173   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
1174   EVT TmpEltVT = LHS.getValueType().getVectorElementType();
1175   SDLoc dl(Op);
1176   SmallVector<SDValue, 8> Ops(NumElems);
1177   for (unsigned i = 0; i < NumElems; ++i) {
1178     SDValue LHSElem = DAG.getNode(
1179         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
1180         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
1181     SDValue RHSElem = DAG.getNode(
1182         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
1183         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
1184     Ops[i] = DAG.getNode(ISD::SETCC, dl,
1185                          TLI.getSetCCResultType(DAG.getDataLayout(),
1186                                                 *DAG.getContext(), TmpEltVT),
1187                          LHSElem, RHSElem, CC);
1188     Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
1189                            DAG.getConstant(APInt::getAllOnesValue
1190                                            (EltVT.getSizeInBits()), dl, EltVT),
1191                            DAG.getConstant(0, dl, EltVT));
1192   }
1193   return DAG.getBuildVector(VT, dl, Ops);
1194 }
1195 
1196 bool SelectionDAG::LegalizeVectors() {
1197   return VectorLegalizer(*this).Run();
1198 }
1199