1 //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the SelectionDAG::LegalizeVectors method.
11 //
12 // The vector legalizer looks for vector operations which might need to be
13 // scalarized and legalizes them. This is a separate step from Legalize because
14 // scalarizing can introduce illegal types.  For example, suppose we have an
15 // ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
16 // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
17 // operation, which introduces nodes with the illegal type i64 which must be
18 // expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
19 // the operation must be unrolled, which introduces nodes with the illegal
20 // type i8 which must be promoted.
21 //
22 // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
23 // or operations that happen to take a vector which are custom-lowered;
24 // the legalization for such operations never produces nodes
25 // with illegal types, so it's okay to put off legalizing them until
26 // SelectionDAG::Legalize runs.
27 //
28 //===----------------------------------------------------------------------===//
29 
30 #include "llvm/ADT/APInt.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/CodeGen/ISDOpcodes.h"
34 #include "llvm/CodeGen/MachineMemOperand.h"
35 #include "llvm/CodeGen/SelectionDAG.h"
36 #include "llvm/CodeGen/SelectionDAGNodes.h"
37 #include "llvm/CodeGen/TargetLowering.h"
38 #include "llvm/CodeGen/ValueTypes.h"
39 #include "llvm/IR/DataLayout.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/Compiler.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/MachineValueType.h"
44 #include "llvm/Support/MathExtras.h"
45 #include <cassert>
46 #include <cstdint>
47 #include <iterator>
48 #include <utility>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "legalizevectorops"
53 
54 namespace {
55 
56 class VectorLegalizer {
57   SelectionDAG& DAG;
58   const TargetLowering &TLI;
59   bool Changed = false; // Keep track of whether anything changed
60 
61   /// For nodes that are of legal width, and that have more than one use, this
62   /// map indicates what regularized operand to use.  This allows us to avoid
63   /// legalizing the same thing more than once.
64   SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
65 
66   /// Adds a node to the translation cache.
67   void AddLegalizedOperand(SDValue From, SDValue To) {
68     LegalizedNodes.insert(std::make_pair(From, To));
69     // If someone requests legalization of the new node, return itself.
70     if (From != To)
71       LegalizedNodes.insert(std::make_pair(To, To));
72   }
73 
74   /// Legalizes the given node.
75   SDValue LegalizeOp(SDValue Op);
76 
77   /// Assuming the node is legal, "legalize" the results.
78   SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
79 
80   /// Implements unrolling a VSETCC.
81   SDValue UnrollVSETCC(SDValue Op);
82 
83   /// Implement expand-based legalization of vector operations.
84   ///
85   /// This is just a high-level routine to dispatch to specific code paths for
86   /// operations to legalize them.
87   SDValue Expand(SDValue Op);
88 
89   /// Implements expansion for FNEG; falls back to UnrollVectorOp if
90   /// FSUB isn't legal.
91   ///
92   /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
93   /// SINT_TO_FLOAT and SHR on vectors isn't legal.
94   SDValue ExpandUINT_TO_FLOAT(SDValue Op);
95 
96   /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
97   SDValue ExpandSEXTINREG(SDValue Op);
98 
99   /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
100   ///
101   /// Shuffles the low lanes of the operand into place and bitcasts to the proper
102   /// type. The contents of the bits in the extended part of each element are
103   /// undef.
104   SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
105 
106   /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
107   ///
108   /// Shuffles the low lanes of the operand into place, bitcasts to the proper
109   /// type, then shifts left and arithmetic shifts right to introduce a sign
110   /// extension.
111   SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
112 
113   /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
114   ///
115   /// Shuffles the low lanes of the operand into place and blends zeros into
116   /// the remaining lanes, finally bitcasting to the proper type.
117   SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
118 
119   /// Expand bswap of vectors into a shuffle if legal.
120   SDValue ExpandBSWAP(SDValue Op);
121 
122   /// Implement vselect in terms of XOR, AND, OR when blend is not
123   /// supported by the target.
124   SDValue ExpandVSELECT(SDValue Op);
125   SDValue ExpandSELECT(SDValue Op);
126   SDValue ExpandLoad(SDValue Op);
127   SDValue ExpandStore(SDValue Op);
128   SDValue ExpandFNEG(SDValue Op);
129   SDValue ExpandFSUB(SDValue Op);
130   SDValue ExpandBITREVERSE(SDValue Op);
131   SDValue ExpandCTLZ(SDValue Op);
132   SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
133   SDValue ExpandStrictFPOp(SDValue Op);
134 
135   /// Implements vector promotion.
136   ///
137   /// This is essentially just bitcasting the operands to a different type and
138   /// bitcasting the result back to the original type.
139   SDValue Promote(SDValue Op);
140 
141   /// Implements [SU]INT_TO_FP vector promotion.
142   ///
143   /// This is a [zs]ext of the input operand to a larger integer type.
144   SDValue PromoteINT_TO_FP(SDValue Op);
145 
146   /// Implements FP_TO_[SU]INT vector promotion of the result type.
147   ///
148   /// It is promoted to a larger integer type.  The result is then
149   /// truncated back to the original type.
150   SDValue PromoteFP_TO_INT(SDValue Op);
151 
152 public:
153   VectorLegalizer(SelectionDAG& dag) :
154       DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
155 
156   /// Begin legalizer the vector operations in the DAG.
157   bool Run();
158 };
159 
160 } // end anonymous namespace
161 
162 bool VectorLegalizer::Run() {
163   // Before we start legalizing vector nodes, check if there are any vectors.
164   bool HasVectors = false;
165   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
166        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
167     // Check if the values of the nodes contain vectors. We don't need to check
168     // the operands because we are going to check their values at some point.
169     for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
170          J != E; ++J)
171       HasVectors |= J->isVector();
172 
173     // If we found a vector node we can start the legalization.
174     if (HasVectors)
175       break;
176   }
177 
178   // If this basic block has no vectors then no need to legalize vectors.
179   if (!HasVectors)
180     return false;
181 
182   // The legalize process is inherently a bottom-up recursive process (users
183   // legalize their uses before themselves).  Given infinite stack space, we
184   // could just start legalizing on the root and traverse the whole graph.  In
185   // practice however, this causes us to run out of stack space on large basic
186   // blocks.  To avoid this problem, compute an ordering of the nodes where each
187   // node is only legalized after all of its operands are legalized.
188   DAG.AssignTopologicalOrder();
189   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
190        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
191     LegalizeOp(SDValue(&*I, 0));
192 
193   // Finally, it's possible the root changed.  Get the new root.
194   SDValue OldRoot = DAG.getRoot();
195   assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
196   DAG.setRoot(LegalizedNodes[OldRoot]);
197 
198   LegalizedNodes.clear();
199 
200   // Remove dead nodes now.
201   DAG.RemoveDeadNodes();
202 
203   return Changed;
204 }
205 
206 SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
207   // Generic legalization: just pass the operand through.
208   for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
209     AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
210   return Result.getValue(Op.getResNo());
211 }
212 
213 SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
214   // Note that LegalizeOp may be reentered even from single-use nodes, which
215   // means that we always must cache transformed nodes.
216   DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
217   if (I != LegalizedNodes.end()) return I->second;
218 
219   SDNode* Node = Op.getNode();
220 
221   // Legalize the operands
222   SmallVector<SDValue, 8> Ops;
223   for (const SDValue &Op : Node->op_values())
224     Ops.push_back(LegalizeOp(Op));
225 
226   SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
227                            Op.getResNo());
228 
229   if (Op.getOpcode() == ISD::LOAD) {
230     LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
231     ISD::LoadExtType ExtType = LD->getExtensionType();
232     if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
233       LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
234                  Node->dump(&DAG));
235       switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
236                                    LD->getMemoryVT())) {
237       default: llvm_unreachable("This action is not supported yet!");
238       case TargetLowering::Legal:
239         return TranslateLegalizeResults(Op, Result);
240       case TargetLowering::Custom:
241         if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
242           assert(Lowered->getNumValues() == Op->getNumValues() &&
243                  "Unexpected number of results");
244           Changed = Lowered != Result;
245           return TranslateLegalizeResults(Op, Lowered);
246         }
247         LLVM_FALLTHROUGH;
248       case TargetLowering::Expand:
249         Changed = true;
250         return LegalizeOp(ExpandLoad(Op));
251       }
252     }
253   } else if (Op.getOpcode() == ISD::STORE) {
254     StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
255     EVT StVT = ST->getMemoryVT();
256     MVT ValVT = ST->getValue().getSimpleValueType();
257     if (StVT.isVector() && ST->isTruncatingStore()) {
258       LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
259                  Node->dump(&DAG));
260       switch (TLI.getTruncStoreAction(ValVT, StVT)) {
261       default: llvm_unreachable("This action is not supported yet!");
262       case TargetLowering::Legal:
263         return TranslateLegalizeResults(Op, Result);
264       case TargetLowering::Custom: {
265         SDValue Lowered = TLI.LowerOperation(Result, DAG);
266         Changed = Lowered != Result;
267         return TranslateLegalizeResults(Op, Lowered);
268       }
269       case TargetLowering::Expand:
270         Changed = true;
271         return LegalizeOp(ExpandStore(Op));
272       }
273     }
274   }
275 
276   bool HasVectorValue = false;
277   for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
278        J != E;
279        ++J)
280     HasVectorValue |= J->isVector();
281   if (!HasVectorValue)
282     return TranslateLegalizeResults(Op, Result);
283 
284   TargetLowering::LegalizeAction Action = TargetLowering::Legal;
285   switch (Op.getOpcode()) {
286   default:
287     return TranslateLegalizeResults(Op, Result);
288   case ISD::STRICT_FADD:
289   case ISD::STRICT_FSUB:
290   case ISD::STRICT_FMUL:
291   case ISD::STRICT_FDIV:
292   case ISD::STRICT_FREM:
293   case ISD::STRICT_FSQRT:
294   case ISD::STRICT_FMA:
295   case ISD::STRICT_FPOW:
296   case ISD::STRICT_FPOWI:
297   case ISD::STRICT_FSIN:
298   case ISD::STRICT_FCOS:
299   case ISD::STRICT_FEXP:
300   case ISD::STRICT_FEXP2:
301   case ISD::STRICT_FLOG:
302   case ISD::STRICT_FLOG10:
303   case ISD::STRICT_FLOG2:
304   case ISD::STRICT_FRINT:
305   case ISD::STRICT_FNEARBYINT:
306     // These pseudo-ops get legalized as if they were their non-strict
307     // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
308     // is also legal, but if ISD::FSQRT requires expansion then so does
309     // ISD::STRICT_FSQRT.
310     Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
311                                             Node->getValueType(0));
312     break;
313   case ISD::ADD:
314   case ISD::SUB:
315   case ISD::MUL:
316   case ISD::SDIV:
317   case ISD::UDIV:
318   case ISD::SREM:
319   case ISD::UREM:
320   case ISD::SDIVREM:
321   case ISD::UDIVREM:
322   case ISD::FADD:
323   case ISD::FSUB:
324   case ISD::FMUL:
325   case ISD::FDIV:
326   case ISD::FREM:
327   case ISD::AND:
328   case ISD::OR:
329   case ISD::XOR:
330   case ISD::SHL:
331   case ISD::SRA:
332   case ISD::SRL:
333   case ISD::ROTL:
334   case ISD::ROTR:
335   case ISD::BSWAP:
336   case ISD::BITREVERSE:
337   case ISD::CTLZ:
338   case ISD::CTTZ:
339   case ISD::CTLZ_ZERO_UNDEF:
340   case ISD::CTTZ_ZERO_UNDEF:
341   case ISD::CTPOP:
342   case ISD::SELECT:
343   case ISD::VSELECT:
344   case ISD::SELECT_CC:
345   case ISD::SETCC:
346   case ISD::ZERO_EXTEND:
347   case ISD::ANY_EXTEND:
348   case ISD::TRUNCATE:
349   case ISD::SIGN_EXTEND:
350   case ISD::FP_TO_SINT:
351   case ISD::FP_TO_UINT:
352   case ISD::FNEG:
353   case ISD::FABS:
354   case ISD::FMINNUM:
355   case ISD::FMAXNUM:
356   case ISD::FMINNAN:
357   case ISD::FMAXNAN:
358   case ISD::FCOPYSIGN:
359   case ISD::FSQRT:
360   case ISD::FSIN:
361   case ISD::FCOS:
362   case ISD::FPOWI:
363   case ISD::FPOW:
364   case ISD::FLOG:
365   case ISD::FLOG2:
366   case ISD::FLOG10:
367   case ISD::FEXP:
368   case ISD::FEXP2:
369   case ISD::FCEIL:
370   case ISD::FTRUNC:
371   case ISD::FRINT:
372   case ISD::FNEARBYINT:
373   case ISD::FROUND:
374   case ISD::FFLOOR:
375   case ISD::FP_ROUND:
376   case ISD::FP_EXTEND:
377   case ISD::FMA:
378   case ISD::SIGN_EXTEND_INREG:
379   case ISD::ANY_EXTEND_VECTOR_INREG:
380   case ISD::SIGN_EXTEND_VECTOR_INREG:
381   case ISD::ZERO_EXTEND_VECTOR_INREG:
382   case ISD::SMIN:
383   case ISD::SMAX:
384   case ISD::UMIN:
385   case ISD::UMAX:
386   case ISD::SMUL_LOHI:
387   case ISD::UMUL_LOHI:
388   case ISD::FCANONICALIZE:
389     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
390     break;
391   case ISD::FP_ROUND_INREG:
392     Action = TLI.getOperationAction(Node->getOpcode(),
393                cast<VTSDNode>(Node->getOperand(1))->getVT());
394     break;
395   case ISD::SINT_TO_FP:
396   case ISD::UINT_TO_FP:
397     Action = TLI.getOperationAction(Node->getOpcode(),
398                                     Node->getOperand(0).getValueType());
399     break;
400   }
401 
402   LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
403 
404   switch (Action) {
405   default: llvm_unreachable("This action is not supported yet!");
406   case TargetLowering::Promote:
407     Result = Promote(Op);
408     Changed = true;
409     break;
410   case TargetLowering::Legal:
411     LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
412     break;
413   case TargetLowering::Custom: {
414     LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
415     if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
416       LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
417       Result = Tmp1;
418       break;
419     }
420     LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
421     LLVM_FALLTHROUGH;
422   }
423   case TargetLowering::Expand:
424     Result = Expand(Op);
425   }
426 
427   // Make sure that the generated code is itself legal.
428   if (Result != Op) {
429     Result = LegalizeOp(Result);
430     Changed = true;
431   }
432 
433   // Note that LegalizeOp may be reentered even from single-use nodes, which
434   // means that we always must cache transformed nodes.
435   AddLegalizedOperand(Op, Result);
436   return Result;
437 }
438 
439 SDValue VectorLegalizer::Promote(SDValue Op) {
440   // For a few operations there is a specific concept for promotion based on
441   // the operand's type.
442   switch (Op.getOpcode()) {
443   case ISD::SINT_TO_FP:
444   case ISD::UINT_TO_FP:
445     // "Promote" the operation by extending the operand.
446     return PromoteINT_TO_FP(Op);
447   case ISD::FP_TO_UINT:
448   case ISD::FP_TO_SINT:
449     // Promote the operation by extending the operand.
450     return PromoteFP_TO_INT(Op);
451   }
452 
453   // There are currently two cases of vector promotion:
454   // 1) Bitcasting a vector of integers to a different type to a vector of the
455   //    same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
456   // 2) Extending a vector of floats to a vector of the same number of larger
457   //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
458   MVT VT = Op.getSimpleValueType();
459   assert(Op.getNode()->getNumValues() == 1 &&
460          "Can't promote a vector with multiple results!");
461   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
462   SDLoc dl(Op);
463   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
464 
465   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
466     if (Op.getOperand(j).getValueType().isVector())
467       if (Op.getOperand(j)
468               .getValueType()
469               .getVectorElementType()
470               .isFloatingPoint() &&
471           NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
472         Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
473       else
474         Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
475     else
476       Operands[j] = Op.getOperand(j);
477   }
478 
479   Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
480   if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
481       (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
482        NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
483     return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl));
484   else
485     return DAG.getNode(ISD::BITCAST, dl, VT, Op);
486 }
487 
488 SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
489   // INT_TO_FP operations may require the input operand be promoted even
490   // when the type is otherwise legal.
491   MVT VT = Op.getOperand(0).getSimpleValueType();
492   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
493   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
494          "Vectors have different number of elements!");
495 
496   SDLoc dl(Op);
497   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
498 
499   unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
500     ISD::SIGN_EXTEND;
501   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
502     if (Op.getOperand(j).getValueType().isVector())
503       Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
504     else
505       Operands[j] = Op.getOperand(j);
506   }
507 
508   return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
509 }
510 
511 // For FP_TO_INT we promote the result type to a vector type with wider
512 // elements and then truncate the result.  This is different from the default
513 // PromoteVector which uses bitcast to promote thus assumning that the
514 // promoted vector type has the same overall size.
515 SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
516   MVT VT = Op.getSimpleValueType();
517   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
518   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
519          "Vectors have different number of elements!");
520 
521   unsigned NewOpc = Op->getOpcode();
522   // Change FP_TO_UINT to FP_TO_SINT if possible.
523   // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
524   if (NewOpc == ISD::FP_TO_UINT &&
525       TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
526     NewOpc = ISD::FP_TO_SINT;
527 
528   SDLoc dl(Op);
529   SDValue Promoted  = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
530 
531   // Assert that the converted value fits in the original type.  If it doesn't
532   // (eg: because the value being converted is too big), then the result of the
533   // original operation was undefined anyway, so the assert is still correct.
534   Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
535                                                             : ISD::AssertSext,
536                          dl, NVT, Promoted,
537                          DAG.getValueType(VT.getScalarType()));
538   return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
539 }
540 
541 SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
542   LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
543 
544   EVT SrcVT = LD->getMemoryVT();
545   EVT SrcEltVT = SrcVT.getScalarType();
546   unsigned NumElem = SrcVT.getVectorNumElements();
547 
548   SDValue NewChain;
549   SDValue Value;
550   if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
551     SDLoc dl(Op);
552 
553     SmallVector<SDValue, 8> Vals;
554     SmallVector<SDValue, 8> LoadChains;
555 
556     EVT DstEltVT = LD->getValueType(0).getScalarType();
557     SDValue Chain = LD->getChain();
558     SDValue BasePTR = LD->getBasePtr();
559     ISD::LoadExtType ExtType = LD->getExtensionType();
560 
561     // When elements in a vector is not byte-addressable, we cannot directly
562     // load each element by advancing pointer, which could only address bytes.
563     // Instead, we load all significant words, mask bits off, and concatenate
564     // them to form each element. Finally, they are extended to destination
565     // scalar type to build the destination vector.
566     EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
567 
568     assert(WideVT.isRound() &&
569            "Could not handle the sophisticated case when the widest integer is"
570            " not power of 2.");
571     assert(WideVT.bitsGE(SrcEltVT) &&
572            "Type is not legalized?");
573 
574     unsigned WideBytes = WideVT.getStoreSize();
575     unsigned Offset = 0;
576     unsigned RemainingBytes = SrcVT.getStoreSize();
577     SmallVector<SDValue, 8> LoadVals;
578     while (RemainingBytes > 0) {
579       SDValue ScalarLoad;
580       unsigned LoadBytes = WideBytes;
581 
582       if (RemainingBytes >= LoadBytes) {
583         ScalarLoad =
584             DAG.getLoad(WideVT, dl, Chain, BasePTR,
585                         LD->getPointerInfo().getWithOffset(Offset),
586                         MinAlign(LD->getAlignment(), Offset),
587                         LD->getMemOperand()->getFlags(), LD->getAAInfo());
588       } else {
589         EVT LoadVT = WideVT;
590         while (RemainingBytes < LoadBytes) {
591           LoadBytes >>= 1; // Reduce the load size by half.
592           LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
593         }
594         ScalarLoad =
595             DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
596                            LD->getPointerInfo().getWithOffset(Offset), LoadVT,
597                            MinAlign(LD->getAlignment(), Offset),
598                            LD->getMemOperand()->getFlags(), LD->getAAInfo());
599       }
600 
601       RemainingBytes -= LoadBytes;
602       Offset += LoadBytes;
603 
604       BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
605 
606       LoadVals.push_back(ScalarLoad.getValue(0));
607       LoadChains.push_back(ScalarLoad.getValue(1));
608     }
609 
610     // Extract bits, pack and extend/trunc them into destination type.
611     unsigned SrcEltBits = SrcEltVT.getSizeInBits();
612     SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT);
613 
614     unsigned BitOffset = 0;
615     unsigned WideIdx = 0;
616     unsigned WideBits = WideVT.getSizeInBits();
617 
618     for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
619       SDValue Lo, Hi, ShAmt;
620 
621       if (BitOffset < WideBits) {
622         ShAmt = DAG.getConstant(
623             BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
624         Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
625         Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
626       }
627 
628       BitOffset += SrcEltBits;
629       if (BitOffset >= WideBits) {
630         WideIdx++;
631         BitOffset -= WideBits;
632         if (BitOffset > 0) {
633           ShAmt = DAG.getConstant(
634               SrcEltBits - BitOffset, dl,
635               TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
636           Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
637           Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
638         }
639       }
640 
641       if (Hi.getNode())
642         Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
643 
644       switch (ExtType) {
645       default: llvm_unreachable("Unknown extended-load op!");
646       case ISD::EXTLOAD:
647         Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
648         break;
649       case ISD::ZEXTLOAD:
650         Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
651         break;
652       case ISD::SEXTLOAD:
653         ShAmt =
654             DAG.getConstant(WideBits - SrcEltBits, dl,
655                             TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
656         Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
657         Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
658         Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
659         break;
660       }
661       Vals.push_back(Lo);
662     }
663 
664     NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
665     Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
666   } else {
667     SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
668     // Skip past MERGE_VALUE node if known.
669     if (Scalarized->getOpcode() == ISD::MERGE_VALUES) {
670       NewChain = Scalarized.getOperand(1);
671       Value = Scalarized.getOperand(0);
672     } else {
673       NewChain = Scalarized.getValue(1);
674       Value = Scalarized.getValue(0);
675     }
676   }
677 
678   AddLegalizedOperand(Op.getValue(0), Value);
679   AddLegalizedOperand(Op.getValue(1), NewChain);
680 
681   return (Op.getResNo() ? NewChain : Value);
682 }
683 
684 SDValue VectorLegalizer::ExpandStore(SDValue Op) {
685   StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
686   SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
687   AddLegalizedOperand(Op, TF);
688   return TF;
689 }
690 
691 SDValue VectorLegalizer::Expand(SDValue Op) {
692   switch (Op->getOpcode()) {
693   case ISD::SIGN_EXTEND_INREG:
694     return ExpandSEXTINREG(Op);
695   case ISD::ANY_EXTEND_VECTOR_INREG:
696     return ExpandANY_EXTEND_VECTOR_INREG(Op);
697   case ISD::SIGN_EXTEND_VECTOR_INREG:
698     return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
699   case ISD::ZERO_EXTEND_VECTOR_INREG:
700     return ExpandZERO_EXTEND_VECTOR_INREG(Op);
701   case ISD::BSWAP:
702     return ExpandBSWAP(Op);
703   case ISD::VSELECT:
704     return ExpandVSELECT(Op);
705   case ISD::SELECT:
706     return ExpandSELECT(Op);
707   case ISD::UINT_TO_FP:
708     return ExpandUINT_TO_FLOAT(Op);
709   case ISD::FNEG:
710     return ExpandFNEG(Op);
711   case ISD::FSUB:
712     return ExpandFSUB(Op);
713   case ISD::SETCC:
714     return UnrollVSETCC(Op);
715   case ISD::BITREVERSE:
716     return ExpandBITREVERSE(Op);
717   case ISD::CTLZ:
718   case ISD::CTLZ_ZERO_UNDEF:
719     return ExpandCTLZ(Op);
720   case ISD::CTTZ_ZERO_UNDEF:
721     return ExpandCTTZ_ZERO_UNDEF(Op);
722   case ISD::STRICT_FADD:
723   case ISD::STRICT_FSUB:
724   case ISD::STRICT_FMUL:
725   case ISD::STRICT_FDIV:
726   case ISD::STRICT_FREM:
727   case ISD::STRICT_FSQRT:
728   case ISD::STRICT_FMA:
729   case ISD::STRICT_FPOW:
730   case ISD::STRICT_FPOWI:
731   case ISD::STRICT_FSIN:
732   case ISD::STRICT_FCOS:
733   case ISD::STRICT_FEXP:
734   case ISD::STRICT_FEXP2:
735   case ISD::STRICT_FLOG:
736   case ISD::STRICT_FLOG10:
737   case ISD::STRICT_FLOG2:
738   case ISD::STRICT_FRINT:
739   case ISD::STRICT_FNEARBYINT:
740     return ExpandStrictFPOp(Op);
741   default:
742     return DAG.UnrollVectorOp(Op.getNode());
743   }
744 }
745 
746 SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
747   // Lower a select instruction where the condition is a scalar and the
748   // operands are vectors. Lower this select to VSELECT and implement it
749   // using XOR AND OR. The selector bit is broadcasted.
750   EVT VT = Op.getValueType();
751   SDLoc DL(Op);
752 
753   SDValue Mask = Op.getOperand(0);
754   SDValue Op1 = Op.getOperand(1);
755   SDValue Op2 = Op.getOperand(2);
756 
757   assert(VT.isVector() && !Mask.getValueType().isVector()
758          && Op1.getValueType() == Op2.getValueType() && "Invalid type");
759 
760   // If we can't even use the basic vector operations of
761   // AND,OR,XOR, we will have to scalarize the op.
762   // Notice that the operation may be 'promoted' which means that it is
763   // 'bitcasted' to another type which is handled.
764   // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
765   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
766       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
767       TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand ||
768       TLI.getOperationAction(ISD::BUILD_VECTOR,  VT) == TargetLowering::Expand)
769     return DAG.UnrollVectorOp(Op.getNode());
770 
771   // Generate a mask operand.
772   EVT MaskTy = VT.changeVectorElementTypeToInteger();
773 
774   // What is the size of each element in the vector mask.
775   EVT BitTy = MaskTy.getScalarType();
776 
777   Mask = DAG.getSelect(DL, BitTy, Mask,
778           DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
779                           BitTy),
780           DAG.getConstant(0, DL, BitTy));
781 
782   // Broadcast the mask so that the entire vector is all-one or all zero.
783   Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
784 
785   // Bitcast the operands to be the same type as the mask.
786   // This is needed when we select between FP types because
787   // the mask is a vector of integers.
788   Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
789   Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
790 
791   SDValue AllOnes = DAG.getConstant(
792             APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
793   SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
794 
795   Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
796   Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
797   SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
798   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
799 }
800 
801 SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
802   EVT VT = Op.getValueType();
803 
804   // Make sure that the SRA and SHL instructions are available.
805   if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
806       TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
807     return DAG.UnrollVectorOp(Op.getNode());
808 
809   SDLoc DL(Op);
810   EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
811 
812   unsigned BW = VT.getScalarSizeInBits();
813   unsigned OrigBW = OrigTy.getScalarSizeInBits();
814   SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
815 
816   Op = Op.getOperand(0);
817   Op =   DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
818   return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
819 }
820 
821 // Generically expand a vector anyext in register to a shuffle of the relevant
822 // lanes into the appropriate locations, with other lanes left undef.
823 SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
824   SDLoc DL(Op);
825   EVT VT = Op.getValueType();
826   int NumElements = VT.getVectorNumElements();
827   SDValue Src = Op.getOperand(0);
828   EVT SrcVT = Src.getValueType();
829   int NumSrcElements = SrcVT.getVectorNumElements();
830 
831   // Build a base mask of undef shuffles.
832   SmallVector<int, 16> ShuffleMask;
833   ShuffleMask.resize(NumSrcElements, -1);
834 
835   // Place the extended lanes into the correct locations.
836   int ExtLaneScale = NumSrcElements / NumElements;
837   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
838   for (int i = 0; i < NumElements; ++i)
839     ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
840 
841   return DAG.getNode(
842       ISD::BITCAST, DL, VT,
843       DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
844 }
845 
846 SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
847   SDLoc DL(Op);
848   EVT VT = Op.getValueType();
849   SDValue Src = Op.getOperand(0);
850   EVT SrcVT = Src.getValueType();
851 
852   // First build an any-extend node which can be legalized above when we
853   // recurse through it.
854   Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
855 
856   // Now we need sign extend. Do this by shifting the elements. Even if these
857   // aren't legal operations, they have a better chance of being legalized
858   // without full scalarization than the sign extension does.
859   unsigned EltWidth = VT.getScalarSizeInBits();
860   unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
861   SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
862   return DAG.getNode(ISD::SRA, DL, VT,
863                      DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
864                      ShiftAmount);
865 }
866 
867 // Generically expand a vector zext in register to a shuffle of the relevant
868 // lanes into the appropriate locations, a blend of zero into the high bits,
869 // and a bitcast to the wider element type.
870 SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
871   SDLoc DL(Op);
872   EVT VT = Op.getValueType();
873   int NumElements = VT.getVectorNumElements();
874   SDValue Src = Op.getOperand(0);
875   EVT SrcVT = Src.getValueType();
876   int NumSrcElements = SrcVT.getVectorNumElements();
877 
878   // Build up a zero vector to blend into this one.
879   SDValue Zero = DAG.getConstant(0, DL, SrcVT);
880 
881   // Shuffle the incoming lanes into the correct position, and pull all other
882   // lanes from the zero vector.
883   SmallVector<int, 16> ShuffleMask;
884   ShuffleMask.reserve(NumSrcElements);
885   for (int i = 0; i < NumSrcElements; ++i)
886     ShuffleMask.push_back(i);
887 
888   int ExtLaneScale = NumSrcElements / NumElements;
889   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
890   for (int i = 0; i < NumElements; ++i)
891     ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
892 
893   return DAG.getNode(ISD::BITCAST, DL, VT,
894                      DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
895 }
896 
897 static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
898   int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
899   for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
900     for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
901       ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
902 }
903 
904 SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
905   EVT VT = Op.getValueType();
906 
907   // Generate a byte wise shuffle mask for the BSWAP.
908   SmallVector<int, 16> ShuffleMask;
909   createBSWAPShuffleMask(VT, ShuffleMask);
910   EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
911 
912   // Only emit a shuffle if the mask is legal.
913   if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
914     return DAG.UnrollVectorOp(Op.getNode());
915 
916   SDLoc DL(Op);
917   Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
918   Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
919   return DAG.getNode(ISD::BITCAST, DL, VT, Op);
920 }
921 
922 SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
923   EVT VT = Op.getValueType();
924 
925   // If we have the scalar operation, it's probably cheaper to unroll it.
926   if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
927     return DAG.UnrollVectorOp(Op.getNode());
928 
929   // If the vector element width is a whole number of bytes, test if its legal
930   // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
931   // vector. This greatly reduces the number of bit shifts necessary.
932   unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
933   if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
934     SmallVector<int, 16> BSWAPMask;
935     createBSWAPShuffleMask(VT, BSWAPMask);
936 
937     EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
938     if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
939         (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
940          (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
941           TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
942           TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
943           TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
944       SDLoc DL(Op);
945       Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
946       Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
947                                 BSWAPMask);
948       Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
949       return DAG.getNode(ISD::BITCAST, DL, VT, Op);
950     }
951   }
952 
953   // If we have the appropriate vector bit operations, it is better to use them
954   // than unrolling and expanding each component.
955   if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
956       !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
957       !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
958       !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
959     return DAG.UnrollVectorOp(Op.getNode());
960 
961   // Let LegalizeDAG handle this later.
962   return Op;
963 }
964 
965 SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
966   // Implement VSELECT in terms of XOR, AND, OR
967   // on platforms which do not support blend natively.
968   SDLoc DL(Op);
969 
970   SDValue Mask = Op.getOperand(0);
971   SDValue Op1 = Op.getOperand(1);
972   SDValue Op2 = Op.getOperand(2);
973 
974   EVT VT = Mask.getValueType();
975 
976   // If we can't even use the basic vector operations of
977   // AND,OR,XOR, we will have to scalarize the op.
978   // Notice that the operation may be 'promoted' which means that it is
979   // 'bitcasted' to another type which is handled.
980   // This operation also isn't safe with AND, OR, XOR when the boolean
981   // type is 0/1 as we need an all ones vector constant to mask with.
982   // FIXME: Sign extend 1 to all ones if thats legal on the target.
983   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
984       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
985       TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
986       TLI.getBooleanContents(Op1.getValueType()) !=
987           TargetLowering::ZeroOrNegativeOneBooleanContent)
988     return DAG.UnrollVectorOp(Op.getNode());
989 
990   // If the mask and the type are different sizes, unroll the vector op. This
991   // can occur when getSetCCResultType returns something that is different in
992   // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
993   if (VT.getSizeInBits() != Op1.getValueSizeInBits())
994     return DAG.UnrollVectorOp(Op.getNode());
995 
996   // Bitcast the operands to be the same type as the mask.
997   // This is needed when we select between FP types because
998   // the mask is a vector of integers.
999   Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
1000   Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
1001 
1002   SDValue AllOnes = DAG.getConstant(
1003     APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
1004   SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
1005 
1006   Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
1007   Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
1008   SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
1009   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
1010 }
1011 
1012 SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
1013   EVT VT = Op.getOperand(0).getValueType();
1014   SDLoc DL(Op);
1015 
1016   // Make sure that the SINT_TO_FP and SRL instructions are available.
1017   if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
1018       TLI.getOperationAction(ISD::SRL,        VT) == TargetLowering::Expand)
1019     return DAG.UnrollVectorOp(Op.getNode());
1020 
1021   unsigned BW = VT.getScalarSizeInBits();
1022   assert((BW == 64 || BW == 32) &&
1023          "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
1024 
1025   SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
1026 
1027   // Constants to clear the upper part of the word.
1028   // Notice that we can also use SHL+SHR, but using a constant is slightly
1029   // faster on x86.
1030   uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
1031   SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
1032 
1033   // Two to the power of half-word-size.
1034   SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
1035 
1036   // Clear upper part of LO, lower HI
1037   SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
1038   SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
1039 
1040   // Convert hi and lo to floats
1041   // Convert the hi part back to the upper values
1042   // TODO: Can any fast-math-flags be set on these nodes?
1043   SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
1044           fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
1045   SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
1046 
1047   // Add the two halves
1048   return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
1049 }
1050 
1051 SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
1052   if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
1053     SDLoc DL(Op);
1054     SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
1055     // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
1056     return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
1057                        Zero, Op.getOperand(0));
1058   }
1059   return DAG.UnrollVectorOp(Op.getNode());
1060 }
1061 
1062 SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
1063   // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
1064   // we can defer this to operation legalization where it will be lowered as
1065   // a+(-b).
1066   EVT VT = Op.getValueType();
1067   if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
1068       TLI.isOperationLegalOrCustom(ISD::FADD, VT))
1069     return Op; // Defer to LegalizeDAG
1070 
1071   return DAG.UnrollVectorOp(Op.getNode());
1072 }
1073 
1074 SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
1075   EVT VT = Op.getValueType();
1076   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
1077 
1078   // If the non-ZERO_UNDEF version is supported we can use that instead.
1079   if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
1080       TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
1081     SDLoc DL(Op);
1082     return DAG.getNode(ISD::CTLZ, DL, VT, Op.getOperand(0));
1083   }
1084 
1085   // If we have the appropriate vector bit operations, it is better to use them
1086   // than unrolling and expanding each component.
1087   if (isPowerOf2_32(NumBitsPerElt) &&
1088       TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
1089       TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1090       TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
1091     return Op;
1092 
1093   // Otherwise go ahead and unroll.
1094   return DAG.UnrollVectorOp(Op.getNode());
1095 }
1096 
1097 SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {
1098   EVT VT = Op.getValueType();
1099 
1100   // If the non-ZERO_UNDEF version is supported we can use that instead.
1101   if (TLI.isOperationLegalOrCustom(ISD::CTTZ, VT)) {
1102     SDLoc DL(Op);
1103     return DAG.getNode(ISD::CTTZ, DL, VT, Op.getOperand(0));
1104   }
1105 
1106   // Otherwise go ahead and unroll.
1107   return DAG.UnrollVectorOp(Op.getNode());
1108 }
1109 
1110 SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
1111   EVT VT = Op.getValueType();
1112   EVT EltVT = VT.getVectorElementType();
1113   unsigned NumElems = VT.getVectorNumElements();
1114   unsigned NumOpers = Op.getNumOperands();
1115   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1116   EVT ValueVTs[] = {EltVT, MVT::Other};
1117   SDValue Chain = Op.getOperand(0);
1118   SDLoc dl(Op);
1119 
1120   SmallVector<SDValue, 32> OpValues;
1121   SmallVector<SDValue, 32> OpChains;
1122   for (unsigned i = 0; i < NumElems; ++i) {
1123     SmallVector<SDValue, 4> Opers;
1124     SDValue Idx = DAG.getConstant(i, dl,
1125                                   TLI.getVectorIdxTy(DAG.getDataLayout()));
1126 
1127     // The Chain is the first operand.
1128     Opers.push_back(Chain);
1129 
1130     // Now process the remaining operands.
1131     for (unsigned j = 1; j < NumOpers; ++j) {
1132       SDValue Oper = Op.getOperand(j);
1133       EVT OperVT = Oper.getValueType();
1134 
1135       if (OperVT.isVector())
1136         Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
1137                            EltVT, Oper, Idx);
1138 
1139       Opers.push_back(Oper);
1140     }
1141 
1142     SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
1143 
1144     OpValues.push_back(ScalarOp.getValue(0));
1145     OpChains.push_back(ScalarOp.getValue(1));
1146   }
1147 
1148   SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
1149   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
1150 
1151   AddLegalizedOperand(Op.getValue(0), Result);
1152   AddLegalizedOperand(Op.getValue(1), NewChain);
1153 
1154   return Op.getResNo() ? NewChain : Result;
1155 }
1156 
1157 SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
1158   EVT VT = Op.getValueType();
1159   unsigned NumElems = VT.getVectorNumElements();
1160   EVT EltVT = VT.getVectorElementType();
1161   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
1162   EVT TmpEltVT = LHS.getValueType().getVectorElementType();
1163   SDLoc dl(Op);
1164   SmallVector<SDValue, 8> Ops(NumElems);
1165   for (unsigned i = 0; i < NumElems; ++i) {
1166     SDValue LHSElem = DAG.getNode(
1167         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
1168         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
1169     SDValue RHSElem = DAG.getNode(
1170         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
1171         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
1172     Ops[i] = DAG.getNode(ISD::SETCC, dl,
1173                          TLI.getSetCCResultType(DAG.getDataLayout(),
1174                                                 *DAG.getContext(), TmpEltVT),
1175                          LHSElem, RHSElem, CC);
1176     Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
1177                            DAG.getConstant(APInt::getAllOnesValue
1178                                            (EltVT.getSizeInBits()), dl, EltVT),
1179                            DAG.getConstant(0, dl, EltVT));
1180   }
1181   return DAG.getBuildVector(VT, dl, Ops);
1182 }
1183 
1184 bool SelectionDAG::LegalizeVectors() {
1185   return VectorLegalizer(*this).Run();
1186 }
1187