1 //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SelectionDAG::LegalizeVectors method. 10 // 11 // The vector legalizer looks for vector operations which might need to be 12 // scalarized and legalizes them. This is a separate step from Legalize because 13 // scalarizing can introduce illegal types. For example, suppose we have an 14 // ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition 15 // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the 16 // operation, which introduces nodes with the illegal type i64 which must be 17 // expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; 18 // the operation must be unrolled, which introduces nodes with the illegal 19 // type i8 which must be promoted. 20 // 21 // This does not legalize vector manipulations like ISD::BUILD_VECTOR, 22 // or operations that happen to take a vector which are custom-lowered; 23 // the legalization for such operations never produces nodes 24 // with illegal types, so it's okay to put off legalizing them until 25 // SelectionDAG::Legalize runs. 26 // 27 //===----------------------------------------------------------------------===// 28 29 #include "llvm/ADT/APInt.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/ADT/SmallVector.h" 32 #include "llvm/CodeGen/ISDOpcodes.h" 33 #include "llvm/CodeGen/MachineMemOperand.h" 34 #include "llvm/CodeGen/SelectionDAG.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/CodeGen/TargetLowering.h" 37 #include "llvm/CodeGen/ValueTypes.h" 38 #include "llvm/IR/DataLayout.h" 39 #include "llvm/Support/Casting.h" 40 #include "llvm/Support/Compiler.h" 41 #include "llvm/Support/ErrorHandling.h" 42 #include "llvm/Support/MachineValueType.h" 43 #include "llvm/Support/MathExtras.h" 44 #include <cassert> 45 #include <cstdint> 46 #include <iterator> 47 #include <utility> 48 49 using namespace llvm; 50 51 #define DEBUG_TYPE "legalizevectorops" 52 53 namespace { 54 55 class VectorLegalizer { 56 SelectionDAG& DAG; 57 const TargetLowering &TLI; 58 bool Changed = false; // Keep track of whether anything changed 59 60 /// For nodes that are of legal width, and that have more than one use, this 61 /// map indicates what regularized operand to use. This allows us to avoid 62 /// legalizing the same thing more than once. 63 SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; 64 65 /// Adds a node to the translation cache. 66 void AddLegalizedOperand(SDValue From, SDValue To) { 67 LegalizedNodes.insert(std::make_pair(From, To)); 68 // If someone requests legalization of the new node, return itself. 69 if (From != To) 70 LegalizedNodes.insert(std::make_pair(To, To)); 71 } 72 73 /// Legalizes the given node. 74 SDValue LegalizeOp(SDValue Op); 75 76 /// Assuming the node is legal, "legalize" the results. 77 SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); 78 79 /// Implements unrolling a VSETCC. 80 SDValue UnrollVSETCC(SDValue Op); 81 82 /// Implement expand-based legalization of vector operations. 83 /// 84 /// This is just a high-level routine to dispatch to specific code paths for 85 /// operations to legalize them. 86 SDValue Expand(SDValue Op); 87 88 /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if 89 /// FP_TO_SINT isn't legal. 90 SDValue ExpandFP_TO_UINT(SDValue Op); 91 92 /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if 93 /// SINT_TO_FLOAT and SHR on vectors isn't legal. 94 SDValue ExpandUINT_TO_FLOAT(SDValue Op); 95 96 /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. 97 SDValue ExpandSEXTINREG(SDValue Op); 98 99 /// Implement expansion for ANY_EXTEND_VECTOR_INREG. 100 /// 101 /// Shuffles the low lanes of the operand into place and bitcasts to the proper 102 /// type. The contents of the bits in the extended part of each element are 103 /// undef. 104 SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); 105 106 /// Implement expansion for SIGN_EXTEND_VECTOR_INREG. 107 /// 108 /// Shuffles the low lanes of the operand into place, bitcasts to the proper 109 /// type, then shifts left and arithmetic shifts right to introduce a sign 110 /// extension. 111 SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); 112 113 /// Implement expansion for ZERO_EXTEND_VECTOR_INREG. 114 /// 115 /// Shuffles the low lanes of the operand into place and blends zeros into 116 /// the remaining lanes, finally bitcasting to the proper type. 117 SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); 118 119 /// Implement expand-based legalization of ABS vector operations. 120 /// If following expanding is legal/custom then do it: 121 /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1)) 122 /// else unroll the operation. 123 SDValue ExpandABS(SDValue Op); 124 125 /// Expand bswap of vectors into a shuffle if legal. 126 SDValue ExpandBSWAP(SDValue Op); 127 128 /// Implement vselect in terms of XOR, AND, OR when blend is not 129 /// supported by the target. 130 SDValue ExpandVSELECT(SDValue Op); 131 SDValue ExpandSELECT(SDValue Op); 132 SDValue ExpandLoad(SDValue Op); 133 SDValue ExpandStore(SDValue Op); 134 SDValue ExpandFNEG(SDValue Op); 135 SDValue ExpandFSUB(SDValue Op); 136 SDValue ExpandBITREVERSE(SDValue Op); 137 SDValue ExpandCTPOP(SDValue Op); 138 SDValue ExpandCTLZ(SDValue Op); 139 SDValue ExpandCTTZ(SDValue Op); 140 SDValue ExpandFunnelShift(SDValue Op); 141 SDValue ExpandROT(SDValue Op); 142 SDValue ExpandFMINNUM_FMAXNUM(SDValue Op); 143 SDValue ExpandAddSubSat(SDValue Op); 144 SDValue ExpandStrictFPOp(SDValue Op); 145 146 /// Implements vector promotion. 147 /// 148 /// This is essentially just bitcasting the operands to a different type and 149 /// bitcasting the result back to the original type. 150 SDValue Promote(SDValue Op); 151 152 /// Implements [SU]INT_TO_FP vector promotion. 153 /// 154 /// This is a [zs]ext of the input operand to a larger integer type. 155 SDValue PromoteINT_TO_FP(SDValue Op); 156 157 /// Implements FP_TO_[SU]INT vector promotion of the result type. 158 /// 159 /// It is promoted to a larger integer type. The result is then 160 /// truncated back to the original type. 161 SDValue PromoteFP_TO_INT(SDValue Op); 162 163 public: 164 VectorLegalizer(SelectionDAG& dag) : 165 DAG(dag), TLI(dag.getTargetLoweringInfo()) {} 166 167 /// Begin legalizer the vector operations in the DAG. 168 bool Run(); 169 }; 170 171 } // end anonymous namespace 172 173 bool VectorLegalizer::Run() { 174 // Before we start legalizing vector nodes, check if there are any vectors. 175 bool HasVectors = false; 176 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 177 E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) { 178 // Check if the values of the nodes contain vectors. We don't need to check 179 // the operands because we are going to check their values at some point. 180 for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); 181 J != E; ++J) 182 HasVectors |= J->isVector(); 183 184 // If we found a vector node we can start the legalization. 185 if (HasVectors) 186 break; 187 } 188 189 // If this basic block has no vectors then no need to legalize vectors. 190 if (!HasVectors) 191 return false; 192 193 // The legalize process is inherently a bottom-up recursive process (users 194 // legalize their uses before themselves). Given infinite stack space, we 195 // could just start legalizing on the root and traverse the whole graph. In 196 // practice however, this causes us to run out of stack space on large basic 197 // blocks. To avoid this problem, compute an ordering of the nodes where each 198 // node is only legalized after all of its operands are legalized. 199 DAG.AssignTopologicalOrder(); 200 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 201 E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) 202 LegalizeOp(SDValue(&*I, 0)); 203 204 // Finally, it's possible the root changed. Get the new root. 205 SDValue OldRoot = DAG.getRoot(); 206 assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); 207 DAG.setRoot(LegalizedNodes[OldRoot]); 208 209 LegalizedNodes.clear(); 210 211 // Remove dead nodes now. 212 DAG.RemoveDeadNodes(); 213 214 return Changed; 215 } 216 217 SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { 218 // Generic legalization: just pass the operand through. 219 for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i) 220 AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); 221 return Result.getValue(Op.getResNo()); 222 } 223 224 SDValue VectorLegalizer::LegalizeOp(SDValue Op) { 225 // Note that LegalizeOp may be reentered even from single-use nodes, which 226 // means that we always must cache transformed nodes. 227 DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); 228 if (I != LegalizedNodes.end()) return I->second; 229 230 SDNode* Node = Op.getNode(); 231 232 // Legalize the operands 233 SmallVector<SDValue, 8> Ops; 234 for (const SDValue &Op : Node->op_values()) 235 Ops.push_back(LegalizeOp(Op)); 236 237 SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 238 Op.getResNo()); 239 240 if (Op.getOpcode() == ISD::LOAD) { 241 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); 242 ISD::LoadExtType ExtType = LD->getExtensionType(); 243 if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { 244 LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: "; 245 Node->dump(&DAG)); 246 switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0), 247 LD->getMemoryVT())) { 248 default: llvm_unreachable("This action is not supported yet!"); 249 case TargetLowering::Legal: 250 return TranslateLegalizeResults(Op, Result); 251 case TargetLowering::Custom: 252 if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { 253 assert(Lowered->getNumValues() == Op->getNumValues() && 254 "Unexpected number of results"); 255 if (Lowered != Result) { 256 // Make sure the new code is also legal. 257 Lowered = LegalizeOp(Lowered); 258 Changed = true; 259 } 260 return TranslateLegalizeResults(Op, Lowered); 261 } 262 LLVM_FALLTHROUGH; 263 case TargetLowering::Expand: 264 Changed = true; 265 return LegalizeOp(ExpandLoad(Op)); 266 } 267 } 268 } else if (Op.getOpcode() == ISD::STORE) { 269 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); 270 EVT StVT = ST->getMemoryVT(); 271 MVT ValVT = ST->getValue().getSimpleValueType(); 272 if (StVT.isVector() && ST->isTruncatingStore()) { 273 LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: "; 274 Node->dump(&DAG)); 275 switch (TLI.getTruncStoreAction(ValVT, StVT)) { 276 default: llvm_unreachable("This action is not supported yet!"); 277 case TargetLowering::Legal: 278 return TranslateLegalizeResults(Op, Result); 279 case TargetLowering::Custom: { 280 SDValue Lowered = TLI.LowerOperation(Result, DAG); 281 if (Lowered != Result) { 282 // Make sure the new code is also legal. 283 Lowered = LegalizeOp(Lowered); 284 Changed = true; 285 } 286 return TranslateLegalizeResults(Op, Lowered); 287 } 288 case TargetLowering::Expand: 289 Changed = true; 290 return LegalizeOp(ExpandStore(Op)); 291 } 292 } 293 } 294 295 bool HasVectorValue = false; 296 for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); 297 J != E; 298 ++J) 299 HasVectorValue |= J->isVector(); 300 if (!HasVectorValue) 301 return TranslateLegalizeResults(Op, Result); 302 303 TargetLowering::LegalizeAction Action = TargetLowering::Legal; 304 switch (Op.getOpcode()) { 305 default: 306 return TranslateLegalizeResults(Op, Result); 307 case ISD::STRICT_FADD: 308 case ISD::STRICT_FSUB: 309 case ISD::STRICT_FMUL: 310 case ISD::STRICT_FDIV: 311 case ISD::STRICT_FREM: 312 case ISD::STRICT_FSQRT: 313 case ISD::STRICT_FMA: 314 case ISD::STRICT_FPOW: 315 case ISD::STRICT_FPOWI: 316 case ISD::STRICT_FSIN: 317 case ISD::STRICT_FCOS: 318 case ISD::STRICT_FEXP: 319 case ISD::STRICT_FEXP2: 320 case ISD::STRICT_FLOG: 321 case ISD::STRICT_FLOG10: 322 case ISD::STRICT_FLOG2: 323 case ISD::STRICT_FRINT: 324 case ISD::STRICT_FNEARBYINT: 325 case ISD::STRICT_FMAXNUM: 326 case ISD::STRICT_FMINNUM: 327 case ISD::STRICT_FCEIL: 328 case ISD::STRICT_FFLOOR: 329 case ISD::STRICT_FROUND: 330 case ISD::STRICT_FTRUNC: 331 // These pseudo-ops get legalized as if they were their non-strict 332 // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT 333 // is also legal, but if ISD::FSQRT requires expansion then so does 334 // ISD::STRICT_FSQRT. 335 Action = TLI.getStrictFPOperationAction(Node->getOpcode(), 336 Node->getValueType(0)); 337 break; 338 case ISD::ADD: 339 case ISD::SUB: 340 case ISD::MUL: 341 case ISD::MULHS: 342 case ISD::MULHU: 343 case ISD::SDIV: 344 case ISD::UDIV: 345 case ISD::SREM: 346 case ISD::UREM: 347 case ISD::SDIVREM: 348 case ISD::UDIVREM: 349 case ISD::FADD: 350 case ISD::FSUB: 351 case ISD::FMUL: 352 case ISD::FDIV: 353 case ISD::FREM: 354 case ISD::AND: 355 case ISD::OR: 356 case ISD::XOR: 357 case ISD::SHL: 358 case ISD::SRA: 359 case ISD::SRL: 360 case ISD::FSHL: 361 case ISD::FSHR: 362 case ISD::ROTL: 363 case ISD::ROTR: 364 case ISD::ABS: 365 case ISD::BSWAP: 366 case ISD::BITREVERSE: 367 case ISD::CTLZ: 368 case ISD::CTTZ: 369 case ISD::CTLZ_ZERO_UNDEF: 370 case ISD::CTTZ_ZERO_UNDEF: 371 case ISD::CTPOP: 372 case ISD::SELECT: 373 case ISD::VSELECT: 374 case ISD::SELECT_CC: 375 case ISD::SETCC: 376 case ISD::ZERO_EXTEND: 377 case ISD::ANY_EXTEND: 378 case ISD::TRUNCATE: 379 case ISD::SIGN_EXTEND: 380 case ISD::FP_TO_SINT: 381 case ISD::FP_TO_UINT: 382 case ISD::FNEG: 383 case ISD::FABS: 384 case ISD::FMINNUM: 385 case ISD::FMAXNUM: 386 case ISD::FMINNUM_IEEE: 387 case ISD::FMAXNUM_IEEE: 388 case ISD::FMINIMUM: 389 case ISD::FMAXIMUM: 390 case ISD::FCOPYSIGN: 391 case ISD::FSQRT: 392 case ISD::FSIN: 393 case ISD::FCOS: 394 case ISD::FPOWI: 395 case ISD::FPOW: 396 case ISD::FLOG: 397 case ISD::FLOG2: 398 case ISD::FLOG10: 399 case ISD::FEXP: 400 case ISD::FEXP2: 401 case ISD::FCEIL: 402 case ISD::FTRUNC: 403 case ISD::FRINT: 404 case ISD::FNEARBYINT: 405 case ISD::FROUND: 406 case ISD::FFLOOR: 407 case ISD::FP_ROUND: 408 case ISD::FP_EXTEND: 409 case ISD::FMA: 410 case ISD::SIGN_EXTEND_INREG: 411 case ISD::ANY_EXTEND_VECTOR_INREG: 412 case ISD::SIGN_EXTEND_VECTOR_INREG: 413 case ISD::ZERO_EXTEND_VECTOR_INREG: 414 case ISD::SMIN: 415 case ISD::SMAX: 416 case ISD::UMIN: 417 case ISD::UMAX: 418 case ISD::SMUL_LOHI: 419 case ISD::UMUL_LOHI: 420 case ISD::FCANONICALIZE: 421 case ISD::SADDSAT: 422 case ISD::UADDSAT: 423 case ISD::SSUBSAT: 424 case ISD::USUBSAT: 425 Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); 426 break; 427 case ISD::SMULFIX: { 428 unsigned Scale = Node->getConstantOperandVal(2); 429 Action = TLI.getFixedPointOperationAction(Node->getOpcode(), 430 Node->getValueType(0), Scale); 431 break; 432 } 433 case ISD::FP_ROUND_INREG: 434 Action = TLI.getOperationAction(Node->getOpcode(), 435 cast<VTSDNode>(Node->getOperand(1))->getVT()); 436 break; 437 case ISD::SINT_TO_FP: 438 case ISD::UINT_TO_FP: 439 Action = TLI.getOperationAction(Node->getOpcode(), 440 Node->getOperand(0).getValueType()); 441 break; 442 } 443 444 LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); 445 446 switch (Action) { 447 default: llvm_unreachable("This action is not supported yet!"); 448 case TargetLowering::Promote: 449 Result = Promote(Op); 450 Changed = true; 451 break; 452 case TargetLowering::Legal: 453 LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); 454 break; 455 case TargetLowering::Custom: { 456 LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); 457 if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { 458 LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); 459 Result = Tmp1; 460 break; 461 } 462 LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); 463 LLVM_FALLTHROUGH; 464 } 465 case TargetLowering::Expand: 466 Result = Expand(Op); 467 } 468 469 // Make sure that the generated code is itself legal. 470 if (Result != Op) { 471 Result = LegalizeOp(Result); 472 Changed = true; 473 } 474 475 // Note that LegalizeOp may be reentered even from single-use nodes, which 476 // means that we always must cache transformed nodes. 477 AddLegalizedOperand(Op, Result); 478 return Result; 479 } 480 481 SDValue VectorLegalizer::Promote(SDValue Op) { 482 // For a few operations there is a specific concept for promotion based on 483 // the operand's type. 484 switch (Op.getOpcode()) { 485 case ISD::SINT_TO_FP: 486 case ISD::UINT_TO_FP: 487 // "Promote" the operation by extending the operand. 488 return PromoteINT_TO_FP(Op); 489 case ISD::FP_TO_UINT: 490 case ISD::FP_TO_SINT: 491 // Promote the operation by extending the operand. 492 return PromoteFP_TO_INT(Op); 493 } 494 495 // There are currently two cases of vector promotion: 496 // 1) Bitcasting a vector of integers to a different type to a vector of the 497 // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. 498 // 2) Extending a vector of floats to a vector of the same number of larger 499 // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. 500 MVT VT = Op.getSimpleValueType(); 501 assert(Op.getNode()->getNumValues() == 1 && 502 "Can't promote a vector with multiple results!"); 503 MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); 504 SDLoc dl(Op); 505 SmallVector<SDValue, 4> Operands(Op.getNumOperands()); 506 507 for (unsigned j = 0; j != Op.getNumOperands(); ++j) { 508 if (Op.getOperand(j).getValueType().isVector()) 509 if (Op.getOperand(j) 510 .getValueType() 511 .getVectorElementType() 512 .isFloatingPoint() && 513 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()) 514 Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j)); 515 else 516 Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); 517 else 518 Operands[j] = Op.getOperand(j); 519 } 520 521 Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags()); 522 if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || 523 (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && 524 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) 525 return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl)); 526 else 527 return DAG.getNode(ISD::BITCAST, dl, VT, Op); 528 } 529 530 SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { 531 // INT_TO_FP operations may require the input operand be promoted even 532 // when the type is otherwise legal. 533 MVT VT = Op.getOperand(0).getSimpleValueType(); 534 MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); 535 assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && 536 "Vectors have different number of elements!"); 537 538 SDLoc dl(Op); 539 SmallVector<SDValue, 4> Operands(Op.getNumOperands()); 540 541 unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : 542 ISD::SIGN_EXTEND; 543 for (unsigned j = 0; j != Op.getNumOperands(); ++j) { 544 if (Op.getOperand(j).getValueType().isVector()) 545 Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j)); 546 else 547 Operands[j] = Op.getOperand(j); 548 } 549 550 return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands); 551 } 552 553 // For FP_TO_INT we promote the result type to a vector type with wider 554 // elements and then truncate the result. This is different from the default 555 // PromoteVector which uses bitcast to promote thus assumning that the 556 // promoted vector type has the same overall size. 557 SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) { 558 MVT VT = Op.getSimpleValueType(); 559 MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); 560 assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && 561 "Vectors have different number of elements!"); 562 563 unsigned NewOpc = Op->getOpcode(); 564 // Change FP_TO_UINT to FP_TO_SINT if possible. 565 // TODO: Should we only do this if FP_TO_UINT itself isn't legal? 566 if (NewOpc == ISD::FP_TO_UINT && 567 TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) 568 NewOpc = ISD::FP_TO_SINT; 569 570 SDLoc dl(Op); 571 SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0)); 572 573 // Assert that the converted value fits in the original type. If it doesn't 574 // (eg: because the value being converted is too big), then the result of the 575 // original operation was undefined anyway, so the assert is still correct. 576 Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext 577 : ISD::AssertSext, 578 dl, NVT, Promoted, 579 DAG.getValueType(VT.getScalarType())); 580 return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); 581 } 582 583 SDValue VectorLegalizer::ExpandLoad(SDValue Op) { 584 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); 585 586 EVT SrcVT = LD->getMemoryVT(); 587 EVT SrcEltVT = SrcVT.getScalarType(); 588 unsigned NumElem = SrcVT.getVectorNumElements(); 589 590 SDValue NewChain; 591 SDValue Value; 592 if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { 593 SDLoc dl(Op); 594 595 SmallVector<SDValue, 8> Vals; 596 SmallVector<SDValue, 8> LoadChains; 597 598 EVT DstEltVT = LD->getValueType(0).getScalarType(); 599 SDValue Chain = LD->getChain(); 600 SDValue BasePTR = LD->getBasePtr(); 601 ISD::LoadExtType ExtType = LD->getExtensionType(); 602 603 // When elements in a vector is not byte-addressable, we cannot directly 604 // load each element by advancing pointer, which could only address bytes. 605 // Instead, we load all significant words, mask bits off, and concatenate 606 // them to form each element. Finally, they are extended to destination 607 // scalar type to build the destination vector. 608 EVT WideVT = TLI.getPointerTy(DAG.getDataLayout()); 609 610 assert(WideVT.isRound() && 611 "Could not handle the sophisticated case when the widest integer is" 612 " not power of 2."); 613 assert(WideVT.bitsGE(SrcEltVT) && 614 "Type is not legalized?"); 615 616 unsigned WideBytes = WideVT.getStoreSize(); 617 unsigned Offset = 0; 618 unsigned RemainingBytes = SrcVT.getStoreSize(); 619 SmallVector<SDValue, 8> LoadVals; 620 while (RemainingBytes > 0) { 621 SDValue ScalarLoad; 622 unsigned LoadBytes = WideBytes; 623 624 if (RemainingBytes >= LoadBytes) { 625 ScalarLoad = 626 DAG.getLoad(WideVT, dl, Chain, BasePTR, 627 LD->getPointerInfo().getWithOffset(Offset), 628 MinAlign(LD->getAlignment(), Offset), 629 LD->getMemOperand()->getFlags(), LD->getAAInfo()); 630 } else { 631 EVT LoadVT = WideVT; 632 while (RemainingBytes < LoadBytes) { 633 LoadBytes >>= 1; // Reduce the load size by half. 634 LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); 635 } 636 ScalarLoad = 637 DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, 638 LD->getPointerInfo().getWithOffset(Offset), LoadVT, 639 MinAlign(LD->getAlignment(), Offset), 640 LD->getMemOperand()->getFlags(), LD->getAAInfo()); 641 } 642 643 RemainingBytes -= LoadBytes; 644 Offset += LoadBytes; 645 646 BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes); 647 648 LoadVals.push_back(ScalarLoad.getValue(0)); 649 LoadChains.push_back(ScalarLoad.getValue(1)); 650 } 651 652 // Extract bits, pack and extend/trunc them into destination type. 653 unsigned SrcEltBits = SrcEltVT.getSizeInBits(); 654 SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT); 655 656 unsigned BitOffset = 0; 657 unsigned WideIdx = 0; 658 unsigned WideBits = WideVT.getSizeInBits(); 659 660 for (unsigned Idx = 0; Idx != NumElem; ++Idx) { 661 SDValue Lo, Hi, ShAmt; 662 663 if (BitOffset < WideBits) { 664 ShAmt = DAG.getConstant( 665 BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); 666 Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); 667 Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); 668 } 669 670 BitOffset += SrcEltBits; 671 if (BitOffset >= WideBits) { 672 WideIdx++; 673 BitOffset -= WideBits; 674 if (BitOffset > 0) { 675 ShAmt = DAG.getConstant( 676 SrcEltBits - BitOffset, dl, 677 TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); 678 Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); 679 Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); 680 } 681 } 682 683 if (Hi.getNode()) 684 Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); 685 686 switch (ExtType) { 687 default: llvm_unreachable("Unknown extended-load op!"); 688 case ISD::EXTLOAD: 689 Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT); 690 break; 691 case ISD::ZEXTLOAD: 692 Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); 693 break; 694 case ISD::SEXTLOAD: 695 ShAmt = 696 DAG.getConstant(WideBits - SrcEltBits, dl, 697 TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); 698 Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); 699 Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); 700 Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); 701 break; 702 } 703 Vals.push_back(Lo); 704 } 705 706 NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 707 Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals); 708 } else { 709 SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG); 710 // Skip past MERGE_VALUE node if known. 711 if (Scalarized->getOpcode() == ISD::MERGE_VALUES) { 712 NewChain = Scalarized.getOperand(1); 713 Value = Scalarized.getOperand(0); 714 } else { 715 NewChain = Scalarized.getValue(1); 716 Value = Scalarized.getValue(0); 717 } 718 } 719 720 AddLegalizedOperand(Op.getValue(0), Value); 721 AddLegalizedOperand(Op.getValue(1), NewChain); 722 723 return (Op.getResNo() ? NewChain : Value); 724 } 725 726 SDValue VectorLegalizer::ExpandStore(SDValue Op) { 727 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); 728 SDValue TF = TLI.scalarizeVectorStore(ST, DAG); 729 AddLegalizedOperand(Op, TF); 730 return TF; 731 } 732 733 SDValue VectorLegalizer::Expand(SDValue Op) { 734 switch (Op->getOpcode()) { 735 case ISD::SIGN_EXTEND_INREG: 736 return ExpandSEXTINREG(Op); 737 case ISD::ANY_EXTEND_VECTOR_INREG: 738 return ExpandANY_EXTEND_VECTOR_INREG(Op); 739 case ISD::SIGN_EXTEND_VECTOR_INREG: 740 return ExpandSIGN_EXTEND_VECTOR_INREG(Op); 741 case ISD::ZERO_EXTEND_VECTOR_INREG: 742 return ExpandZERO_EXTEND_VECTOR_INREG(Op); 743 case ISD::BSWAP: 744 return ExpandBSWAP(Op); 745 case ISD::VSELECT: 746 return ExpandVSELECT(Op); 747 case ISD::SELECT: 748 return ExpandSELECT(Op); 749 case ISD::FP_TO_UINT: 750 return ExpandFP_TO_UINT(Op); 751 case ISD::UINT_TO_FP: 752 return ExpandUINT_TO_FLOAT(Op); 753 case ISD::FNEG: 754 return ExpandFNEG(Op); 755 case ISD::FSUB: 756 return ExpandFSUB(Op); 757 case ISD::SETCC: 758 return UnrollVSETCC(Op); 759 case ISD::ABS: 760 return ExpandABS(Op); 761 case ISD::BITREVERSE: 762 return ExpandBITREVERSE(Op); 763 case ISD::CTPOP: 764 return ExpandCTPOP(Op); 765 case ISD::CTLZ: 766 case ISD::CTLZ_ZERO_UNDEF: 767 return ExpandCTLZ(Op); 768 case ISD::CTTZ: 769 case ISD::CTTZ_ZERO_UNDEF: 770 return ExpandCTTZ(Op); 771 case ISD::FSHL: 772 case ISD::FSHR: 773 return ExpandFunnelShift(Op); 774 case ISD::ROTL: 775 case ISD::ROTR: 776 return ExpandROT(Op); 777 case ISD::FMINNUM: 778 case ISD::FMAXNUM: 779 return ExpandFMINNUM_FMAXNUM(Op); 780 case ISD::USUBSAT: 781 case ISD::SSUBSAT: 782 case ISD::UADDSAT: 783 case ISD::SADDSAT: 784 return ExpandAddSubSat(Op); 785 case ISD::STRICT_FADD: 786 case ISD::STRICT_FSUB: 787 case ISD::STRICT_FMUL: 788 case ISD::STRICT_FDIV: 789 case ISD::STRICT_FREM: 790 case ISD::STRICT_FSQRT: 791 case ISD::STRICT_FMA: 792 case ISD::STRICT_FPOW: 793 case ISD::STRICT_FPOWI: 794 case ISD::STRICT_FSIN: 795 case ISD::STRICT_FCOS: 796 case ISD::STRICT_FEXP: 797 case ISD::STRICT_FEXP2: 798 case ISD::STRICT_FLOG: 799 case ISD::STRICT_FLOG10: 800 case ISD::STRICT_FLOG2: 801 case ISD::STRICT_FRINT: 802 case ISD::STRICT_FNEARBYINT: 803 case ISD::STRICT_FMAXNUM: 804 case ISD::STRICT_FMINNUM: 805 case ISD::STRICT_FCEIL: 806 case ISD::STRICT_FFLOOR: 807 case ISD::STRICT_FROUND: 808 case ISD::STRICT_FTRUNC: 809 return ExpandStrictFPOp(Op); 810 default: 811 return DAG.UnrollVectorOp(Op.getNode()); 812 } 813 } 814 815 SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { 816 // Lower a select instruction where the condition is a scalar and the 817 // operands are vectors. Lower this select to VSELECT and implement it 818 // using XOR AND OR. The selector bit is broadcasted. 819 EVT VT = Op.getValueType(); 820 SDLoc DL(Op); 821 822 SDValue Mask = Op.getOperand(0); 823 SDValue Op1 = Op.getOperand(1); 824 SDValue Op2 = Op.getOperand(2); 825 826 assert(VT.isVector() && !Mask.getValueType().isVector() 827 && Op1.getValueType() == Op2.getValueType() && "Invalid type"); 828 829 // If we can't even use the basic vector operations of 830 // AND,OR,XOR, we will have to scalarize the op. 831 // Notice that the operation may be 'promoted' which means that it is 832 // 'bitcasted' to another type which is handled. 833 // Also, we need to be able to construct a splat vector using BUILD_VECTOR. 834 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || 835 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || 836 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || 837 TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand) 838 return DAG.UnrollVectorOp(Op.getNode()); 839 840 // Generate a mask operand. 841 EVT MaskTy = VT.changeVectorElementTypeToInteger(); 842 843 // What is the size of each element in the vector mask. 844 EVT BitTy = MaskTy.getScalarType(); 845 846 Mask = DAG.getSelect(DL, BitTy, Mask, 847 DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, 848 BitTy), 849 DAG.getConstant(0, DL, BitTy)); 850 851 // Broadcast the mask so that the entire vector is all-one or all zero. 852 Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask); 853 854 // Bitcast the operands to be the same type as the mask. 855 // This is needed when we select between FP types because 856 // the mask is a vector of integers. 857 Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); 858 Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); 859 860 SDValue AllOnes = DAG.getConstant( 861 APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy); 862 SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); 863 864 Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); 865 Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); 866 SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); 867 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); 868 } 869 870 SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { 871 EVT VT = Op.getValueType(); 872 873 // Make sure that the SRA and SHL instructions are available. 874 if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || 875 TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) 876 return DAG.UnrollVectorOp(Op.getNode()); 877 878 SDLoc DL(Op); 879 EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); 880 881 unsigned BW = VT.getScalarSizeInBits(); 882 unsigned OrigBW = OrigTy.getScalarSizeInBits(); 883 SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); 884 885 Op = Op.getOperand(0); 886 Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); 887 return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); 888 } 889 890 // Generically expand a vector anyext in register to a shuffle of the relevant 891 // lanes into the appropriate locations, with other lanes left undef. 892 SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { 893 SDLoc DL(Op); 894 EVT VT = Op.getValueType(); 895 int NumElements = VT.getVectorNumElements(); 896 SDValue Src = Op.getOperand(0); 897 EVT SrcVT = Src.getValueType(); 898 int NumSrcElements = SrcVT.getVectorNumElements(); 899 900 // Build a base mask of undef shuffles. 901 SmallVector<int, 16> ShuffleMask; 902 ShuffleMask.resize(NumSrcElements, -1); 903 904 // Place the extended lanes into the correct locations. 905 int ExtLaneScale = NumSrcElements / NumElements; 906 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; 907 for (int i = 0; i < NumElements; ++i) 908 ShuffleMask[i * ExtLaneScale + EndianOffset] = i; 909 910 return DAG.getNode( 911 ISD::BITCAST, DL, VT, 912 DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask)); 913 } 914 915 SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { 916 SDLoc DL(Op); 917 EVT VT = Op.getValueType(); 918 SDValue Src = Op.getOperand(0); 919 EVT SrcVT = Src.getValueType(); 920 921 // First build an any-extend node which can be legalized above when we 922 // recurse through it. 923 Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src); 924 925 // Now we need sign extend. Do this by shifting the elements. Even if these 926 // aren't legal operations, they have a better chance of being legalized 927 // without full scalarization than the sign extension does. 928 unsigned EltWidth = VT.getScalarSizeInBits(); 929 unsigned SrcEltWidth = SrcVT.getScalarSizeInBits(); 930 SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); 931 return DAG.getNode(ISD::SRA, DL, VT, 932 DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), 933 ShiftAmount); 934 } 935 936 // Generically expand a vector zext in register to a shuffle of the relevant 937 // lanes into the appropriate locations, a blend of zero into the high bits, 938 // and a bitcast to the wider element type. 939 SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { 940 SDLoc DL(Op); 941 EVT VT = Op.getValueType(); 942 int NumElements = VT.getVectorNumElements(); 943 SDValue Src = Op.getOperand(0); 944 EVT SrcVT = Src.getValueType(); 945 int NumSrcElements = SrcVT.getVectorNumElements(); 946 947 // Build up a zero vector to blend into this one. 948 SDValue Zero = DAG.getConstant(0, DL, SrcVT); 949 950 // Shuffle the incoming lanes into the correct position, and pull all other 951 // lanes from the zero vector. 952 SmallVector<int, 16> ShuffleMask; 953 ShuffleMask.reserve(NumSrcElements); 954 for (int i = 0; i < NumSrcElements; ++i) 955 ShuffleMask.push_back(i); 956 957 int ExtLaneScale = NumSrcElements / NumElements; 958 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; 959 for (int i = 0; i < NumElements; ++i) 960 ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; 961 962 return DAG.getNode(ISD::BITCAST, DL, VT, 963 DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); 964 } 965 966 static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { 967 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; 968 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) 969 for (int J = ScalarSizeInBytes - 1; J >= 0; --J) 970 ShuffleMask.push_back((I * ScalarSizeInBytes) + J); 971 } 972 973 SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { 974 EVT VT = Op.getValueType(); 975 976 // Generate a byte wise shuffle mask for the BSWAP. 977 SmallVector<int, 16> ShuffleMask; 978 createBSWAPShuffleMask(VT, ShuffleMask); 979 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); 980 981 // Only emit a shuffle if the mask is legal. 982 if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) 983 return DAG.UnrollVectorOp(Op.getNode()); 984 985 SDLoc DL(Op); 986 Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); 987 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); 988 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 989 } 990 991 SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { 992 EVT VT = Op.getValueType(); 993 994 // If we have the scalar operation, it's probably cheaper to unroll it. 995 if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) 996 return DAG.UnrollVectorOp(Op.getNode()); 997 998 // If the vector element width is a whole number of bytes, test if its legal 999 // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte 1000 // vector. This greatly reduces the number of bit shifts necessary. 1001 unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); 1002 if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) { 1003 SmallVector<int, 16> BSWAPMask; 1004 createBSWAPShuffleMask(VT, BSWAPMask); 1005 1006 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size()); 1007 if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) && 1008 (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) || 1009 (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) && 1010 TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) && 1011 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) && 1012 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) { 1013 SDLoc DL(Op); 1014 Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); 1015 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), 1016 BSWAPMask); 1017 Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op); 1018 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 1019 } 1020 } 1021 1022 // If we have the appropriate vector bit operations, it is better to use them 1023 // than unrolling and expanding each component. 1024 if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || 1025 !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || 1026 !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) || 1027 !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) 1028 return DAG.UnrollVectorOp(Op.getNode()); 1029 1030 // Let LegalizeDAG handle this later. 1031 return Op; 1032 } 1033 1034 SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { 1035 // Implement VSELECT in terms of XOR, AND, OR 1036 // on platforms which do not support blend natively. 1037 SDLoc DL(Op); 1038 1039 SDValue Mask = Op.getOperand(0); 1040 SDValue Op1 = Op.getOperand(1); 1041 SDValue Op2 = Op.getOperand(2); 1042 1043 EVT VT = Mask.getValueType(); 1044 1045 // If we can't even use the basic vector operations of 1046 // AND,OR,XOR, we will have to scalarize the op. 1047 // Notice that the operation may be 'promoted' which means that it is 1048 // 'bitcasted' to another type which is handled. 1049 // This operation also isn't safe with AND, OR, XOR when the boolean 1050 // type is 0/1 as we need an all ones vector constant to mask with. 1051 // FIXME: Sign extend 1 to all ones if thats legal on the target. 1052 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || 1053 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || 1054 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || 1055 TLI.getBooleanContents(Op1.getValueType()) != 1056 TargetLowering::ZeroOrNegativeOneBooleanContent) 1057 return DAG.UnrollVectorOp(Op.getNode()); 1058 1059 // If the mask and the type are different sizes, unroll the vector op. This 1060 // can occur when getSetCCResultType returns something that is different in 1061 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. 1062 if (VT.getSizeInBits() != Op1.getValueSizeInBits()) 1063 return DAG.UnrollVectorOp(Op.getNode()); 1064 1065 // Bitcast the operands to be the same type as the mask. 1066 // This is needed when we select between FP types because 1067 // the mask is a vector of integers. 1068 Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1); 1069 Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); 1070 1071 SDValue AllOnes = DAG.getConstant( 1072 APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT); 1073 SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); 1074 1075 Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); 1076 Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); 1077 SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); 1078 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); 1079 } 1080 1081 SDValue VectorLegalizer::ExpandABS(SDValue Op) { 1082 // Attempt to expand using TargetLowering. 1083 SDValue Result; 1084 if (TLI.expandABS(Op.getNode(), Result, DAG)) 1085 return Result; 1086 1087 // Otherwise go ahead and unroll. 1088 return DAG.UnrollVectorOp(Op.getNode()); 1089 } 1090 1091 SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) { 1092 // Attempt to expand using TargetLowering. 1093 SDValue Result; 1094 if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG)) 1095 return Result; 1096 1097 // Otherwise go ahead and unroll. 1098 return DAG.UnrollVectorOp(Op.getNode()); 1099 } 1100 1101 SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { 1102 EVT VT = Op.getOperand(0).getValueType(); 1103 SDLoc DL(Op); 1104 1105 // Attempt to expand using TargetLowering. 1106 SDValue Result; 1107 if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG)) 1108 return Result; 1109 1110 // Make sure that the SINT_TO_FP and SRL instructions are available. 1111 if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || 1112 TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) 1113 return DAG.UnrollVectorOp(Op.getNode()); 1114 1115 unsigned BW = VT.getScalarSizeInBits(); 1116 assert((BW == 64 || BW == 32) && 1117 "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); 1118 1119 SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT); 1120 1121 // Constants to clear the upper part of the word. 1122 // Notice that we can also use SHL+SHR, but using a constant is slightly 1123 // faster on x86. 1124 uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF; 1125 SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); 1126 1127 // Two to the power of half-word-size. 1128 SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType()); 1129 1130 // Clear upper part of LO, lower HI 1131 SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); 1132 SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); 1133 1134 // Convert hi and lo to floats 1135 // Convert the hi part back to the upper values 1136 // TODO: Can any fast-math-flags be set on these nodes? 1137 SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); 1138 fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); 1139 SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); 1140 1141 // Add the two halves 1142 return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); 1143 } 1144 1145 SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { 1146 if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { 1147 SDLoc DL(Op); 1148 SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); 1149 // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. 1150 return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), 1151 Zero, Op.getOperand(0)); 1152 } 1153 return DAG.UnrollVectorOp(Op.getNode()); 1154 } 1155 1156 SDValue VectorLegalizer::ExpandFSUB(SDValue Op) { 1157 // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal, 1158 // we can defer this to operation legalization where it will be lowered as 1159 // a+(-b). 1160 EVT VT = Op.getValueType(); 1161 if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && 1162 TLI.isOperationLegalOrCustom(ISD::FADD, VT)) 1163 return Op; // Defer to LegalizeDAG 1164 1165 return DAG.UnrollVectorOp(Op.getNode()); 1166 } 1167 1168 SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) { 1169 SDValue Result; 1170 if (TLI.expandCTPOP(Op.getNode(), Result, DAG)) 1171 return Result; 1172 1173 return DAG.UnrollVectorOp(Op.getNode()); 1174 } 1175 1176 SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { 1177 SDValue Result; 1178 if (TLI.expandCTLZ(Op.getNode(), Result, DAG)) 1179 return Result; 1180 1181 return DAG.UnrollVectorOp(Op.getNode()); 1182 } 1183 1184 SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) { 1185 SDValue Result; 1186 if (TLI.expandCTTZ(Op.getNode(), Result, DAG)) 1187 return Result; 1188 1189 return DAG.UnrollVectorOp(Op.getNode()); 1190 } 1191 1192 SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) { 1193 SDValue Result; 1194 if (TLI.expandFunnelShift(Op.getNode(), Result, DAG)) 1195 return Result; 1196 1197 return DAG.UnrollVectorOp(Op.getNode()); 1198 } 1199 1200 SDValue VectorLegalizer::ExpandROT(SDValue Op) { 1201 SDValue Result; 1202 if (TLI.expandROT(Op.getNode(), Result, DAG)) 1203 return Result; 1204 1205 return DAG.UnrollVectorOp(Op.getNode()); 1206 } 1207 1208 SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) { 1209 if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG)) 1210 return Expanded; 1211 return DAG.UnrollVectorOp(Op.getNode()); 1212 } 1213 1214 SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) { 1215 if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG)) 1216 return Expanded; 1217 return DAG.UnrollVectorOp(Op.getNode()); 1218 } 1219 1220 SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { 1221 EVT VT = Op.getValueType(); 1222 EVT EltVT = VT.getVectorElementType(); 1223 unsigned NumElems = VT.getVectorNumElements(); 1224 unsigned NumOpers = Op.getNumOperands(); 1225 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1226 EVT ValueVTs[] = {EltVT, MVT::Other}; 1227 SDValue Chain = Op.getOperand(0); 1228 SDLoc dl(Op); 1229 1230 SmallVector<SDValue, 32> OpValues; 1231 SmallVector<SDValue, 32> OpChains; 1232 for (unsigned i = 0; i < NumElems; ++i) { 1233 SmallVector<SDValue, 4> Opers; 1234 SDValue Idx = DAG.getConstant(i, dl, 1235 TLI.getVectorIdxTy(DAG.getDataLayout())); 1236 1237 // The Chain is the first operand. 1238 Opers.push_back(Chain); 1239 1240 // Now process the remaining operands. 1241 for (unsigned j = 1; j < NumOpers; ++j) { 1242 SDValue Oper = Op.getOperand(j); 1243 EVT OperVT = Oper.getValueType(); 1244 1245 if (OperVT.isVector()) 1246 Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, 1247 EltVT, Oper, Idx); 1248 1249 Opers.push_back(Oper); 1250 } 1251 1252 SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers); 1253 1254 OpValues.push_back(ScalarOp.getValue(0)); 1255 OpChains.push_back(ScalarOp.getValue(1)); 1256 } 1257 1258 SDValue Result = DAG.getBuildVector(VT, dl, OpValues); 1259 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); 1260 1261 AddLegalizedOperand(Op.getValue(0), Result); 1262 AddLegalizedOperand(Op.getValue(1), NewChain); 1263 1264 return Op.getResNo() ? NewChain : Result; 1265 } 1266 1267 SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { 1268 EVT VT = Op.getValueType(); 1269 unsigned NumElems = VT.getVectorNumElements(); 1270 EVT EltVT = VT.getVectorElementType(); 1271 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); 1272 EVT TmpEltVT = LHS.getValueType().getVectorElementType(); 1273 SDLoc dl(Op); 1274 SmallVector<SDValue, 8> Ops(NumElems); 1275 for (unsigned i = 0; i < NumElems; ++i) { 1276 SDValue LHSElem = DAG.getNode( 1277 ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, 1278 DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); 1279 SDValue RHSElem = DAG.getNode( 1280 ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, 1281 DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); 1282 Ops[i] = DAG.getNode(ISD::SETCC, dl, 1283 TLI.getSetCCResultType(DAG.getDataLayout(), 1284 *DAG.getContext(), TmpEltVT), 1285 LHSElem, RHSElem, CC); 1286 Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], 1287 DAG.getConstant(APInt::getAllOnesValue 1288 (EltVT.getSizeInBits()), dl, EltVT), 1289 DAG.getConstant(0, dl, EltVT)); 1290 } 1291 return DAG.getBuildVector(VT, dl, Ops); 1292 } 1293 1294 bool SelectionDAG::LegalizeVectors() { 1295 return VectorLegalizer(*this).Run(); 1296 } 1297