//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cctype>
using namespace llvm;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

/// Return a human-readable name for a target-specific SDNode opcode. Targets
/// override this for their custom nodes; the base implementation knows no
/// target-specific opcodes and so always returns null.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
49 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, 50 SDValue &Chain) const { 51 const Function *F = DAG.getMachineFunction().getFunction(); 52 53 // Conservatively require the attributes of the call to match those of 54 // the return. Ignore noalias because it doesn't affect the call sequence. 55 AttributeSet CallerAttrs = F->getAttributes(); 56 if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex) 57 .removeAttribute(Attribute::NoAlias).hasAttributes()) 58 return false; 59 60 // It's not safe to eliminate the sign / zero extension of the return value. 61 if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || 62 CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) 63 return false; 64 65 // Check if the only use is a function return node. 66 return isUsedByReturnOnly(Node, Chain); 67 } 68 69 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, 70 const uint32_t *CallerPreservedMask, 71 const SmallVectorImpl<CCValAssign> &ArgLocs, 72 const SmallVectorImpl<SDValue> &OutVals) const { 73 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 74 const CCValAssign &ArgLoc = ArgLocs[I]; 75 if (!ArgLoc.isRegLoc()) 76 continue; 77 unsigned Reg = ArgLoc.getLocReg(); 78 // Only look at callee saved registers. 79 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) 80 continue; 81 // Check that we pass the value used for the caller. 82 // (We look for a CopyFromReg reading a virtual register that is used 83 // for the function live-in value of register Reg) 84 SDValue Value = OutVals[I]; 85 if (Value->getOpcode() != ISD::CopyFromReg) 86 return false; 87 unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); 88 if (MRI.getLiveInPhysReg(ArgReg) != Reg) 89 return false; 90 } 91 return true; 92 } 93 94 /// \brief Set CallLoweringInfo attribute flags based on a call instruction 95 /// and called function attributes. 
96 void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, 97 unsigned AttrIdx) { 98 isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); 99 isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); 100 isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); 101 isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); 102 isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); 103 isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); 104 isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); 105 isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); 106 isSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf); 107 isSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError); 108 Alignment = CS->getParamAlignment(AttrIdx); 109 } 110 111 /// Generate a libcall taking the given operands as arguments and returning a 112 /// result of type RetVT. 113 std::pair<SDValue, SDValue> 114 TargetLowering::makeLibCall(SelectionDAG &DAG, 115 RTLIB::Libcall LC, EVT RetVT, 116 ArrayRef<SDValue> Ops, 117 bool isSigned, SDLoc dl, 118 bool doesNotReturn, 119 bool isReturnValueUsed) const { 120 TargetLowering::ArgListTy Args; 121 Args.reserve(Ops.size()); 122 123 TargetLowering::ArgListEntry Entry; 124 for (SDValue Op : Ops) { 125 Entry.Node = Op; 126 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); 127 Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); 128 Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); 129 Args.push_back(Entry); 130 } 131 132 if (LC == RTLIB::UNKNOWN_LIBCALL) 133 report_fatal_error("Unsupported library call operation!"); 134 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), 135 getPointerTy(DAG.getDataLayout())); 136 137 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); 138 TargetLowering::CallLoweringInfo CLI(DAG); 139 bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); 140 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) 141 
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) 142 .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed) 143 .setSExtResult(signExtend).setZExtResult(!signExtend); 144 return LowerCallTo(CLI); 145 } 146 147 /// Soften the operands of a comparison. This code is shared among BR_CC, 148 /// SELECT_CC, and SETCC handlers. 149 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, 150 SDValue &NewLHS, SDValue &NewRHS, 151 ISD::CondCode &CCCode, 152 SDLoc dl) const { 153 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) 154 && "Unsupported setcc type!"); 155 156 // Expand into one or more soft-fp libcall(s). 157 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; 158 bool ShouldInvertCC = false; 159 switch (CCCode) { 160 case ISD::SETEQ: 161 case ISD::SETOEQ: 162 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : 163 (VT == MVT::f64) ? RTLIB::OEQ_F64 : 164 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128; 165 break; 166 case ISD::SETNE: 167 case ISD::SETUNE: 168 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : 169 (VT == MVT::f64) ? RTLIB::UNE_F64 : 170 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128; 171 break; 172 case ISD::SETGE: 173 case ISD::SETOGE: 174 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : 175 (VT == MVT::f64) ? RTLIB::OGE_F64 : 176 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128; 177 break; 178 case ISD::SETLT: 179 case ISD::SETOLT: 180 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : 181 (VT == MVT::f64) ? RTLIB::OLT_F64 : 182 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; 183 break; 184 case ISD::SETLE: 185 case ISD::SETOLE: 186 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : 187 (VT == MVT::f64) ? RTLIB::OLE_F64 : 188 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128; 189 break; 190 case ISD::SETGT: 191 case ISD::SETOGT: 192 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : 193 (VT == MVT::f64) ? RTLIB::OGT_F64 : 194 (VT == MVT::f128) ? 
RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; 195 break; 196 case ISD::SETUO: 197 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : 198 (VT == MVT::f64) ? RTLIB::UO_F64 : 199 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128; 200 break; 201 case ISD::SETO: 202 LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : 203 (VT == MVT::f64) ? RTLIB::O_F64 : 204 (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128; 205 break; 206 case ISD::SETONE: 207 // SETONE = SETOLT | SETOGT 208 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : 209 (VT == MVT::f64) ? RTLIB::OLT_F64 : 210 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; 211 LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : 212 (VT == MVT::f64) ? RTLIB::OGT_F64 : 213 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; 214 break; 215 case ISD::SETUEQ: 216 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : 217 (VT == MVT::f64) ? RTLIB::UO_F64 : 218 (VT == MVT::f128) ? RTLIB::UO_F64 : RTLIB::UO_PPCF128; 219 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : 220 (VT == MVT::f64) ? RTLIB::OEQ_F64 : 221 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128; 222 break; 223 default: 224 // Invert CC for unordered comparisons 225 ShouldInvertCC = true; 226 switch (CCCode) { 227 case ISD::SETULT: 228 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : 229 (VT == MVT::f64) ? RTLIB::OGE_F64 : 230 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128; 231 break; 232 case ISD::SETULE: 233 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : 234 (VT == MVT::f64) ? RTLIB::OGT_F64 : 235 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; 236 break; 237 case ISD::SETUGT: 238 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : 239 (VT == MVT::f64) ? RTLIB::OLE_F64 : 240 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128; 241 break; 242 case ISD::SETUGE: 243 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : 244 (VT == MVT::f64) ? RTLIB::OLT_F64 : 245 (VT == MVT::f128) ? 
RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; 246 break; 247 default: llvm_unreachable("Do not know how to soften this setcc!"); 248 } 249 } 250 251 // Use the target specific return value for comparions lib calls. 252 EVT RetVT = getCmpLibcallReturnType(); 253 SDValue Ops[2] = {NewLHS, NewRHS}; 254 NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/, 255 dl).first; 256 NewRHS = DAG.getConstant(0, dl, RetVT); 257 258 CCCode = getCmpLibcallCC(LC1); 259 if (ShouldInvertCC) 260 CCCode = getSetCCInverse(CCCode, /*isInteger=*/true); 261 262 if (LC2 != RTLIB::UNKNOWN_LIBCALL) { 263 SDValue Tmp = DAG.getNode( 264 ISD::SETCC, dl, 265 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), 266 NewLHS, NewRHS, DAG.getCondCode(CCCode)); 267 NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/, 268 dl).first; 269 NewLHS = DAG.getNode( 270 ISD::SETCC, dl, 271 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), 272 NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); 273 NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); 274 NewRHS = SDValue(); 275 } 276 } 277 278 /// Return the entry encoding for a jump table in the current function. The 279 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. 280 unsigned TargetLowering::getJumpTableEncoding() const { 281 // In non-pic modes, just use the address of a block. 282 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 283 return MachineJumpTableInfo::EK_BlockAddress; 284 285 // In PIC mode, if the target supports a GPRel32 directive, use it. 286 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr) 287 return MachineJumpTableInfo::EK_GPRel32BlockAddress; 288 289 // Otherwise, use a label difference. 
290 return MachineJumpTableInfo::EK_LabelDifference32; 291 } 292 293 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, 294 SelectionDAG &DAG) const { 295 // If our PIC model is GP relative, use the global offset table as the base. 296 unsigned JTEncoding = getJumpTableEncoding(); 297 298 if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) || 299 (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress)) 300 return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout())); 301 302 return Table; 303 } 304 305 /// This returns the relocation base for the given PIC jumptable, the same as 306 /// getPICJumpTableRelocBase, but as an MCExpr. 307 const MCExpr * 308 TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, 309 unsigned JTI,MCContext &Ctx) const{ 310 // The normal PIC reloc base is the label at the start of the jump table. 311 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx); 312 } 313 314 bool 315 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 316 // Assume that everything is safe in static mode. 317 if (getTargetMachine().getRelocationModel() == Reloc::Static) 318 return true; 319 320 // In dynamic-no-pic mode, assume that known defined values are safe. 321 if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC && 322 GA && GA->getGlobal()->isStrongDefinitionForLinker()) 323 return true; 324 325 // Otherwise assume nothing is safe. 326 return false; 327 } 328 329 //===----------------------------------------------------------------------===// 330 // Optimization Methods 331 //===----------------------------------------------------------------------===// 332 333 /// Check to see if the specified operand of the specified instruction is a 334 /// constant integer. If so, check to see if there are any bits set in the 335 /// constant that are not demanded. If so, shrink the constant and return true. 
bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
                                                               const APInt &Demanded) {
  SDLoc dl(Op);

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Op.getOpcode()) {
  default: break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    // Only a constant RHS can be shrunk.
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!C) return false;

    // For XOR: if setting all non-demanded bits would make the constant
    // all-ones, keep it as a NOT rather than shrinking it.
    if (Op.getOpcode() == ISD::XOR &&
        (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
      return false;

    // If the constant has set bits outside the demanded mask, clear them and
    // rebuild the node with the narrower constant.
    if (C->getAPIntValue().intersects(~Demanded)) {
      EVT VT = Op.getValueType();
      SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
                                DAG.getConstant(Demanded &
                                                C->getAPIntValue(),
                                                dl, VT));
      return CombineTo(Op, New);
    }

    break;
  }
  }

  return false;
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool
TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
                                                    unsigned BitWidth,
                                                    const APInt &Demanded,
                                                    SDLoc dl) {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // DemandedSize is the position of the highest demanded bit; any narrower
  // type would drop demanded bits, so start the search there.
  unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros();
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts: truncate both operands, perform the
      // operation in the narrow type, then widen the result back.
      SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
                                          Op.getNode()->getOperand(0)),
                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
                                          Op.getNode()->getOperand(1)));
      // Only zero-extend when demanded bits exceed the narrow width;
      // otherwise an any-extend suffices and is cheaper to match.
      bool NeedZext = DemandedSize > SmallVTBits;
      SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND,
                              dl, Op.getValueType(), X);
      return CombineTo(Op, Z);
    }
  }
  return false;
}

/// Look at Op. At this point, we know that only the DemandedMask bits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of KnownOne and KnownZero bits for the expression (used to
/// simplify the caller). The KnownZero/One bits may only be accurate for those
/// bits in the DemandedMask.
425 bool TargetLowering::SimplifyDemandedBits(SDValue Op, 426 const APInt &DemandedMask, 427 APInt &KnownZero, 428 APInt &KnownOne, 429 TargetLoweringOpt &TLO, 430 unsigned Depth) const { 431 unsigned BitWidth = DemandedMask.getBitWidth(); 432 assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth && 433 "Mask size mismatches value type size!"); 434 APInt NewMask = DemandedMask; 435 SDLoc dl(Op); 436 auto &DL = TLO.DAG.getDataLayout(); 437 438 // Don't know anything. 439 KnownZero = KnownOne = APInt(BitWidth, 0); 440 441 // Other users may use these bits. 442 if (!Op.getNode()->hasOneUse()) { 443 if (Depth != 0) { 444 // If not at the root, Just compute the KnownZero/KnownOne bits to 445 // simplify things downstream. 446 TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); 447 return false; 448 } 449 // If this is the root being simplified, allow it to have multiple uses, 450 // just set the NewMask to all bits. 451 NewMask = APInt::getAllOnesValue(BitWidth); 452 } else if (DemandedMask == 0) { 453 // Not demanding any bits from Op. 454 if (!Op.isUndef()) 455 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); 456 return false; 457 } else if (Depth == 6) { // Limit search depth. 458 return false; 459 } 460 461 APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; 462 switch (Op.getOpcode()) { 463 case ISD::Constant: 464 // We know all of the bits for a constant! 465 KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); 466 KnownZero = ~KnownOne; 467 return false; // Don't fall through, will infinitely loop. 468 case ISD::AND: 469 // If the RHS is a constant, check to see if the LHS would be zero without 470 // using the bits from the RHS. Below, we use knowledge about the RHS to 471 // simplify the LHS, here we're using information from the LHS to simplify 472 // the RHS. 
473 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 474 APInt LHSZero, LHSOne; 475 // Do not increment Depth here; that can cause an infinite loop. 476 TLO.DAG.computeKnownBits(Op.getOperand(0), LHSZero, LHSOne, Depth); 477 // If the LHS already has zeros where RHSC does, this and is dead. 478 if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) 479 return TLO.CombineTo(Op, Op.getOperand(0)); 480 // If any of the set bits in the RHS are known zero on the LHS, shrink 481 // the constant. 482 if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) 483 return true; 484 } 485 486 if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, 487 KnownOne, TLO, Depth+1)) 488 return true; 489 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 490 if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, 491 KnownZero2, KnownOne2, TLO, Depth+1)) 492 return true; 493 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 494 495 // If all of the demanded bits are known one on one side, return the other. 496 // These bits cannot contribute to the result of the 'and'. 497 if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) 498 return TLO.CombineTo(Op, Op.getOperand(0)); 499 if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) 500 return TLO.CombineTo(Op, Op.getOperand(1)); 501 // If all of the demanded bits in the inputs are known zeros, return zero. 502 if ((NewMask & (KnownZero|KnownZero2)) == NewMask) 503 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); 504 // If the RHS is a constant, see if we can simplify it. 505 if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) 506 return true; 507 // If the operation can be done in a smaller type, do so. 508 if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) 509 return true; 510 511 // Output known-1 bits are only known if set in both the LHS & RHS. 
512 KnownOne &= KnownOne2; 513 // Output known-0 are known to be clear if zero in either the LHS | RHS. 514 KnownZero |= KnownZero2; 515 break; 516 case ISD::OR: 517 if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, 518 KnownOne, TLO, Depth+1)) 519 return true; 520 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 521 if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, 522 KnownZero2, KnownOne2, TLO, Depth+1)) 523 return true; 524 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 525 526 // If all of the demanded bits are known zero on one side, return the other. 527 // These bits cannot contribute to the result of the 'or'. 528 if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) 529 return TLO.CombineTo(Op, Op.getOperand(0)); 530 if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask)) 531 return TLO.CombineTo(Op, Op.getOperand(1)); 532 // If all of the potentially set bits on one side are known to be set on 533 // the other side, just use the 'other' side. 534 if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) 535 return TLO.CombineTo(Op, Op.getOperand(0)); 536 if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) 537 return TLO.CombineTo(Op, Op.getOperand(1)); 538 // If the RHS is a constant, see if we can simplify it. 539 if (TLO.ShrinkDemandedConstant(Op, NewMask)) 540 return true; 541 // If the operation can be done in a smaller type, do so. 542 if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) 543 return true; 544 545 // Output known-0 bits are only known if clear in both the LHS & RHS. 546 KnownZero &= KnownZero2; 547 // Output known-1 are known to be set if set in either the LHS | RHS. 
548 KnownOne |= KnownOne2; 549 break; 550 case ISD::XOR: 551 if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, 552 KnownOne, TLO, Depth+1)) 553 return true; 554 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 555 if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, 556 KnownOne2, TLO, Depth+1)) 557 return true; 558 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 559 560 // If all of the demanded bits are known zero on one side, return the other. 561 // These bits cannot contribute to the result of the 'xor'. 562 if ((KnownZero & NewMask) == NewMask) 563 return TLO.CombineTo(Op, Op.getOperand(0)); 564 if ((KnownZero2 & NewMask) == NewMask) 565 return TLO.CombineTo(Op, Op.getOperand(1)); 566 // If the operation can be done in a smaller type, do so. 567 if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) 568 return true; 569 570 // If all of the unknown bits are known to be zero on one side or the other 571 // (but not both) turn this into an *inclusive* or. 572 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 573 if ((NewMask & ~KnownZero & ~KnownZero2) == 0) 574 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), 575 Op.getOperand(0), 576 Op.getOperand(1))); 577 578 // Output known-0 bits are known if clear or set in both the LHS & RHS. 579 KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); 580 // Output known-1 are known to be set if set in only one of the LHS, RHS. 581 KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); 582 583 // If all of the demanded bits on one side are known, and all of the set 584 // bits on that side are also known to be set on the other side, turn this 585 // into an AND, as we know the bits will be cleared. 586 // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 587 // NB: it is okay if more bits are known than are requested 588 if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side 589 if (KnownOne == KnownOne2) { // set bits are the same on both sides 590 EVT VT = Op.getValueType(); 591 SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, dl, VT); 592 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, 593 Op.getOperand(0), ANDC)); 594 } 595 } 596 597 // If the RHS is a constant, see if we can simplify it. 598 // for XOR, we prefer to force bits to 1 if they will make a -1. 599 // if we can't force bits, try to shrink constant 600 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 601 APInt Expanded = C->getAPIntValue() | (~NewMask); 602 // if we can expand it to have all bits set, do it 603 if (Expanded.isAllOnesValue()) { 604 if (Expanded != C->getAPIntValue()) { 605 EVT VT = Op.getValueType(); 606 SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), 607 TLO.DAG.getConstant(Expanded, dl, VT)); 608 return TLO.CombineTo(Op, New); 609 } 610 // if it already has all the bits set, nothing to change 611 // but don't shrink either! 612 } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { 613 return true; 614 } 615 } 616 617 KnownZero = KnownZeroOut; 618 KnownOne = KnownOneOut; 619 break; 620 case ISD::SELECT: 621 if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, 622 KnownOne, TLO, Depth+1)) 623 return true; 624 if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, 625 KnownOne2, TLO, Depth+1)) 626 return true; 627 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 628 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 629 630 // If the operands are constants, see if we can simplify them. 631 if (TLO.ShrinkDemandedConstant(Op, NewMask)) 632 return true; 633 634 // Only known if known in both the LHS and RHS. 
635 KnownOne &= KnownOne2; 636 KnownZero &= KnownZero2; 637 break; 638 case ISD::SELECT_CC: 639 if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, 640 KnownOne, TLO, Depth+1)) 641 return true; 642 if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, 643 KnownOne2, TLO, Depth+1)) 644 return true; 645 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 646 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 647 648 // If the operands are constants, see if we can simplify them. 649 if (TLO.ShrinkDemandedConstant(Op, NewMask)) 650 return true; 651 652 // Only known if known in both the LHS and RHS. 653 KnownOne &= KnownOne2; 654 KnownZero &= KnownZero2; 655 break; 656 case ISD::SHL: 657 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 658 unsigned ShAmt = SA->getZExtValue(); 659 SDValue InOp = Op.getOperand(0); 660 661 // If the shift count is an invalid immediate, don't do anything. 662 if (ShAmt >= BitWidth) 663 break; 664 665 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a 666 // single shift. We can do this if the bottom bits (which are shifted 667 // out) are never demanded. 
668 if (InOp.getOpcode() == ISD::SRL && 669 isa<ConstantSDNode>(InOp.getOperand(1))) { 670 if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { 671 unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); 672 unsigned Opc = ISD::SHL; 673 int Diff = ShAmt-C1; 674 if (Diff < 0) { 675 Diff = -Diff; 676 Opc = ISD::SRL; 677 } 678 679 SDValue NewSA = 680 TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); 681 EVT VT = Op.getValueType(); 682 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, 683 InOp.getOperand(0), NewSA)); 684 } 685 } 686 687 if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), 688 KnownZero, KnownOne, TLO, Depth+1)) 689 return true; 690 691 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits 692 // are not demanded. This will likely allow the anyext to be folded away. 693 if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { 694 SDValue InnerOp = InOp.getNode()->getOperand(0); 695 EVT InnerVT = InnerOp.getValueType(); 696 unsigned InnerBits = InnerVT.getSizeInBits(); 697 if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 && 698 isTypeDesirableForOp(ISD::SHL, InnerVT)) { 699 EVT ShTy = getShiftAmountTy(InnerVT, DL); 700 if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) 701 ShTy = InnerVT; 702 SDValue NarrowShl = 703 TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, 704 TLO.DAG.getConstant(ShAmt, dl, ShTy)); 705 return 706 TLO.CombineTo(Op, 707 TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), 708 NarrowShl)); 709 } 710 // Repeat the SHL optimization above in cases where an extension 711 // intervenes: (shl (anyext (shr x, c1)), c2) to 712 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits 713 // aren't demanded (as above) and that the shifted upper c1 bits of 714 // x aren't demanded. 
715 if (InOp.hasOneUse() && 716 InnerOp.getOpcode() == ISD::SRL && 717 InnerOp.hasOneUse() && 718 isa<ConstantSDNode>(InnerOp.getOperand(1))) { 719 uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1)) 720 ->getZExtValue(); 721 if (InnerShAmt < ShAmt && 722 InnerShAmt < InnerBits && 723 NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 && 724 NewMask.trunc(ShAmt) == 0) { 725 SDValue NewSA = 726 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, 727 Op.getOperand(1).getValueType()); 728 EVT VT = Op.getValueType(); 729 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, 730 InnerOp.getOperand(0)); 731 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, 732 NewExt, NewSA)); 733 } 734 } 735 } 736 737 KnownZero <<= SA->getZExtValue(); 738 KnownOne <<= SA->getZExtValue(); 739 // low bits known zero. 740 KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue()); 741 } 742 break; 743 case ISD::SRL: 744 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 745 EVT VT = Op.getValueType(); 746 unsigned ShAmt = SA->getZExtValue(); 747 unsigned VTSize = VT.getSizeInBits(); 748 SDValue InOp = Op.getOperand(0); 749 750 // If the shift count is an invalid immediate, don't do anything. 751 if (ShAmt >= BitWidth) 752 break; 753 754 APInt InDemandedMask = (NewMask << ShAmt); 755 756 // If the shift is exact, then it does demand the low bits (and knows that 757 // they are zero). 758 if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact()) 759 InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt); 760 761 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a 762 // single shift. We can do this if the top bits (which are shifted out) 763 // are never demanded. 
764 if (InOp.getOpcode() == ISD::SHL && 765 isa<ConstantSDNode>(InOp.getOperand(1))) { 766 if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) { 767 unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); 768 unsigned Opc = ISD::SRL; 769 int Diff = ShAmt-C1; 770 if (Diff < 0) { 771 Diff = -Diff; 772 Opc = ISD::SHL; 773 } 774 775 SDValue NewSA = 776 TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); 777 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, 778 InOp.getOperand(0), NewSA)); 779 } 780 } 781 782 // Compute the new bits that are at the top now. 783 if (SimplifyDemandedBits(InOp, InDemandedMask, 784 KnownZero, KnownOne, TLO, Depth+1)) 785 return true; 786 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 787 KnownZero = KnownZero.lshr(ShAmt); 788 KnownOne = KnownOne.lshr(ShAmt); 789 790 APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); 791 KnownZero |= HighBits; // High bits known zero. 792 } 793 break; 794 case ISD::SRA: 795 // If this is an arithmetic shift right and only the low-bit is set, we can 796 // always convert this into a logical shr, even if the shift amount is 797 // variable. The low bit of the shift cannot be an input sign bit unless 798 // the shift amount is >= the size of the datatype, which is undefined. 799 if (NewMask == 1) 800 return TLO.CombineTo(Op, 801 TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), 802 Op.getOperand(0), Op.getOperand(1))); 803 804 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 805 EVT VT = Op.getValueType(); 806 unsigned ShAmt = SA->getZExtValue(); 807 808 // If the shift count is an invalid immediate, don't do anything. 809 if (ShAmt >= BitWidth) 810 break; 811 812 APInt InDemandedMask = (NewMask << ShAmt); 813 814 // If the shift is exact, then it does demand the low bits (and knows that 815 // they are zero). 
816 if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact()) 817 InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt); 818 819 // If any of the demanded bits are produced by the sign extension, we also 820 // demand the input sign bit. 821 APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); 822 if (HighBits.intersects(NewMask)) 823 InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits()); 824 825 if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, 826 KnownZero, KnownOne, TLO, Depth+1)) 827 return true; 828 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 829 KnownZero = KnownZero.lshr(ShAmt); 830 KnownOne = KnownOne.lshr(ShAmt); 831 832 // Handle the sign bit, adjusted to where it is now in the mask. 833 APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); 834 835 // If the input sign bit is known to be zero, or if none of the top bits 836 // are demanded, turn this into an unsigned shift right. 837 if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { 838 SDNodeFlags Flags; 839 Flags.setExact(cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact()); 840 return TLO.CombineTo(Op, 841 TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), 842 Op.getOperand(1), &Flags)); 843 } 844 845 int Log2 = NewMask.exactLogBase2(); 846 if (Log2 >= 0) { 847 // The bit must come from the sign. 848 SDValue NewSA = 849 TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, 850 Op.getOperand(1).getValueType()); 851 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, 852 Op.getOperand(0), NewSA)); 853 } 854 855 if (KnownOne.intersects(SignBit)) 856 // New bits are known one. 857 KnownOne |= HighBits; 858 } 859 break; 860 case ISD::SIGN_EXTEND_INREG: { 861 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 862 863 APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); 864 // If we only care about the highest bit, don't bother shifting right. 
865 if (MsbMask == NewMask) { 866 unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); 867 SDValue InOp = Op.getOperand(0); 868 unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits(); 869 bool AlreadySignExtended = 870 TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1; 871 // However if the input is already sign extended we expect the sign 872 // extension to be dropped altogether later and do not simplify. 873 if (!AlreadySignExtended) { 874 // Compute the correct shift amount type, which must be getShiftAmountTy 875 // for scalar types after legalization. 876 EVT ShiftAmtTy = Op.getValueType(); 877 if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) 878 ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); 879 880 SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl, 881 ShiftAmtTy); 882 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, 883 Op.getValueType(), InOp, 884 ShiftAmt)); 885 } 886 } 887 888 // Sign extension. Compute the demanded bits in the result that are not 889 // present in the input. 890 APInt NewBits = 891 APInt::getHighBitsSet(BitWidth, 892 BitWidth - ExVT.getScalarType().getSizeInBits()); 893 894 // If none of the extended bits are demanded, eliminate the sextinreg. 895 if ((NewBits & NewMask) == 0) 896 return TLO.CombineTo(Op, Op.getOperand(0)); 897 898 APInt InSignBit = 899 APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth); 900 APInt InputDemandedBits = 901 APInt::getLowBitsSet(BitWidth, 902 ExVT.getScalarType().getSizeInBits()) & 903 NewMask; 904 905 // Since the sign extended bits are demanded, we know that the sign 906 // bit is demanded. 907 InputDemandedBits |= InSignBit; 908 909 if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, 910 KnownZero, KnownOne, TLO, Depth+1)) 911 return true; 912 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 913 914 // If the sign bit of the input is known set or clear, then we know the 915 // top bits of the result. 
916 917 // If the input sign bit is known zero, convert this into a zero extension. 918 if (KnownZero.intersects(InSignBit)) 919 return TLO.CombineTo(Op, 920 TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT)); 921 922 if (KnownOne.intersects(InSignBit)) { // Input sign bit known set 923 KnownOne |= NewBits; 924 KnownZero &= ~NewBits; 925 } else { // Input sign bit unknown 926 KnownZero &= ~NewBits; 927 KnownOne &= ~NewBits; 928 } 929 break; 930 } 931 case ISD::BUILD_PAIR: { 932 EVT HalfVT = Op.getOperand(0).getValueType(); 933 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits(); 934 935 APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth); 936 APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth); 937 938 APInt KnownZeroLo, KnownOneLo; 939 APInt KnownZeroHi, KnownOneHi; 940 941 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownZeroLo, 942 KnownOneLo, TLO, Depth + 1)) 943 return true; 944 945 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownZeroHi, 946 KnownOneHi, TLO, Depth + 1)) 947 return true; 948 949 KnownZero = KnownZeroLo.zext(BitWidth) | 950 KnownZeroHi.zext(BitWidth).shl(HalfBitWidth); 951 952 KnownOne = KnownOneLo.zext(BitWidth) | 953 KnownOneHi.zext(BitWidth).shl(HalfBitWidth); 954 break; 955 } 956 case ISD::ZERO_EXTEND: { 957 unsigned OperandBitWidth = 958 Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); 959 APInt InMask = NewMask.trunc(OperandBitWidth); 960 961 // If none of the top bits are demanded, convert this into an any_extend. 
962 APInt NewBits = 963 APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; 964 if (!NewBits.intersects(NewMask)) 965 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, 966 Op.getValueType(), 967 Op.getOperand(0))); 968 969 if (SimplifyDemandedBits(Op.getOperand(0), InMask, 970 KnownZero, KnownOne, TLO, Depth+1)) 971 return true; 972 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 973 KnownZero = KnownZero.zext(BitWidth); 974 KnownOne = KnownOne.zext(BitWidth); 975 KnownZero |= NewBits; 976 break; 977 } 978 case ISD::SIGN_EXTEND: { 979 EVT InVT = Op.getOperand(0).getValueType(); 980 unsigned InBits = InVT.getScalarType().getSizeInBits(); 981 APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); 982 APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); 983 APInt NewBits = ~InMask & NewMask; 984 985 // If none of the top bits are demanded, convert this into an any_extend. 986 if (NewBits == 0) 987 return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, 988 Op.getValueType(), 989 Op.getOperand(0))); 990 991 // Since some of the sign extended bits are demanded, we know that the sign 992 // bit is demanded. 993 APInt InDemandedBits = InMask & NewMask; 994 InDemandedBits |= InSignBit; 995 InDemandedBits = InDemandedBits.trunc(InBits); 996 997 if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, 998 KnownOne, TLO, Depth+1)) 999 return true; 1000 KnownZero = KnownZero.zext(BitWidth); 1001 KnownOne = KnownOne.zext(BitWidth); 1002 1003 // If the sign bit is known zero, convert this to a zero extend. 1004 if (KnownZero.intersects(InSignBit)) 1005 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, 1006 Op.getValueType(), 1007 Op.getOperand(0))); 1008 1009 // If the sign bit is known one, the top bits match. 1010 if (KnownOne.intersects(InSignBit)) { 1011 KnownOne |= NewBits; 1012 assert((KnownZero & NewBits) == 0); 1013 } else { // Otherwise, top bits aren't known. 
1014 assert((KnownOne & NewBits) == 0); 1015 assert((KnownZero & NewBits) == 0); 1016 } 1017 break; 1018 } 1019 case ISD::ANY_EXTEND: { 1020 unsigned OperandBitWidth = 1021 Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); 1022 APInt InMask = NewMask.trunc(OperandBitWidth); 1023 if (SimplifyDemandedBits(Op.getOperand(0), InMask, 1024 KnownZero, KnownOne, TLO, Depth+1)) 1025 return true; 1026 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 1027 KnownZero = KnownZero.zext(BitWidth); 1028 KnownOne = KnownOne.zext(BitWidth); 1029 break; 1030 } 1031 case ISD::TRUNCATE: { 1032 // Simplify the input, using demanded bit information, and compute the known 1033 // zero/one bits live out. 1034 unsigned OperandBitWidth = 1035 Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); 1036 APInt TruncMask = NewMask.zext(OperandBitWidth); 1037 if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, 1038 KnownZero, KnownOne, TLO, Depth+1)) 1039 return true; 1040 KnownZero = KnownZero.trunc(BitWidth); 1041 KnownOne = KnownOne.trunc(BitWidth); 1042 1043 // If the input is only used by this truncate, see if we can shrink it based 1044 // on the known demanded bits. 1045 if (Op.getOperand(0).getNode()->hasOneUse()) { 1046 SDValue In = Op.getOperand(0); 1047 switch (In.getOpcode()) { 1048 default: break; 1049 case ISD::SRL: 1050 // Shrink SRL by a constant if none of the high bits shifted in are 1051 // demanded. 1052 if (TLO.LegalTypes() && 1053 !isTypeDesirableForOp(ISD::SRL, Op.getValueType())) 1054 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is 1055 // undesirable. 
1056 break; 1057 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); 1058 if (!ShAmt) 1059 break; 1060 SDValue Shift = In.getOperand(1); 1061 if (TLO.LegalTypes()) { 1062 uint64_t ShVal = ShAmt->getZExtValue(); 1063 Shift = TLO.DAG.getConstant(ShVal, dl, 1064 getShiftAmountTy(Op.getValueType(), DL)); 1065 } 1066 1067 APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, 1068 OperandBitWidth - BitWidth); 1069 HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); 1070 1071 if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { 1072 // None of the shifted in bits are needed. Add a truncate of the 1073 // shift input, then shift it. 1074 SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, 1075 Op.getValueType(), 1076 In.getOperand(0)); 1077 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, 1078 Op.getValueType(), 1079 NewTrunc, 1080 Shift)); 1081 } 1082 break; 1083 } 1084 } 1085 1086 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 1087 break; 1088 } 1089 case ISD::AssertZext: { 1090 // AssertZext demands all of the high bits, plus any of the low bits 1091 // demanded by its users. 1092 EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 1093 APInt InMask = APInt::getLowBitsSet(BitWidth, 1094 VT.getSizeInBits()); 1095 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask, 1096 KnownZero, KnownOne, TLO, Depth+1)) 1097 return true; 1098 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 1099 1100 KnownZero |= ~InMask & NewMask; 1101 break; 1102 } 1103 case ISD::BITCAST: 1104 // If this is an FP->Int bitcast and if the sign bit is the only 1105 // thing demanded, turn this into a FGETSIGN. 
1106 if (!TLO.LegalOperations() && 1107 !Op.getValueType().isVector() && 1108 !Op.getOperand(0).getValueType().isVector() && 1109 NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && 1110 Op.getOperand(0).getValueType().isFloatingPoint()) { 1111 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); 1112 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); 1113 if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && 1114 Op.getOperand(0).getValueType() != MVT::f128) { 1115 // Cannot eliminate/lower SHL for f128 yet. 1116 EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; 1117 // Make a FGETSIGN + SHL to move the sign bit into the appropriate 1118 // place. We expect the SHL to be eliminated by other optimizations. 1119 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0)); 1120 unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits(); 1121 if (!OpVTLegal && OpVTSizeInBits > 32) 1122 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); 1123 unsigned ShVal = Op.getValueType().getSizeInBits()-1; 1124 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType()); 1125 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, 1126 Op.getValueType(), 1127 Sign, ShAmt)); 1128 } 1129 } 1130 break; 1131 case ISD::ADD: 1132 case ISD::MUL: 1133 case ISD::SUB: { 1134 // Add, Sub, and Mul don't demand any bits in positions beyond that 1135 // of the highest bit demanded of them. 1136 APInt LoMask = APInt::getLowBitsSet(BitWidth, 1137 BitWidth - NewMask.countLeadingZeros()); 1138 if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2, 1139 KnownOne2, TLO, Depth+1)) 1140 return true; 1141 if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, 1142 KnownOne2, TLO, Depth+1)) 1143 return true; 1144 // See if the operation should be performed at a smaller bit width. 
    // Now that we know which result bits are demanded, try to perform the
    // whole operation in a narrower type (e.g. an i64 add whose high half is
    // never used can become an i32 add).
    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
      return true;
  }
  // FALL THROUGH
  default:
    // Just use computeKnownBits to compute output bits.
    TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
    break;
  }

  // If we know the value of all of the demanded bits, return this as a
  // constant.
  if ((NewMask & (KnownZero|KnownOne)) == NewMask) {
    // Avoid folding to a constant if any OpaqueConstant is involved.
    // NOTE: the loop variable below intentionally walks this node's operands;
    // it shadows the outer 'Op' for the duration of the loop.
    const SDNode *N = Op.getNode();
    for (SDNodeIterator I = SDNodeIterator::begin(N),
         E = SDNodeIterator::end(N); I != E; ++I) {
      SDNode *Op = *I;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (C->isOpaque())
          return false;
    }
    // Every demanded bit is known, so the known-one bits spell out the
    // constant value of the demanded portion.
    return TLO.CombineTo(Op,
                         TLO.DAG.getConstant(KnownOne, dl, Op.getValueType()));
  }

  return false;
}

/// Determine which of the bits specified in Mask are known to be either zero or
/// one and return them in the KnownZero/KnownOne bitsets.
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                   APInt &KnownZero,
                                                   APInt &KnownOne,
                                                   const SelectionDAG &DAG,
                                                   unsigned Depth) const {
  // Default implementation for target-specific nodes: claim nothing is known.
  // Targets with custom opcodes override this to expose bit-level knowledge.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
}

/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
1192 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, 1193 const SelectionDAG &, 1194 unsigned Depth) const { 1195 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || 1196 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 1197 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || 1198 Op.getOpcode() == ISD::INTRINSIC_VOID) && 1199 "Should use ComputeNumSignBits if you don't know whether Op" 1200 " is a target node!"); 1201 return 1; 1202 } 1203 1204 bool TargetLowering::isConstTrueVal(const SDNode *N) const { 1205 if (!N) 1206 return false; 1207 1208 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); 1209 if (!CN) { 1210 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); 1211 if (!BV) 1212 return false; 1213 1214 BitVector UndefElements; 1215 CN = BV->getConstantSplatNode(&UndefElements); 1216 // Only interested in constant splats, and we don't try to handle undef 1217 // elements in identifying boolean constants. 1218 if (!CN || UndefElements.none()) 1219 return false; 1220 } 1221 1222 switch (getBooleanContents(N->getValueType(0))) { 1223 case UndefinedBooleanContent: 1224 return CN->getAPIntValue()[0]; 1225 case ZeroOrOneBooleanContent: 1226 return CN->isOne(); 1227 case ZeroOrNegativeOneBooleanContent: 1228 return CN->isAllOnesValue(); 1229 } 1230 1231 llvm_unreachable("Invalid boolean contents"); 1232 } 1233 1234 bool TargetLowering::isConstFalseVal(const SDNode *N) const { 1235 if (!N) 1236 return false; 1237 1238 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); 1239 if (!CN) { 1240 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); 1241 if (!BV) 1242 return false; 1243 1244 BitVector UndefElements; 1245 CN = BV->getConstantSplatNode(&UndefElements); 1246 // Only interested in constant splats, and we don't try to handle undef 1247 // elements in identifying boolean constants. 
1248 if (!CN || UndefElements.none()) 1249 return false; 1250 } 1251 1252 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent) 1253 return !CN->getAPIntValue()[0]; 1254 1255 return CN->isNullValue(); 1256 } 1257 1258 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, 1259 bool SExt) const { 1260 if (VT == MVT::i1) 1261 return N->isOne(); 1262 1263 TargetLowering::BooleanContent Cnt = getBooleanContents(VT); 1264 switch (Cnt) { 1265 case TargetLowering::ZeroOrOneBooleanContent: 1266 // An extended value of 1 is always true, unless its original type is i1, 1267 // in which case it will be sign extended to -1. 1268 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1)); 1269 case TargetLowering::UndefinedBooleanContent: 1270 case TargetLowering::ZeroOrNegativeOneBooleanContent: 1271 return N->isAllOnesValue() && SExt; 1272 } 1273 llvm_unreachable("Unexpected enumeration."); 1274 } 1275 1276 /// This helper function of SimplifySetCC tries to optimize the comparison when 1277 /// either operand of the SetCC node is a bitwise-and instruction. 
SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                             ISD::CondCode Cond,
                                             DAGCombinerInfo &DCI,
                                             SDLoc DL) const {
  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  // Canonicalize so that the AND (if there is one) ends up on the LHS.
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  EVT OpVT = N0.getValueType();
  // Only integer equality comparisons against an AND are handled here.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // Identify which AND operand is the repeated value (it must equal N1);
  // that operand becomes Y and the other becomes X.
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  SelectionDAG &DAG = DCI.DAG;
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    auto *YConst = dyn_cast<ConstantSDNode>(Y);
    if (YConst && YConst->isNullValue())
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}

/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue
TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                              ISD::CondCode Cond, bool foldBooleans,
                              DAGCombinerInfo &DCI, SDLoc dl) const {
  SelectionDAG &DAG = DCI.DAG;

  // These setcc operations always fold.
  switch (Cond) {
  default: break;
  case ISD::SETFALSE:
  case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT);
  case ISD::SETTRUE:
  case ISD::SETTRUE2: {
    // "True" is 1 or all-ones depending on the target's boolean convention
    // for the operand type.
    TargetLowering::BooleanContent Cnt =
        getBooleanContents(N0->getValueType(0));
    return DAG.getConstant(
        Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
        VT);
  }
  }

  // Ensure that the constant occurs on the RHS, and fold constant
  // comparisons.
  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
  if (isa<ConstantSDNode>(N0.getNode()) &&
      (DCI.isBeforeLegalizeOps() ||
       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);

  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const APInt &C1 = N1C->getAPIntValue();

    // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
    // equality comparison, then we're just comparing whether X itself is
    // zero.
1374 if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && 1375 N0.getOperand(0).getOpcode() == ISD::CTLZ && 1376 N0.getOperand(1).getOpcode() == ISD::Constant) { 1377 const APInt &ShAmt 1378 = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 1379 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && 1380 ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { 1381 if ((C1 == 0) == (Cond == ISD::SETEQ)) { 1382 // (srl (ctlz x), 5) == 0 -> X != 0 1383 // (srl (ctlz x), 5) != 1 -> X != 0 1384 Cond = ISD::SETNE; 1385 } else { 1386 // (srl (ctlz x), 5) != 0 -> X == 0 1387 // (srl (ctlz x), 5) == 1 -> X == 0 1388 Cond = ISD::SETEQ; 1389 } 1390 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); 1391 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), 1392 Zero, Cond); 1393 } 1394 } 1395 1396 SDValue CTPOP = N0; 1397 // Look through truncs that don't change the value of a ctpop. 1398 if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE) 1399 CTPOP = N0.getOperand(0); 1400 1401 if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP && 1402 (N0 == CTPOP || N0.getValueType().getSizeInBits() > 1403 Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) { 1404 EVT CTVT = CTPOP.getValueType(); 1405 SDValue CTOp = CTPOP.getOperand(0); 1406 1407 // (ctpop x) u< 2 -> (x & x-1) == 0 1408 // (ctpop x) u> 1 -> (x & x-1) != 0 1409 if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ 1410 SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, 1411 DAG.getConstant(1, dl, CTVT)); 1412 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); 1413 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; 1414 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); 1415 } 1416 1417 // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. 
1418 } 1419 1420 // (zext x) == C --> x == (trunc C) 1421 // (sext x) == C --> x == (trunc C) 1422 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && 1423 DCI.isBeforeLegalize() && N0->hasOneUse()) { 1424 unsigned MinBits = N0.getValueSizeInBits(); 1425 SDValue PreExt; 1426 bool Signed = false; 1427 if (N0->getOpcode() == ISD::ZERO_EXTEND) { 1428 // ZExt 1429 MinBits = N0->getOperand(0).getValueSizeInBits(); 1430 PreExt = N0->getOperand(0); 1431 } else if (N0->getOpcode() == ISD::AND) { 1432 // DAGCombine turns costly ZExts into ANDs 1433 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) 1434 if ((C->getAPIntValue()+1).isPowerOf2()) { 1435 MinBits = C->getAPIntValue().countTrailingOnes(); 1436 PreExt = N0->getOperand(0); 1437 } 1438 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) { 1439 // SExt 1440 MinBits = N0->getOperand(0).getValueSizeInBits(); 1441 PreExt = N0->getOperand(0); 1442 Signed = true; 1443 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) { 1444 // ZEXTLOAD / SEXTLOAD 1445 if (LN0->getExtensionType() == ISD::ZEXTLOAD) { 1446 MinBits = LN0->getMemoryVT().getSizeInBits(); 1447 PreExt = N0; 1448 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) { 1449 Signed = true; 1450 MinBits = LN0->getMemoryVT().getSizeInBits(); 1451 PreExt = N0; 1452 } 1453 } 1454 1455 // Figure out how many bits we need to preserve this constant. 1456 unsigned ReqdBits = Signed ? 1457 C1.getBitWidth() - C1.getNumSignBits() + 1 : 1458 C1.getActiveBits(); 1459 1460 // Make sure we're not losing bits from the constant. 1461 if (MinBits > 0 && 1462 MinBits < C1.getBitWidth() && 1463 MinBits >= ReqdBits) { 1464 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); 1465 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { 1466 // Will get folded away. 
1467 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt); 1468 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT); 1469 return DAG.getSetCC(dl, VT, Trunc, C, Cond); 1470 } 1471 1472 // If truncating the setcc operands is not desirable, we can still 1473 // simplify the expression in some cases: 1474 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc) 1475 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc)) 1476 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc)) 1477 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc) 1478 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc)) 1479 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc) 1480 SDValue TopSetCC = N0->getOperand(0); 1481 unsigned N0Opc = N0->getOpcode(); 1482 bool SExt = (N0Opc == ISD::SIGN_EXTEND); 1483 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 && 1484 TopSetCC.getOpcode() == ISD::SETCC && 1485 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) && 1486 (isConstFalseVal(N1C) || 1487 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) { 1488 1489 bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) || 1490 (!N1C->isNullValue() && Cond == ISD::SETNE); 1491 1492 if (!Inverse) 1493 return TopSetCC; 1494 1495 ISD::CondCode InvCond = ISD::getSetCCInverse( 1496 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(), 1497 TopSetCC.getOperand(0).getValueType().isInteger()); 1498 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0), 1499 TopSetCC.getOperand(1), 1500 InvCond); 1501 1502 } 1503 } 1504 } 1505 1506 // If the LHS is '(and load, const)', the RHS is 0, 1507 // the test is for equality or unsigned, and all 1 bits of the const are 1508 // in the same partial word, see if we can shorten the load. 
1509 if (DCI.isBeforeLegalize() && 1510 !ISD::isSignedIntSetCC(Cond) && 1511 N0.getOpcode() == ISD::AND && C1 == 0 && 1512 N0.getNode()->hasOneUse() && 1513 isa<LoadSDNode>(N0.getOperand(0)) && 1514 N0.getOperand(0).getNode()->hasOneUse() && 1515 isa<ConstantSDNode>(N0.getOperand(1))) { 1516 LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); 1517 APInt bestMask; 1518 unsigned bestWidth = 0, bestOffset = 0; 1519 if (!Lod->isVolatile() && Lod->isUnindexed()) { 1520 unsigned origWidth = N0.getValueType().getSizeInBits(); 1521 unsigned maskWidth = origWidth; 1522 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to 1523 // 8 bits, but have to be careful... 1524 if (Lod->getExtensionType() != ISD::NON_EXTLOAD) 1525 origWidth = Lod->getMemoryVT().getSizeInBits(); 1526 const APInt &Mask = 1527 cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 1528 for (unsigned width = origWidth / 2; width>=8; width /= 2) { 1529 APInt newMask = APInt::getLowBitsSet(maskWidth, width); 1530 for (unsigned offset=0; offset<origWidth/width; offset++) { 1531 if ((newMask & Mask) == Mask) { 1532 if (!DAG.getDataLayout().isLittleEndian()) 1533 bestOffset = (origWidth/width - offset - 1) * (width/8); 1534 else 1535 bestOffset = (uint64_t)offset * (width/8); 1536 bestMask = Mask.lshr(offset * (width/8) * 8); 1537 bestWidth = width; 1538 break; 1539 } 1540 newMask = newMask << width; 1541 } 1542 } 1543 } 1544 if (bestWidth) { 1545 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); 1546 if (newVT.isRound()) { 1547 EVT PtrType = Lod->getOperand(1).getValueType(); 1548 SDValue Ptr = Lod->getBasePtr(); 1549 if (bestOffset != 0) 1550 Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), 1551 DAG.getConstant(bestOffset, dl, PtrType)); 1552 unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); 1553 SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, 1554 Lod->getPointerInfo().getWithOffset(bestOffset), 1555 false, false, false, NewAlign); 1556 
return DAG.getSetCC(dl, VT, 1557 DAG.getNode(ISD::AND, dl, newVT, NewLoad, 1558 DAG.getConstant(bestMask.trunc(bestWidth), 1559 dl, newVT)), 1560 DAG.getConstant(0LL, dl, newVT), Cond); 1561 } 1562 } 1563 } 1564 1565 // If the LHS is a ZERO_EXTEND, perform the comparison on the input. 1566 if (N0.getOpcode() == ISD::ZERO_EXTEND) { 1567 unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); 1568 1569 // If the comparison constant has bits in the upper part, the 1570 // zero-extended value could never match. 1571 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), 1572 C1.getBitWidth() - InSize))) { 1573 switch (Cond) { 1574 case ISD::SETUGT: 1575 case ISD::SETUGE: 1576 case ISD::SETEQ: return DAG.getConstant(0, dl, VT); 1577 case ISD::SETULT: 1578 case ISD::SETULE: 1579 case ISD::SETNE: return DAG.getConstant(1, dl, VT); 1580 case ISD::SETGT: 1581 case ISD::SETGE: 1582 // True if the sign bit of C1 is set. 1583 return DAG.getConstant(C1.isNegative(), dl, VT); 1584 case ISD::SETLT: 1585 case ISD::SETLE: 1586 // True if the sign bit of C1 isn't set. 1587 return DAG.getConstant(C1.isNonNegative(), dl, VT); 1588 default: 1589 break; 1590 } 1591 } 1592 1593 // Otherwise, we can perform the comparison with the low bits. 
1594 switch (Cond) { 1595 case ISD::SETEQ: 1596 case ISD::SETNE: 1597 case ISD::SETUGT: 1598 case ISD::SETUGE: 1599 case ISD::SETULT: 1600 case ISD::SETULE: { 1601 EVT newVT = N0.getOperand(0).getValueType(); 1602 if (DCI.isBeforeLegalizeOps() || 1603 (isOperationLegal(ISD::SETCC, newVT) && 1604 getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { 1605 EVT NewSetCCVT = 1606 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT); 1607 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); 1608 1609 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), 1610 NewConst, Cond); 1611 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType()); 1612 } 1613 break; 1614 } 1615 default: 1616 break; // todo, be more careful with signed comparisons 1617 } 1618 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && 1619 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { 1620 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); 1621 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); 1622 EVT ExtDstTy = N0.getValueType(); 1623 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); 1624 1625 // If the constant doesn't fit into the number of bits for the source of 1626 // the sign extension, it is impossible for both sides to be equal. 1627 if (C1.getMinSignedBits() > ExtSrcTyBits) 1628 return DAG.getConstant(Cond == ISD::SETNE, dl, VT); 1629 1630 SDValue ZextOp; 1631 EVT Op0Ty = N0.getOperand(0).getValueType(); 1632 if (Op0Ty == ExtSrcTy) { 1633 ZextOp = N0.getOperand(0); 1634 } else { 1635 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); 1636 ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), 1637 DAG.getConstant(Imm, dl, Op0Ty)); 1638 } 1639 if (!DCI.isCalledByLegalizer()) 1640 DCI.AddToWorklist(ZextOp.getNode()); 1641 // Otherwise, make this a use of a zext. 
1642 return DAG.getSetCC(dl, VT, ZextOp, 1643 DAG.getConstant(C1 & APInt::getLowBitsSet( 1644 ExtDstTyBits, 1645 ExtSrcTyBits), 1646 dl, ExtDstTy), 1647 Cond); 1648 } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && 1649 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { 1650 // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC 1651 if (N0.getOpcode() == ISD::SETCC && 1652 isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { 1653 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); 1654 if (TrueWhenTrue) 1655 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); 1656 // Invert the condition. 1657 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 1658 CC = ISD::getSetCCInverse(CC, 1659 N0.getOperand(0).getValueType().isInteger()); 1660 if (DCI.isBeforeLegalizeOps() || 1661 isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) 1662 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); 1663 } 1664 1665 if ((N0.getOpcode() == ISD::XOR || 1666 (N0.getOpcode() == ISD::AND && 1667 N0.getOperand(0).getOpcode() == ISD::XOR && 1668 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && 1669 isa<ConstantSDNode>(N0.getOperand(1)) && 1670 cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { 1671 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We 1672 // can only do this if the top bits are known zero. 1673 unsigned BitWidth = N0.getValueSizeInBits(); 1674 if (DAG.MaskedValueIsZero(N0, 1675 APInt::getHighBitsSet(BitWidth, 1676 BitWidth-1))) { 1677 // Okay, get the un-inverted input value. 1678 SDValue Val; 1679 if (N0.getOpcode() == ISD::XOR) 1680 Val = N0.getOperand(0); 1681 else { 1682 assert(N0.getOpcode() == ISD::AND && 1683 N0.getOperand(0).getOpcode() == ISD::XOR); 1684 // ((X^1)&1)^1 -> X & 1 1685 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), 1686 N0.getOperand(0).getOperand(0), 1687 N0.getOperand(1)); 1688 } 1689 1690 return DAG.getSetCC(dl, VT, Val, N1, 1691 Cond == ISD::SETEQ ? 
ISD::SETNE : ISD::SETEQ); 1692 } 1693 } else if (N1C->getAPIntValue() == 1 && 1694 (VT == MVT::i1 || 1695 getBooleanContents(N0->getValueType(0)) == 1696 ZeroOrOneBooleanContent)) { 1697 SDValue Op0 = N0; 1698 if (Op0.getOpcode() == ISD::TRUNCATE) 1699 Op0 = Op0.getOperand(0); 1700 1701 if ((Op0.getOpcode() == ISD::XOR) && 1702 Op0.getOperand(0).getOpcode() == ISD::SETCC && 1703 Op0.getOperand(1).getOpcode() == ISD::SETCC) { 1704 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) 1705 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; 1706 return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1), 1707 Cond); 1708 } 1709 if (Op0.getOpcode() == ISD::AND && 1710 isa<ConstantSDNode>(Op0.getOperand(1)) && 1711 cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) { 1712 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. 1713 if (Op0.getValueType().bitsGT(VT)) 1714 Op0 = DAG.getNode(ISD::AND, dl, VT, 1715 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), 1716 DAG.getConstant(1, dl, VT)); 1717 else if (Op0.getValueType().bitsLT(VT)) 1718 Op0 = DAG.getNode(ISD::AND, dl, VT, 1719 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)), 1720 DAG.getConstant(1, dl, VT)); 1721 1722 return DAG.getSetCC(dl, VT, Op0, 1723 DAG.getConstant(0, dl, Op0.getValueType()), 1724 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); 1725 } 1726 if (Op0.getOpcode() == ISD::AssertZext && 1727 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1) 1728 return DAG.getSetCC(dl, VT, Op0, 1729 DAG.getConstant(0, dl, Op0.getValueType()), 1730 Cond == ISD::SETEQ ? 
ISD::SETNE : ISD::SETEQ); 1731 } 1732 } 1733 1734 APInt MinVal, MaxVal; 1735 unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); 1736 if (ISD::isSignedIntSetCC(Cond)) { 1737 MinVal = APInt::getSignedMinValue(OperandBitSize); 1738 MaxVal = APInt::getSignedMaxValue(OperandBitSize); 1739 } else { 1740 MinVal = APInt::getMinValue(OperandBitSize); 1741 MaxVal = APInt::getMaxValue(OperandBitSize); 1742 } 1743 1744 // Canonicalize GE/LE comparisons to use GT/LT comparisons. 1745 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { 1746 if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true 1747 // X >= C0 --> X > (C0 - 1) 1748 APInt C = C1 - 1; 1749 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; 1750 if ((DCI.isBeforeLegalizeOps() || 1751 isCondCodeLegal(NewCC, VT.getSimpleVT())) && 1752 (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && 1753 isLegalICmpImmediate(C.getSExtValue())))) { 1754 return DAG.getSetCC(dl, VT, N0, 1755 DAG.getConstant(C, dl, N1.getValueType()), 1756 NewCC); 1757 } 1758 } 1759 1760 if (Cond == ISD::SETLE || Cond == ISD::SETULE) { 1761 if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true 1762 // X <= C0 --> X < (C0 + 1) 1763 APInt C = C1 + 1; 1764 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? 
ISD::SETLT : ISD::SETULT; 1765 if ((DCI.isBeforeLegalizeOps() || 1766 isCondCodeLegal(NewCC, VT.getSimpleVT())) && 1767 (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && 1768 isLegalICmpImmediate(C.getSExtValue())))) { 1769 return DAG.getSetCC(dl, VT, N0, 1770 DAG.getConstant(C, dl, N1.getValueType()), 1771 NewCC); 1772 } 1773 } 1774 1775 if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) 1776 return DAG.getConstant(0, dl, VT); // X < MIN --> false 1777 if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) 1778 return DAG.getConstant(1, dl, VT); // X >= MIN --> true 1779 if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) 1780 return DAG.getConstant(0, dl, VT); // X > MAX --> false 1781 if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) 1782 return DAG.getConstant(1, dl, VT); // X <= MAX --> true 1783 1784 // Canonicalize setgt X, Min --> setne X, Min 1785 if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) 1786 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); 1787 // Canonicalize setlt X, Max --> setne X, Max 1788 if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) 1789 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); 1790 1791 // If we have setult X, 1, turn it into seteq X, 0 1792 if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) 1793 return DAG.getSetCC(dl, VT, N0, 1794 DAG.getConstant(MinVal, dl, N0.getValueType()), 1795 ISD::SETEQ); 1796 // If we have setugt X, Max-1, turn it into seteq X, Max 1797 if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) 1798 return DAG.getSetCC(dl, VT, N0, 1799 DAG.getConstant(MaxVal, dl, N0.getValueType()), 1800 ISD::SETEQ); 1801 1802 // If we have "setcc X, C0", check to see if we can shrink the immediate 1803 // by changing cc. 
1804 1805 // SETUGT X, SINTMAX -> SETLT X, 0 1806 if (Cond == ISD::SETUGT && 1807 C1 == APInt::getSignedMaxValue(OperandBitSize)) 1808 return DAG.getSetCC(dl, VT, N0, 1809 DAG.getConstant(0, dl, N1.getValueType()), 1810 ISD::SETLT); 1811 1812 // SETULT X, SINTMIN -> SETGT X, -1 1813 if (Cond == ISD::SETULT && 1814 C1 == APInt::getSignedMinValue(OperandBitSize)) { 1815 SDValue ConstMinusOne = 1816 DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, 1817 N1.getValueType()); 1818 return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); 1819 } 1820 1821 // Fold bit comparisons when we can. 1822 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && 1823 (VT == N0.getValueType() || 1824 (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && 1825 N0.getOpcode() == ISD::AND) { 1826 auto &DL = DAG.getDataLayout(); 1827 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 1828 EVT ShiftTy = DCI.isBeforeLegalize() 1829 ? getPointerTy(DL) 1830 : getShiftAmountTy(N0.getValueType(), DL); 1831 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 1832 // Perform the xform if the AND RHS is a single bit. 1833 if (AndRHS->getAPIntValue().isPowerOf2()) { 1834 return DAG.getNode(ISD::TRUNCATE, dl, VT, 1835 DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, 1836 DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl, 1837 ShiftTy))); 1838 } 1839 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { 1840 // (X & 8) == 8 --> (X & 8) >> 3 1841 // Perform the xform if C1 is a single bit. 
1842 if (C1.isPowerOf2()) { 1843 return DAG.getNode(ISD::TRUNCATE, dl, VT, 1844 DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, 1845 DAG.getConstant(C1.logBase2(), dl, 1846 ShiftTy))); 1847 } 1848 } 1849 } 1850 } 1851 1852 if (C1.getMinSignedBits() <= 64 && 1853 !isLegalICmpImmediate(C1.getSExtValue())) { 1854 // (X & -256) == 256 -> (X >> 8) == 1 1855 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && 1856 N0.getOpcode() == ISD::AND && N0.hasOneUse()) { 1857 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 1858 const APInt &AndRHSC = AndRHS->getAPIntValue(); 1859 if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { 1860 unsigned ShiftBits = AndRHSC.countTrailingZeros(); 1861 auto &DL = DAG.getDataLayout(); 1862 EVT ShiftTy = DCI.isBeforeLegalize() 1863 ? getPointerTy(DL) 1864 : getShiftAmountTy(N0.getValueType(), DL); 1865 EVT CmpTy = N0.getValueType(); 1866 SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), 1867 DAG.getConstant(ShiftBits, dl, 1868 ShiftTy)); 1869 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy); 1870 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); 1871 } 1872 } 1873 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE || 1874 Cond == ISD::SETULE || Cond == ISD::SETUGT) { 1875 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT); 1876 // X < 0x100000000 -> (X >> 32) < 1 1877 // X >= 0x100000000 -> (X >> 32) >= 1 1878 // X <= 0x0ffffffff -> (X >> 32) < 1 1879 // X > 0x0ffffffff -> (X >> 32) >= 1 1880 unsigned ShiftBits; 1881 APInt NewC = C1; 1882 ISD::CondCode NewCond = Cond; 1883 if (AdjOne) { 1884 ShiftBits = C1.countTrailingOnes(); 1885 NewC = NewC + 1; 1886 NewCond = (Cond == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; 1887 } else { 1888 ShiftBits = C1.countTrailingZeros(); 1889 } 1890 NewC = NewC.lshr(ShiftBits); 1891 if (ShiftBits && NewC.getMinSignedBits() <= 64 && 1892 isLegalICmpImmediate(NewC.getSExtValue())) { 1893 auto &DL = DAG.getDataLayout(); 1894 EVT ShiftTy = DCI.isBeforeLegalize() 1895 ? getPointerTy(DL) 1896 : getShiftAmountTy(N0.getValueType(), DL); 1897 EVT CmpTy = N0.getValueType(); 1898 SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, 1899 DAG.getConstant(ShiftBits, dl, ShiftTy)); 1900 SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy); 1901 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); 1902 } 1903 } 1904 } 1905 } 1906 1907 if (isa<ConstantFPSDNode>(N0.getNode())) { 1908 // Constant fold or commute setcc. 1909 SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl); 1910 if (O.getNode()) return O; 1911 } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) { 1912 // If the RHS of an FP comparison is a constant, simplify it away in 1913 // some cases. 1914 if (CFP->getValueAPF().isNaN()) { 1915 // If an operand is known to be a nan, we can fold it. 1916 switch (ISD::getUnorderedFlavor(Cond)) { 1917 default: llvm_unreachable("Unknown flavor!"); 1918 case 0: // Known false. 1919 return DAG.getConstant(0, dl, VT); 1920 case 1: // Known true. 1921 return DAG.getConstant(1, dl, VT); 1922 case 2: // Undefined. 1923 return DAG.getUNDEF(VT); 1924 } 1925 } 1926 1927 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the 1928 // constant if knowing that the operand is non-nan is enough. We prefer to 1929 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to 1930 // materialize 0.0. 1931 if (Cond == ISD::SETO || Cond == ISD::SETUO) 1932 return DAG.getSetCC(dl, VT, N0, N0, Cond); 1933 1934 // If the condition is not legal, see if we can find an equivalent one 1935 // which is legal. 
1936 if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) { 1937 // If the comparison was an awkward floating-point == or != and one of 1938 // the comparison operands is infinity or negative infinity, convert the 1939 // condition to a less-awkward <= or >=. 1940 if (CFP->getValueAPF().isInfinity()) { 1941 if (CFP->getValueAPF().isNegative()) { 1942 if (Cond == ISD::SETOEQ && 1943 isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())) 1944 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE); 1945 if (Cond == ISD::SETUEQ && 1946 isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())) 1947 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE); 1948 if (Cond == ISD::SETUNE && 1949 isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType())) 1950 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT); 1951 if (Cond == ISD::SETONE && 1952 isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType())) 1953 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT); 1954 } else { 1955 if (Cond == ISD::SETOEQ && 1956 isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())) 1957 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE); 1958 if (Cond == ISD::SETUEQ && 1959 isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())) 1960 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE); 1961 if (Cond == ISD::SETUNE && 1962 isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType())) 1963 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT); 1964 if (Cond == ISD::SETONE && 1965 isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType())) 1966 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT); 1967 } 1968 } 1969 } 1970 } 1971 1972 if (N0 == N1) { 1973 // The sext(setcc()) => setcc() optimization relies on the appropriate 1974 // constant being emitted. 1975 uint64_t EqVal = 0; 1976 switch (getBooleanContents(N0.getValueType())) { 1977 case UndefinedBooleanContent: 1978 case ZeroOrOneBooleanContent: 1979 EqVal = ISD::isTrueWhenEqual(Cond); 1980 break; 1981 case ZeroOrNegativeOneBooleanContent: 1982 EqVal = ISD::isTrueWhenEqual(Cond) ? 
-1 : 0; 1983 break; 1984 } 1985 1986 // We can always fold X == X for integer setcc's. 1987 if (N0.getValueType().isInteger()) { 1988 return DAG.getConstant(EqVal, dl, VT); 1989 } 1990 unsigned UOF = ISD::getUnorderedFlavor(Cond); 1991 if (UOF == 2) // FP operators that are undefined on NaNs. 1992 return DAG.getConstant(EqVal, dl, VT); 1993 if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) 1994 return DAG.getConstant(EqVal, dl, VT); 1995 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO 1996 // if it is not already. 1997 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; 1998 if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || 1999 getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal)) 2000 return DAG.getSetCC(dl, VT, N0, N1, NewCond); 2001 } 2002 2003 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && 2004 N0.getValueType().isInteger()) { 2005 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB || 2006 N0.getOpcode() == ISD::XOR) { 2007 // Simplify (X+Y) == (X+Z) --> Y == Z 2008 if (N0.getOpcode() == N1.getOpcode()) { 2009 if (N0.getOperand(0) == N1.getOperand(0)) 2010 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond); 2011 if (N0.getOperand(1) == N1.getOperand(1)) 2012 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond); 2013 if (DAG.isCommutativeBinOp(N0.getOpcode())) { 2014 // If X op Y == Y op X, try other combinations. 2015 if (N0.getOperand(0) == N1.getOperand(1)) 2016 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), 2017 Cond); 2018 if (N0.getOperand(1) == N1.getOperand(0)) 2019 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), 2020 Cond); 2021 } 2022 } 2023 2024 // If RHS is a legal immediate value for a compare instruction, we need 2025 // to be careful about increasing register pressure needlessly. 
2026 bool LegalRHSImm = false; 2027 2028 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) { 2029 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 2030 // Turn (X+C1) == C2 --> X == C2-C1 2031 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { 2032 return DAG.getSetCC(dl, VT, N0.getOperand(0), 2033 DAG.getConstant(RHSC->getAPIntValue()- 2034 LHSR->getAPIntValue(), 2035 dl, N0.getValueType()), Cond); 2036 } 2037 2038 // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. 2039 if (N0.getOpcode() == ISD::XOR) 2040 // If we know that all of the inverted bits are zero, don't bother 2041 // performing the inversion. 2042 if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue())) 2043 return 2044 DAG.getSetCC(dl, VT, N0.getOperand(0), 2045 DAG.getConstant(LHSR->getAPIntValue() ^ 2046 RHSC->getAPIntValue(), 2047 dl, N0.getValueType()), 2048 Cond); 2049 } 2050 2051 // Turn (C1-X) == C2 --> X == C1-C2 2052 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { 2053 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { 2054 return 2055 DAG.getSetCC(dl, VT, N0.getOperand(1), 2056 DAG.getConstant(SUBC->getAPIntValue() - 2057 RHSC->getAPIntValue(), 2058 dl, N0.getValueType()), 2059 Cond); 2060 } 2061 } 2062 2063 // Could RHSC fold directly into a compare? 2064 if (RHSC->getValueType(0).getSizeInBits() <= 64) 2065 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue()); 2066 } 2067 2068 // Simplify (X+Z) == X --> Z == 0 2069 // Don't do this if X is an immediate that can fold into a cmp 2070 // instruction and X+Z has other uses. It could be an induction variable 2071 // chain, and the transform would increase register pressure. 
2072 if (!LegalRHSImm || N0.getNode()->hasOneUse()) { 2073 if (N0.getOperand(0) == N1) 2074 return DAG.getSetCC(dl, VT, N0.getOperand(1), 2075 DAG.getConstant(0, dl, N0.getValueType()), Cond); 2076 if (N0.getOperand(1) == N1) { 2077 if (DAG.isCommutativeBinOp(N0.getOpcode())) 2078 return DAG.getSetCC(dl, VT, N0.getOperand(0), 2079 DAG.getConstant(0, dl, N0.getValueType()), 2080 Cond); 2081 if (N0.getNode()->hasOneUse()) { 2082 assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); 2083 auto &DL = DAG.getDataLayout(); 2084 // (Z-X) == X --> Z == X<<1 2085 SDValue SH = DAG.getNode( 2086 ISD::SHL, dl, N1.getValueType(), N1, 2087 DAG.getConstant(1, dl, 2088 getShiftAmountTy(N1.getValueType(), DL))); 2089 if (!DCI.isCalledByLegalizer()) 2090 DCI.AddToWorklist(SH.getNode()); 2091 return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); 2092 } 2093 } 2094 } 2095 } 2096 2097 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB || 2098 N1.getOpcode() == ISD::XOR) { 2099 // Simplify X == (X+Z) --> Z == 0 2100 if (N1.getOperand(0) == N0) 2101 return DAG.getSetCC(dl, VT, N1.getOperand(1), 2102 DAG.getConstant(0, dl, N1.getValueType()), Cond); 2103 if (N1.getOperand(1) == N0) { 2104 if (DAG.isCommutativeBinOp(N1.getOpcode())) 2105 return DAG.getSetCC(dl, VT, N1.getOperand(0), 2106 DAG.getConstant(0, dl, N1.getValueType()), Cond); 2107 if (N1.getNode()->hasOneUse()) { 2108 assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); 2109 auto &DL = DAG.getDataLayout(); 2110 // X == (Z-X) --> X<<1 == Z 2111 SDValue SH = DAG.getNode( 2112 ISD::SHL, dl, N1.getValueType(), N0, 2113 DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL))); 2114 if (!DCI.isCalledByLegalizer()) 2115 DCI.AddToWorklist(SH.getNode()); 2116 return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); 2117 } 2118 } 2119 } 2120 2121 if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl)) 2122 return V; 2123 } 2124 2125 // Fold away ALL boolean setcc's. 
2126 SDValue Temp; 2127 if (N0.getValueType() == MVT::i1 && foldBooleans) { 2128 switch (Cond) { 2129 default: llvm_unreachable("Unknown integer setcc!"); 2130 case ISD::SETEQ: // X == Y -> ~(X^Y) 2131 Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); 2132 N0 = DAG.getNOT(dl, Temp, MVT::i1); 2133 if (!DCI.isCalledByLegalizer()) 2134 DCI.AddToWorklist(Temp.getNode()); 2135 break; 2136 case ISD::SETNE: // X != Y --> (X^Y) 2137 N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); 2138 break; 2139 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y 2140 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y 2141 Temp = DAG.getNOT(dl, N0, MVT::i1); 2142 N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp); 2143 if (!DCI.isCalledByLegalizer()) 2144 DCI.AddToWorklist(Temp.getNode()); 2145 break; 2146 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X 2147 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X 2148 Temp = DAG.getNOT(dl, N1, MVT::i1); 2149 N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp); 2150 if (!DCI.isCalledByLegalizer()) 2151 DCI.AddToWorklist(Temp.getNode()); 2152 break; 2153 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y 2154 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y 2155 Temp = DAG.getNOT(dl, N0, MVT::i1); 2156 N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp); 2157 if (!DCI.isCalledByLegalizer()) 2158 DCI.AddToWorklist(Temp.getNode()); 2159 break; 2160 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X 2161 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X 2162 Temp = DAG.getNOT(dl, N1, MVT::i1); 2163 N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp); 2164 break; 2165 } 2166 if (VT != MVT::i1) { 2167 if (!DCI.isCalledByLegalizer()) 2168 DCI.AddToWorklist(N0.getNode()); 2169 // FIXME: If running after legalize, we probably can't do this. 2170 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0); 2171 } 2172 return N0; 2173 } 2174 2175 // Could not fold it. 
  return SDValue();
}

/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
///
/// Note that \p Offset is accumulated into (+=) rather than assigned, and the
/// function recurses through ISD::ADD operands, so the caller is expected to
/// initialize \p Offset before the first call.
bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
                                    int64_t &Offset) const {
  // Direct hit: the node itself is a global address.
  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  // Otherwise look through an ISD::ADD whose other operand is a constant:
  // matches both (GA + C) and (C + GA).
  if (N->getOpcode() == ISD::ADD) {
    SDValue N1 = N->getOperand(0);
    SDValue N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }

  return false;
}

/// Hook for target-specific DAG combines; this base implementation performs
/// no optimization.  Targets override it in their TargetLowering subclass.
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Inline Assembler Implementation Methods
//===----------------------------------------------------------------------===//

/// Classify a GCC-style inline-asm constraint string into one of the
/// ConstraintType buckets.
TargetLowering::ConstraintType
TargetLowering::getConstraintType(StringRef Constraint) const {
  unsigned S = Constraint.size();

  // Single-letter constraints are the common GCC constraint codes.
  if (S == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'r': return C_RegisterClass;
    case 'm':    // memory
    case 'o':    // offsetable
    case 'V':    // not offsetable
      return C_Memory;
    case 'i':    // Simple Integer or Relocatable Constant
    case 'n':    // Simple Integer
    case 'E':    // Floating Point Constant
    case 'F':    // Floating Point Constant
    case 's':    // Relocatable Constant
    case 'p':    // Address.
    case 'X':    // Allow ANY value.
    case 'I':    // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
    case '<':
    case '>':
      return C_Other;
    }
  }

  // "{name}" constraints name a specific physical register; "{memory}" is
  // special-cased as a memory constraint.
  if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
    if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
      return C_Memory;
    return C_Register;
  }
  return C_Unknown;
}

/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.  Returns nullptr if no better constraint is known.
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
  if (ConstraintVT.isInteger())
    return "r";
  if (ConstraintVT.isFloatingPoint())
    return "f";      // works for many targets
  return nullptr;
}

/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                 std::string &Constraint,
                                                 std::vector<SDValue> &Ops,
                                                 SelectionDAG &DAG) const {

  // Only single-letter constraints are handled here; targets handle the rest.
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':     // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock) {
      Ops.push_back(Op);
      return;
    }
    // fall through
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant
    // These operands are interested in values of the form (GV+C), where C may
    // be folded in as an offset of GV, or it may be explicitly added.  Also, it
    // is possible and fine if either GV or C are missing.
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);

    // If we have "(add GV, C)", pull out GV/C
    if (Op.getOpcode() == ISD::ADD) {
      C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (!C || !GA) {
        // Try the commuted form: "(add C, GV)".
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
      }
      if (!C || !GA) {
        // An ADD only matches when both a constant and a global are present.
        C = nullptr;
        GA = nullptr;
      }
    }

    // If we find a valid operand, map to the TargetXXX version so that the
    // value itself doesn't get selected.
    if (GA) {   // Either &GV   or   &GV+C
      // 'n' requires a plain integer, so a global address is not acceptable.
      if (ConstraintLetter != 'n') {
        int64_t Offs = GA->getOffset();
        if (C) Offs += C->getZExtValue();
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
                                                 C ? SDLoc(C) : SDLoc(),
                                                 Op.getValueType(), Offs));
      }
      return;
    }
    if (C) {   // just C, no GV.
      // Simple constants are not allowed for 's'.
      if (ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
                                            SDLoc(C), MVT::i64));
      }
      return;
    }
    break;
  }
  }
}

/// Map a "{regname}" inline-asm constraint to a (register, register class)
/// pair by scanning every legal register class for a register whose asm name
/// matches.
std::pair<unsigned, const TargetRegisterClass *>
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
                                             StringRef Constraint,
                                             MVT VT) const {
  // Anything that is not "{...}" is left for the target to interpret.
  if (Constraint.empty() || Constraint[0] != '{')
    return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
  assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");

  // Remove the braces from around the name.
  StringRef RegName(Constraint.data()+1, Constraint.size()-2);

  // R remembers the first name match whose class lacks the requested type;
  // it is returned only if no class with the type is found.
  std::pair<unsigned, const TargetRegisterClass*> R =
    std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));

  // Figure out which register class contains this reg.
  for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
       E = RI->regclass_end(); RCI != E; ++RCI) {
    const TargetRegisterClass *RC = *RCI;

    // If none of the value types for this register class are valid, we
    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
    if (!isLegalRC(RC))
      continue;

    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
         I != E; ++I) {
      // Asm register names are matched case-insensitively.
      if (RegName.equals_lower(RI->getRegAsmName(*I))) {
        std::pair<unsigned, const TargetRegisterClass*> S =
          std::make_pair(*I, RC);

        // If this register class has the requested value type, return it,
        // otherwise keep searching and return the first class found
        // if no other is found which explicitly has the requested type.
        if (RC->hasType(VT))
          return S;
        else if (!R.second)
          R = S;
      }
    }
  }

  return R;
}

//===----------------------------------------------------------------------===//
// Constraint Selection.

/// Return true if this is an input operand that is a matching constraint like
/// "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}

/// If this is an input matching constraint, this method returns the output
/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  // The code is a decimal operand number, e.g. "4".
  return atoi(ConstraintCode.c_str());
}

/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 ImmutableCallSite CS) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!CS.getType()->isVoidTy() &&
             "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
        // Multiple outputs are modelled as a struct return; pick this
        // output's element.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands are passed by pointer; constrain the pointee type.
        llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
        if (!PtrTy)
          report_fatal_error("Indirect operand for inline asm not a pointer!");
        OpTy = PtrTy->getElementType();
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpInfo.ConstraintVT =
              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
          break;
        }
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
        unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
      } else {
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
      }
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1;  // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // One invalid constraint invalidates the whole alternative.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
           cIndex != eIndex; ++cIndex) {
        AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
        if (cInfo.Type == InlineAsm::isClobber)
          continue;
        cInfo.selectAlternative(bestMAIndex);
      }
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        // Differing VTs are still compatible when both constraints resolve to
        // the same register class and agree on integer-ness.
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}

/// Return an integer indicating how general CT is.
2591 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { 2592 switch (CT) { 2593 case TargetLowering::C_Other: 2594 case TargetLowering::C_Unknown: 2595 return 0; 2596 case TargetLowering::C_Register: 2597 return 1; 2598 case TargetLowering::C_RegisterClass: 2599 return 2; 2600 case TargetLowering::C_Memory: 2601 return 3; 2602 } 2603 llvm_unreachable("Invalid constraint type"); 2604 } 2605 2606 /// Examine constraint type and operand type and determine a weight value. 2607 /// This object must already have been set up with the operand type 2608 /// and the current alternative constraint selected. 2609 TargetLowering::ConstraintWeight 2610 TargetLowering::getMultipleConstraintMatchWeight( 2611 AsmOperandInfo &info, int maIndex) const { 2612 InlineAsm::ConstraintCodeVector *rCodes; 2613 if (maIndex >= (int)info.multipleAlternatives.size()) 2614 rCodes = &info.Codes; 2615 else 2616 rCodes = &info.multipleAlternatives[maIndex].Codes; 2617 ConstraintWeight BestWeight = CW_Invalid; 2618 2619 // Loop over the options, keeping track of the most general one. 2620 for (unsigned i = 0, e = rCodes->size(); i != e; ++i) { 2621 ConstraintWeight weight = 2622 getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str()); 2623 if (weight > BestWeight) 2624 BestWeight = weight; 2625 } 2626 2627 return BestWeight; 2628 } 2629 2630 /// Examine constraint type and operand type and determine a weight value. 2631 /// This object must already have been set up with the operand type 2632 /// and the current alternative constraint selected. 2633 TargetLowering::ConstraintWeight 2634 TargetLowering::getSingleConstraintMatchWeight( 2635 AsmOperandInfo &info, const char *constraint) const { 2636 ConstraintWeight weight = CW_Invalid; 2637 Value *CallOperandVal = info.CallOperandVal; 2638 // If we don't have a value, we can't do a match, 2639 // but allow it at the lowest weight. 2640 if (!CallOperandVal) 2641 return CW_Default; 2642 // Look at the constraint type. 
2643 switch (*constraint) { 2644 case 'i': // immediate integer. 2645 case 'n': // immediate integer with a known value. 2646 if (isa<ConstantInt>(CallOperandVal)) 2647 weight = CW_Constant; 2648 break; 2649 case 's': // non-explicit intregal immediate. 2650 if (isa<GlobalValue>(CallOperandVal)) 2651 weight = CW_Constant; 2652 break; 2653 case 'E': // immediate float if host format. 2654 case 'F': // immediate float. 2655 if (isa<ConstantFP>(CallOperandVal)) 2656 weight = CW_Constant; 2657 break; 2658 case '<': // memory operand with autodecrement. 2659 case '>': // memory operand with autoincrement. 2660 case 'm': // memory operand. 2661 case 'o': // offsettable memory operand 2662 case 'V': // non-offsettable memory operand 2663 weight = CW_Memory; 2664 break; 2665 case 'r': // general register. 2666 case 'g': // general register, memory operand or immediate integer. 2667 // note: Clang converts "g" to "imr". 2668 if (CallOperandVal->getType()->isIntegerTy()) 2669 weight = CW_Register; 2670 break; 2671 case 'X': // any operand. 2672 default: 2673 weight = CW_Default; 2674 break; 2675 } 2676 return weight; 2677 } 2678 2679 /// If there are multiple different constraints that we could pick for this 2680 /// operand (e.g. "imr") try to pick the 'best' one. 2681 /// This is somewhat tricky: constraints fall into four classes: 2682 /// Other -> immediates and magic values 2683 /// Register -> one specific register 2684 /// RegisterClass -> a group of regs 2685 /// Memory -> memory 2686 /// Ideally, we would pick the most specific constraint possible: if we have 2687 /// something that fits into a register, we would pick it. The problem here 2688 /// is that if we have something that could either be in a register or in 2689 /// memory that use of the register could cause selection of *other* 2690 /// operands to fail: they might only succeed if we pick memory. 
/// Because of this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it.  This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present.  This prefers
///     'm' over 'r', for example.
///
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
                             const TargetLowering &TLI,
                             SDValue Op, SelectionDAG *DAG) {
  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
  unsigned BestIdx = 0;
  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
  int BestGenerality = -1;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
    TargetLowering::ConstraintType CType =
      TLI.getConstraintType(OpInfo.Codes[i]);

    // If this is an 'other' constraint, see if the operand is valid for it.
    // For example, on X86 we might have an 'rI' constraint.  If the operand
    // is an integer in the range [0..31] we want to use I (saving a load
    // of a register), otherwise we must use 'r'.
    if (CType == TargetLowering::C_Other && Op.getNode()) {
      assert(OpInfo.Codes[i].size() == 1 &&
             "Unhandled multi-letter 'other' constraint");
      std::vector<SDValue> ResultOps;
      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
                                       ResultOps, *DAG);
      // A non-empty lowering means the operand satisfies this 'other'
      // constraint; take it immediately (heuristic rule 1).
      if (!ResultOps.empty()) {
        BestType = CType;
        BestIdx = i;
        break;
      }
    }

    // Things with matching constraints can only be registers, per gcc
    // documentation.  This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    // This constraint letter is more general than the previous one, use it.
    int Generality = getConstraintGenerality(CType);
    if (Generality > BestGenerality) {
      BestType = CType;
      BestIdx = i;
      BestGenerality = Generality;
    }
  }

  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
  OpInfo.ConstraintType = BestType;
}

/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    ChooseConstraint(OpInfo, *this, Op, DAG);
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Labels and constants are handled elsewhere ('X' is the only thing
    // that matches labels).  For Functions, the type here is the type of
    // the result, which is not what we want to look at; leave them alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
      // NOTE(review): this is a self-assignment (no-op); kept byte-for-byte.
      OpInfo.CallOperandVal = v;
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}

/// \brief Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
2784 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, 2785 SDLoc dl, SelectionDAG &DAG, 2786 std::vector<SDNode *> &Created) { 2787 assert(d != 0 && "Division by zero!"); 2788 2789 // Shift the value upfront if it is even, so the LSB is one. 2790 unsigned ShAmt = d.countTrailingZeros(); 2791 if (ShAmt) { 2792 // TODO: For UDIV use SRL instead of SRA. 2793 SDValue Amt = 2794 DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType(), 2795 DAG.getDataLayout())); 2796 SDNodeFlags Flags; 2797 Flags.setExact(true); 2798 Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags); 2799 Created.push_back(Op1.getNode()); 2800 d = d.ashr(ShAmt); 2801 } 2802 2803 // Calculate the multiplicative inverse, using Newton's method. 2804 APInt t, xn = d; 2805 while ((t = d*xn) != 1) 2806 xn *= APInt(d.getBitWidth(), 2) - t; 2807 2808 SDValue Op2 = DAG.getConstant(xn, dl, Op1.getValueType()); 2809 SDValue Mul = DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2); 2810 Created.push_back(Mul.getNode()); 2811 return Mul; 2812 } 2813 2814 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, 2815 SelectionDAG &DAG, 2816 std::vector<SDNode *> *Created) const { 2817 AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); 2818 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2819 if (TLI.isIntDivCheap(N->getValueType(0), Attr)) 2820 return SDValue(N,0); // Lower SDIV as SDIV 2821 return SDValue(); 2822 } 2823 2824 /// \brief Given an ISD::SDIV node expressing a divide by constant, 2825 /// return a DAG expression to select that will generate the same value by 2826 /// multiplying by a magic number. 2827 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 
SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
                                  SelectionDAG &DAG, bool IsAfterLegalization,
                                  std::vector<SDNode *> *Created) const {
  assert(Created && "No vector to hold sdiv ops.");

  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact())
    return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created);

  // Magic multiplier and shift amount per Hacker's Delight.
  APInt::ms magics = Divisor.magic();

  // Multiply the numerator (operand 0) by the magic value
  // FIXME: We should support doing a MUL in a wider type
  SDValue Q;
  if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
      isOperationLegalOrCustom(ISD::MULHS, VT))
    Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
                    DAG.getConstant(magics.m, dl, VT));
  else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
           isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
    Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
                            N->getOperand(0),
                            DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
  else
    return SDValue();       // No mulhs or equivalent.
  // If d > 0 and m < 0, add the numerator
  if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
    Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
    Created->push_back(Q.getNode());
  }
  // If d < 0 and m > 0, subtract the numerator.
  if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
    Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
    Created->push_back(Q.getNode());
  }
  auto &DL = DAG.getDataLayout();
  // Shift right algebraic if shift value is nonzero
  if (magics.s > 0) {
    Q = DAG.getNode(
        ISD::SRA, dl, VT, Q,
        DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
    Created->push_back(Q.getNode());
  }
  // Extract the sign bit and add it to the quotient
  SDValue T =
      DAG.getNode(ISD::SRL, dl, VT, Q,
                  DAG.getConstant(VT.getScalarSizeInBits() - 1, dl,
                                  getShiftAmountTy(Q.getValueType(), DL)));
  Created->push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}

/// \brief Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
                                  SelectionDAG &DAG, bool IsAfterLegalization,
                                  std::vector<SDNode *> *Created) const {
  assert(Created && "No vector to hold udiv ops.");

  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  auto &DL = DAG.getDataLayout();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // FIXME: We should use a narrower constant when the upper
  // bits are known to be zero.
  APInt::mu magics = Divisor.magicu();

  SDValue Q = N->getOperand(0);

  // If the divisor is even, we can avoid using the expensive fixup by shifting
  // the divided value upfront.
  if (magics.a != 0 && !Divisor[0]) {
    unsigned Shift = Divisor.countTrailingZeros();
    Q = DAG.getNode(
        ISD::SRL, dl, VT, Q,
        DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL)));
    Created->push_back(Q.getNode());

    // Get magic number for the shifted divisor.
    magics = Divisor.lshr(Shift).magicu(Shift);
    assert(magics.a == 0 && "Should use cheap fixup now");
  }

  // Multiply the numerator (operand 0) by the magic value
  // FIXME: We should support doing a MUL in a wider type
  if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
      isOperationLegalOrCustom(ISD::MULHU, VT))
    Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT));
  else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
           isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
    Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
                            DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
  else
    return SDValue();       // No mulhu or equivalent.

  Created->push_back(Q.getNode());

  if (magics.a == 0) {
    // No "add" fixup needed; a single logical shift finishes the quotient.
    assert(magics.s < Divisor.getBitWidth() &&
           "We shouldn't generate an undefined shift!");
    return DAG.getNode(
        ISD::SRL, dl, VT, Q,
        DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
  } else {
    // Overflow-avoiding fixup: NPQ = (n - q); q = ((NPQ >> 1) + q) >> (s-1).
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
    Created->push_back(NPQ.getNode());
    NPQ = DAG.getNode(
        ISD::SRL, dl, VT, NPQ,
        DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL)));
    Created->push_back(NPQ.getNode());
    NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created->push_back(NPQ.getNode());
    return DAG.getNode(
        ISD::SRL, dl, VT, NPQ,
        DAG.getConstant(magics.s - 1, dl,
                        getShiftAmountTy(NPQ.getValueType(), DL)));
  }
}

/// Emit a frontend error (and return true) unless the first operand of Op is
/// a compile-time constant, as '__builtin_return_address' requires.
bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_return_address' must "
                                "be a constant integer");
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Legalization Utilities
//===----------------------------------------------------------------------===//

/// Expand a wide MUL into a pair of HiLoVT-sized results (Lo, Hi) using
/// whichever of MULHS/MULHU/SMUL_LOHI/UMUL_LOHI the target supports.
/// Returns false if no suitable expansion exists.
bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
                               SelectionDAG &DAG, SDValue LL, SDValue LH,
                               SDValue RL, SDValue RH) const {
  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
  if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
    unsigned OuterBitSize = VT.getSizeInBits();
    unsigned InnerBitSize = HiLoVT.getSizeInBits();
    unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
    unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));

    // LL, LH, RL, and RH must be either all NULL or all set to a value.
    assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
           (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

    // Derive the low halves by truncation when the caller did not supply them.
    if (!LL.getNode() && !RL.getNode() &&
        isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
      LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(0));
      RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(1));
    }

    if (!LL.getNode())
      return false;

    APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
    if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
        DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
      // The inputs are both zero-extended.
      if (HasUMUL_LOHI) {
        // We can emit a umul_lohi.
        Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL,
                         RL);
        Hi = SDValue(Lo.getNode(), 1);
        return true;
      }
      if (HasMULHU) {
        // We can emit a mulhu+mul.
        Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
        Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL);
        return true;
      }
    }
    if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
      // The input values are both sign-extended.
      if (HasSMUL_LOHI) {
        // We can emit a smul_lohi.
        Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL,
                         RL);
        Hi = SDValue(Lo.getNode(), 1);
        return true;
      }
      if (HasMULHS) {
        // We can emit a mulhs+mul.
        Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
        Hi = DAG.getNode(ISD::MULHS, dl, HiLoVT, LL, RL);
        return true;
      }
    }

    // Derive the high halves via shift+truncate when not supplied.
    if (!LH.getNode() && !RH.getNode() &&
        isOperationLegalOrCustom(ISD::SRL, VT) &&
        isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
      auto &DL = DAG.getDataLayout();
      unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits();
      SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT, DL));
      LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift);
      LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
      RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift);
      RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
    }

    if (!LH.getNode())
      return false;

    // General case: schoolbook multiply from the four half-width pieces.
    if (HasUMUL_LOHI) {
      // Lo,Hi = umul LHS, RHS.
      SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
                                     DAG.getVTList(HiLoVT, HiLoVT), LL, RL);
      Lo = UMulLOHI;
      Hi = UMulLOHI.getValue(1);
      RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
      LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
      Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
      Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
      return true;
    }
    if (HasMULHU) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
      Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL);
      RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
      LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
      Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
      Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
      return true;
    }
  }
  return false;
}

/// Expand an FP_TO_SINT node into integer bit manipulation; currently only
/// handles f32 -> i64.  Returns false when the combination is unsupported.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  EVT VT = Node->getOperand(0).getValueType();
  EVT NVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (VT != MVT::f32 || NVT != MVT::i64)
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits());
  // IEEE-754 single-precision field masks/positions.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), dl,
                                     IntVT);
  SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));

  auto &DL = DAG.getDataLayout();
  // Unbiased exponent.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT, DL)));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // All-ones or all-zeros word depending on the sign bit.
  SDValue Sign = DAG.getNode(
      ISD::SRA, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
      DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT, DL)));
  Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);

  // Mantissa with the implicit leading one restored.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, NVT);

  // Shift the mantissa left or right depending on whether the exponent
  // exceeds the mantissa width.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, NVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, getShiftAmountTy(IntVT, DL))),
      DAG.getNode(ISD::SRL, dl, NVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, getShiftAmountTy(IntVT, DL))),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
                            DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
                            Sign);

  // Values with a negative unbiased exponent (|x| < 1) convert to zero.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT);
  return true;
}

/// Replace a vector (extending) load with one scalar load per element,
/// rejoined by a BUILD_VECTOR and a TokenFactor of the load chains.
SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                            SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  ISD::LoadExtType ExtType = LD->getExtensionType();

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = LD->getValueType(0).getScalarType();

  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  EVT PtrVT = BasePTR.getValueType();

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad = DAG.getExtLoad(
        ExtType, SL, DstEltVT,
        Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
        SrcEltVT,
        LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(),
        MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo());

    BasePTR = DAG.getNode(ISD::ADD, SL, PtrVT, BasePTR,
                          DAG.getConstant(Stride, SL, PtrVT));

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, SL, LD->getValueType(0), Vals);

  return DAG.getMergeValues({ Value, NewChain }, SL);
}

// FIXME: This relies on each element having a byte size, otherwise the stride
// is 0 and just overwrites the same location. ExpandStore currently expects
// this broken behavior.
/// Replace a vector (truncating) store with one scalar truncating store per
/// element; returns a TokenFactor of all the per-element store chains.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  unsigned Alignment = ST->getAlignment();
  bool isVolatile = ST->isVolatile();
  bool isNonTemporal = ST->isNonTemporal();
  AAMDNodes AAInfo = ST->getAAInfo();

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  EVT PtrVT = BasePtr.getValueType();

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
  unsigned NumElem = StVT.getVectorNumElements();

  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getConstant(Idx, SL, IdxVT));

    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
                              DAG.getConstant(Idx * Stride, SL, PtrVT));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr,
        ST->getPointerInfo().getWithOffset(Idx * Stride), MemSclVT,
        isVolatile, isNonTemporal, MinAlign(Alignment, Idx * Stride),
        AAInfo);

    Stores.push_back(Store);
  }

  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}

/// Expand an unaligned (unindexed) load.  Returns the loaded value and the
/// output chain, which callers wrap in a MERGE_VALUES node.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) {
        // Scalarize the load and let the individual components be handled.
        SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
        return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);

    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
                                 LD->getPointerInfo().getWithOffset(Offset),
                                 LD->isVolatile(), LD->isNonTemporal(),
                                 LD->isInvariant(),
                                 MinAlign(LD->getAlignment(), Offset),
                                 LD->getAAInfo());
      // Follow the load with a store to the stack slot.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
                                    MachinePointerInfo(), false, false, 0));
      // Increment the pointers.
      Offset += RegBytes;
      Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
      StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, StackPtr,
                             StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                                  LD->getPointerInfo().getWithOffset(Offset),
                                  MemVT, LD->isVolatile(),
                                  LD->isNonTemporal(),
                                  LD->isInvariant(),
                                  MinAlign(LD->getAlignment(), Offset),
                                  LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
                                       MachinePointerInfo(), MemVT,
                                       false, false, 0));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo(), LoadedVT, false, false, false,
                          0);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  unsigned Alignment = LD->getAlignment();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, LD->isVolatile(),
                        LD->isNonTemporal(), LD->isInvariant(), Alignment,
                        LD->getAAInfo());
    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                      DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, LD->isVolatile(),
                        LD->isNonTemporal(),LD->isInvariant(),
                        MinAlign(Alignment, IncrementSize), LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, LD->isVolatile(),
                        LD->isNonTemporal(), LD->isInvariant(), Alignment,
                        LD->getAAInfo());
    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                      DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, LD->isVolatile(),
                        LD->isNonTemporal(), LD->isInvariant(),
                        MinAlign(Alignment, IncrementSize), LD->getAAInfo());
  }

  // aggregate the two parts
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                           Hi.getValue(1));

  return std::make_pair(Result, TF);
}

/// Expand an unaligned (unindexed) store.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  int Alignment = ST->getAlignment();

  SDLoc dl(ST);
  if (ST->getMemoryVT().isFloatingPoint() ||
      ST->getMemoryVT().isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT)) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);

        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            ST->isVolatile(), ST->isNonTemporal(), Alignment);
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    EVT StoredVT = ST->getMemoryVT();
    MVT RegVT =
        getRegisterType(*DAG.getContext(),
                        EVT::getIntegerVT(*DAG.getContext(),
                                          StoredVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(Chain, dl,
                                      Val, StackPtr, MachinePointerInfo(),
                                      StoredVT, false, false, 0);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
                                 MachinePointerInfo(),
                                 false, false, false, 0);
      // Store it to the final location.  Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->isVolatile(), ST->isNonTemporal(),
                                    MinAlign(ST->getAlignment(), Offset)));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT,
                             StackPtr, StackPtrIncrement);
      Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
                                  MachinePointerInfo(),
                                  MemVT, false, false, false, 0);

    Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                                       ST->getPointerInfo()
                                         .getWithOffset(Offset),
                                       MemVT, ST->isVolatile(),
                                       ST->isNonTemporal(),
                                       MinAlign(ST->getAlignment(), Offset),
                                       ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
3490 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); 3491 return Result; 3492 } 3493 3494 assert(ST->getMemoryVT().isInteger() && 3495 !ST->getMemoryVT().isVector() && 3496 "Unaligned store of unknown type."); 3497 // Get the half-size VT 3498 EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext()); 3499 int NumBits = NewStoredVT.getSizeInBits(); 3500 int IncrementSize = NumBits / 8; 3501 3502 // Divide the stored value in two parts. 3503 SDValue ShiftAmount = 3504 DAG.getConstant(NumBits, dl, getShiftAmountTy(Val.getValueType(), 3505 DAG.getDataLayout())); 3506 SDValue Lo = Val; 3507 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); 3508 3509 // Store the two parts 3510 SDValue Store1, Store2; 3511 Store1 = DAG.getTruncStore(Chain, dl, 3512 DAG.getDataLayout().isLittleEndian() ? Lo : Hi, 3513 Ptr, ST->getPointerInfo(), NewStoredVT, 3514 ST->isVolatile(), ST->isNonTemporal(), Alignment); 3515 3516 EVT PtrVT = Ptr.getValueType(); 3517 Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, 3518 DAG.getConstant(IncrementSize, dl, PtrVT)); 3519 Alignment = MinAlign(Alignment, IncrementSize); 3520 Store2 = DAG.getTruncStore( 3521 Chain, dl, DAG.getDataLayout().isLittleEndian() ? 
Hi : Lo, Ptr, 3522 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, 3523 ST->isVolatile(), ST->isNonTemporal(), Alignment, ST->getAAInfo()); 3524 3525 SDValue Result = 3526 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); 3527 return Result; 3528 } 3529 3530 //===----------------------------------------------------------------------===// 3531 // Implementation of Emulated TLS Model 3532 //===----------------------------------------------------------------------===// 3533 3534 SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, 3535 SelectionDAG &DAG) const { 3536 // Access to address of TLS varialbe xyz is lowered to a function call: 3537 // __emutls_get_address( address of global variable named "__emutls_v.xyz" ) 3538 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3539 PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext()); 3540 SDLoc dl(GA); 3541 3542 ArgListTy Args; 3543 ArgListEntry Entry; 3544 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str(); 3545 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent()); 3546 StringRef EmuTlsVarName(NameString); 3547 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName); 3548 assert(EmuTlsVar && "Cannot find EmuTlsVar "); 3549 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT); 3550 Entry.Ty = VoidPtrType; 3551 Args.push_back(Entry); 3552 3553 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT); 3554 3555 TargetLowering::CallLoweringInfo CLI(DAG); 3556 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()); 3557 CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0); 3558 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 3559 3560 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. 3561 // At last for X86 targets, maybe good for other targets too? 
3562 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 3563 MFI->setAdjustsStack(true); // Is this only for X86 target? 3564 MFI->setHasCalls(true); 3565 3566 assert((GA->getOffset() == 0) && 3567 "Emulated TLS must have zero offset in GlobalAddressSDNode"); 3568 return CallResult.first; 3569 } 3570