1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the SystemZTargetLowering class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "SystemZISelLowering.h" 15 #include "SystemZCallingConv.h" 16 #include "SystemZConstantPoolValue.h" 17 #include "SystemZMachineFunctionInfo.h" 18 #include "SystemZTargetMachine.h" 19 #include "llvm/CodeGen/CallingConvLower.h" 20 #include "llvm/CodeGen/MachineInstrBuilder.h" 21 #include "llvm/CodeGen/MachineRegisterInfo.h" 22 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 23 #include "llvm/IR/Intrinsics.h" 24 #include <cctype> 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "systemz-lower" 29 30 namespace { 31 // Represents a sequence for extracting a 0/1 value from an IPM result: 32 // (((X ^ XORValue) + AddValue) >> Bit) 33 struct IPMConversion { 34 IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit) 35 : XORValue(xorValue), AddValue(addValue), Bit(bit) {} 36 37 int64_t XORValue; 38 int64_t AddValue; 39 unsigned Bit; 40 }; 41 42 // Represents information about a comparison. 43 struct Comparison { 44 Comparison(SDValue Op0In, SDValue Op1In) 45 : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} 46 47 // The operands to the comparison. 48 SDValue Op0, Op1; 49 50 // The opcode that should be used to compare Op0 and Op1. 51 unsigned Opcode; 52 53 // A SystemZICMP value. Only used for integer comparisons. 54 unsigned ICmpType; 55 56 // The mask of CC values that Opcode can produce. 57 unsigned CCValid; 58 59 // The mask of CC values for which the original condition is true. 
60 unsigned CCMask; 61 }; 62 } // end anonymous namespace 63 64 // Classify VT as either 32 or 64 bit. 65 static bool is32Bit(EVT VT) { 66 switch (VT.getSimpleVT().SimpleTy) { 67 case MVT::i32: 68 return true; 69 case MVT::i64: 70 return false; 71 default: 72 llvm_unreachable("Unsupported type"); 73 } 74 } 75 76 // Return a version of MachineOperand that can be safely used before the 77 // final use. 78 static MachineOperand earlyUseOperand(MachineOperand Op) { 79 if (Op.isReg()) 80 Op.setIsKill(false); 81 return Op; 82 } 83 84 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, 85 const SystemZSubtarget &STI) 86 : TargetLowering(tm), Subtarget(STI) { 87 MVT PtrVT = getPointerTy(); 88 89 // Set up the register classes. 90 if (Subtarget.hasHighWord()) 91 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); 92 else 93 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); 94 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); 95 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); 96 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); 97 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); 98 99 // Compute derived properties from the register classes 100 computeRegisterProperties(Subtarget.getRegisterInfo()); 101 102 // Set up special registers. 103 setExceptionPointerRegister(SystemZ::R6D); 104 setExceptionSelectorRegister(SystemZ::R7D); 105 setStackPointerRegisterToSaveRestore(SystemZ::R15D); 106 107 // TODO: It may be better to default to latency-oriented scheduling, however 108 // LLVM's current latency-oriented scheduler can't handle physreg definitions 109 // such as SystemZ has with CC, so set this to the register-pressure 110 // scheduler, because it can. 111 setSchedulingPreference(Sched::RegPressure); 112 113 setBooleanContents(ZeroOrOneBooleanContent); 114 setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? 115 116 // Instructions are strings of 2-byte aligned 2-byte values. 
117 setMinFunctionAlignment(2); 118 119 // Handle operations that are handled in a similar way for all types. 120 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; 121 I <= MVT::LAST_FP_VALUETYPE; 122 ++I) { 123 MVT VT = MVT::SimpleValueType(I); 124 if (isTypeLegal(VT)) { 125 // Lower SET_CC into an IPM-based sequence. 126 setOperationAction(ISD::SETCC, VT, Custom); 127 128 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). 129 setOperationAction(ISD::SELECT, VT, Expand); 130 131 // Lower SELECT_CC and BR_CC into separate comparisons and branches. 132 setOperationAction(ISD::SELECT_CC, VT, Custom); 133 setOperationAction(ISD::BR_CC, VT, Custom); 134 } 135 } 136 137 // Expand jump table branches as address arithmetic followed by an 138 // indirect jump. 139 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 140 141 // Expand BRCOND into a BR_CC (see above). 142 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 143 144 // Handle integer types. 145 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; 146 I <= MVT::LAST_INTEGER_VALUETYPE; 147 ++I) { 148 MVT VT = MVT::SimpleValueType(I); 149 if (isTypeLegal(VT)) { 150 // Expand individual DIV and REMs into DIVREMs. 151 setOperationAction(ISD::SDIV, VT, Expand); 152 setOperationAction(ISD::UDIV, VT, Expand); 153 setOperationAction(ISD::SREM, VT, Expand); 154 setOperationAction(ISD::UREM, VT, Expand); 155 setOperationAction(ISD::SDIVREM, VT, Custom); 156 setOperationAction(ISD::UDIVREM, VT, Custom); 157 158 // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and 159 // stores, putting a serialization instruction after the stores. 160 setOperationAction(ISD::ATOMIC_LOAD, VT, Custom); 161 setOperationAction(ISD::ATOMIC_STORE, VT, Custom); 162 163 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are 164 // available, or if the operand is constant. 165 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); 166 167 // Use POPCNT on z196 and above. 
168 if (Subtarget.hasPopulationCount()) 169 setOperationAction(ISD::CTPOP, VT, Custom); 170 else 171 setOperationAction(ISD::CTPOP, VT, Expand); 172 173 // No special instructions for these. 174 setOperationAction(ISD::CTTZ, VT, Expand); 175 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); 176 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); 177 setOperationAction(ISD::ROTR, VT, Expand); 178 179 // Use *MUL_LOHI where possible instead of MULH*. 180 setOperationAction(ISD::MULHS, VT, Expand); 181 setOperationAction(ISD::MULHU, VT, Expand); 182 setOperationAction(ISD::SMUL_LOHI, VT, Custom); 183 setOperationAction(ISD::UMUL_LOHI, VT, Custom); 184 185 // Only z196 and above have native support for conversions to unsigned. 186 if (!Subtarget.hasFPExtension()) 187 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 188 } 189 } 190 191 // Type legalization will convert 8- and 16-bit atomic operations into 192 // forms that operate on i32s (but still keeping the original memory VT). 193 // Lower them into full i32 operations. 194 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom); 195 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom); 196 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); 197 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); 198 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom); 199 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom); 200 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom); 201 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom); 202 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom); 203 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); 204 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); 205 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); 206 207 // z10 has instructions for signed but not unsigned FP conversion. 208 // Handle unsigned 32-bit types as signed 64-bit types. 
209 if (!Subtarget.hasFPExtension()) { 210 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); 211 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 212 } 213 214 // We have native support for a 64-bit CTLZ, via FLOGR. 215 setOperationAction(ISD::CTLZ, MVT::i32, Promote); 216 setOperationAction(ISD::CTLZ, MVT::i64, Legal); 217 218 // Give LowerOperation the chance to replace 64-bit ORs with subregs. 219 setOperationAction(ISD::OR, MVT::i64, Custom); 220 221 // FIXME: Can we support these natively? 222 setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); 223 setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); 224 setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); 225 226 // We have native instructions for i8, i16 and i32 extensions, but not i1. 227 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 228 for (MVT VT : MVT::integer_valuetypes()) { 229 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 230 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); 231 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); 232 } 233 234 // Handle the various types of symbolic address. 235 setOperationAction(ISD::ConstantPool, PtrVT, Custom); 236 setOperationAction(ISD::GlobalAddress, PtrVT, Custom); 237 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); 238 setOperationAction(ISD::BlockAddress, PtrVT, Custom); 239 setOperationAction(ISD::JumpTable, PtrVT, Custom); 240 241 // We need to handle dynamic allocations specially because of the 242 // 160-byte area at the bottom of the stack. 243 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); 244 245 // Use custom expanders so that we can force the function to use 246 // a frame pointer. 247 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); 248 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); 249 250 // Handle prefetches with PFD or PFDRL. 251 setOperationAction(ISD::PREFETCH, MVT::Other, Custom); 252 253 // Handle floating-point types. 
254 for (unsigned I = MVT::FIRST_FP_VALUETYPE; 255 I <= MVT::LAST_FP_VALUETYPE; 256 ++I) { 257 MVT VT = MVT::SimpleValueType(I); 258 if (isTypeLegal(VT)) { 259 // We can use FI for FRINT. 260 setOperationAction(ISD::FRINT, VT, Legal); 261 262 // We can use the extended form of FI for other rounding operations. 263 if (Subtarget.hasFPExtension()) { 264 setOperationAction(ISD::FNEARBYINT, VT, Legal); 265 setOperationAction(ISD::FFLOOR, VT, Legal); 266 setOperationAction(ISD::FCEIL, VT, Legal); 267 setOperationAction(ISD::FTRUNC, VT, Legal); 268 setOperationAction(ISD::FROUND, VT, Legal); 269 } 270 271 // No special instructions for these. 272 setOperationAction(ISD::FSIN, VT, Expand); 273 setOperationAction(ISD::FCOS, VT, Expand); 274 setOperationAction(ISD::FREM, VT, Expand); 275 } 276 } 277 278 // We have fused multiply-addition for f32 and f64 but not f128. 279 setOperationAction(ISD::FMA, MVT::f32, Legal); 280 setOperationAction(ISD::FMA, MVT::f64, Legal); 281 setOperationAction(ISD::FMA, MVT::f128, Expand); 282 283 // Needed so that we don't try to implement f128 constant loads using 284 // a load-and-extend of a f80 constant (in cases where the constant 285 // would fit in an f80). 286 for (MVT VT : MVT::fp_valuetypes()) 287 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); 288 289 // Floating-point truncation and stores need to be done separately. 290 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 291 setTruncStoreAction(MVT::f128, MVT::f32, Expand); 292 setTruncStoreAction(MVT::f128, MVT::f64, Expand); 293 294 // We have 64-bit FPR<->GPR moves, but need special handling for 295 // 32-bit forms. 296 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 297 setOperationAction(ISD::BITCAST, MVT::f32, Custom); 298 299 // VASTART and VACOPY need to deal with the SystemZ-specific varargs 300 // structure, but VAEND is a no-op. 
301 setOperationAction(ISD::VASTART, MVT::Other, Custom); 302 setOperationAction(ISD::VACOPY, MVT::Other, Custom); 303 setOperationAction(ISD::VAEND, MVT::Other, Expand); 304 305 // Codes for which we want to perform some z-specific combinations. 306 setTargetDAGCombine(ISD::SIGN_EXTEND); 307 308 // Handle intrinsics. 309 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 310 311 // We want to use MVC in preference to even a single load/store pair. 312 MaxStoresPerMemcpy = 0; 313 MaxStoresPerMemcpyOptSize = 0; 314 315 // The main memset sequence is a byte store followed by an MVC. 316 // Two STC or MV..I stores win over that, but the kind of fused stores 317 // generated by target-independent code don't when the byte value is 318 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better 319 // than "STC;MVC". Handle the choice in target-specific code instead. 320 MaxStoresPerMemset = 0; 321 MaxStoresPerMemsetOptSize = 0; 322 } 323 324 EVT SystemZTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { 325 if (!VT.isVector()) 326 return MVT::i32; 327 return VT.changeVectorElementTypeToInteger(); 328 } 329 330 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { 331 VT = VT.getScalarType(); 332 333 if (!VT.isSimple()) 334 return false; 335 336 switch (VT.getSimpleVT().SimpleTy) { 337 case MVT::f32: 338 case MVT::f64: 339 return true; 340 case MVT::f128: 341 return false; 342 default: 343 break; 344 } 345 346 return false; 347 } 348 349 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 350 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. 351 return Imm.isZero() || Imm.isNegZero(); 352 } 353 354 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 355 // We can use CGFI or CLGFI. 356 return isInt<32>(Imm) || isUInt<32>(Imm); 357 } 358 359 bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { 360 // We can use ALGFI or SLGFI. 
361 return isUInt<32>(Imm) || isUInt<32>(-Imm); 362 } 363 364 bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 365 unsigned, 366 unsigned, 367 bool *Fast) const { 368 // Unaligned accesses should never be slower than the expanded version. 369 // We check specifically for aligned accesses in the few cases where 370 // they are required. 371 if (Fast) 372 *Fast = true; 373 return true; 374 } 375 376 bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM, 377 Type *Ty) const { 378 // Punt on globals for now, although they can be used in limited 379 // RELATIVE LONG cases. 380 if (AM.BaseGV) 381 return false; 382 383 // Require a 20-bit signed offset. 384 if (!isInt<20>(AM.BaseOffs)) 385 return false; 386 387 // Indexing is OK but no scale factor can be applied. 388 return AM.Scale == 0 || AM.Scale == 1; 389 } 390 391 bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { 392 if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) 393 return false; 394 unsigned FromBits = FromType->getPrimitiveSizeInBits(); 395 unsigned ToBits = ToType->getPrimitiveSizeInBits(); 396 return FromBits > ToBits; 397 } 398 399 bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { 400 if (!FromVT.isInteger() || !ToVT.isInteger()) 401 return false; 402 unsigned FromBits = FromVT.getSizeInBits(); 403 unsigned ToBits = ToVT.getSizeInBits(); 404 return FromBits > ToBits; 405 } 406 407 //===----------------------------------------------------------------------===// 408 // Inline asm support 409 //===----------------------------------------------------------------------===// 410 411 TargetLowering::ConstraintType 412 SystemZTargetLowering::getConstraintType(const std::string &Constraint) const { 413 if (Constraint.size() == 1) { 414 switch (Constraint[0]) { 415 case 'a': // Address register 416 case 'd': // Data register (equivalent to 'r') 417 case 'f': // Floating-point register 418 case 'h': // High-part register 419 
case 'r': // General-purpose register 420 return C_RegisterClass; 421 422 case 'Q': // Memory with base and unsigned 12-bit displacement 423 case 'R': // Likewise, plus an index 424 case 'S': // Memory with base and signed 20-bit displacement 425 case 'T': // Likewise, plus an index 426 case 'm': // Equivalent to 'T'. 427 return C_Memory; 428 429 case 'I': // Unsigned 8-bit constant 430 case 'J': // Unsigned 12-bit constant 431 case 'K': // Signed 16-bit constant 432 case 'L': // Signed 20-bit displacement (on all targets we support) 433 case 'M': // 0x7fffffff 434 return C_Other; 435 436 default: 437 break; 438 } 439 } 440 return TargetLowering::getConstraintType(Constraint); 441 } 442 443 TargetLowering::ConstraintWeight SystemZTargetLowering:: 444 getSingleConstraintMatchWeight(AsmOperandInfo &info, 445 const char *constraint) const { 446 ConstraintWeight weight = CW_Invalid; 447 Value *CallOperandVal = info.CallOperandVal; 448 // If we don't have a value, we can't do a match, 449 // but allow it at the lowest weight. 450 if (!CallOperandVal) 451 return CW_Default; 452 Type *type = CallOperandVal->getType(); 453 // Look at the constraint type. 
454 switch (*constraint) { 455 default: 456 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 457 break; 458 459 case 'a': // Address register 460 case 'd': // Data register (equivalent to 'r') 461 case 'h': // High-part register 462 case 'r': // General-purpose register 463 if (CallOperandVal->getType()->isIntegerTy()) 464 weight = CW_Register; 465 break; 466 467 case 'f': // Floating-point register 468 if (type->isFloatingPointTy()) 469 weight = CW_Register; 470 break; 471 472 case 'I': // Unsigned 8-bit constant 473 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 474 if (isUInt<8>(C->getZExtValue())) 475 weight = CW_Constant; 476 break; 477 478 case 'J': // Unsigned 12-bit constant 479 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 480 if (isUInt<12>(C->getZExtValue())) 481 weight = CW_Constant; 482 break; 483 484 case 'K': // Signed 16-bit constant 485 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 486 if (isInt<16>(C->getSExtValue())) 487 weight = CW_Constant; 488 break; 489 490 case 'L': // Signed 20-bit displacement (on all targets we support) 491 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 492 if (isInt<20>(C->getSExtValue())) 493 weight = CW_Constant; 494 break; 495 496 case 'M': // 0x7fffffff 497 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 498 if (C->getZExtValue() == 0x7fffffff) 499 weight = CW_Constant; 500 break; 501 } 502 return weight; 503 } 504 505 // Parse a "{tNNN}" register constraint for which the register type "t" 506 // has already been verified. MC is the class associated with "t" and 507 // Map maps 0-based register numbers to LLVM register numbers. 
508 static std::pair<unsigned, const TargetRegisterClass *> 509 parseRegisterNumber(const std::string &Constraint, 510 const TargetRegisterClass *RC, const unsigned *Map) { 511 assert(*(Constraint.end()-1) == '}' && "Missing '}'"); 512 if (isdigit(Constraint[2])) { 513 std::string Suffix(Constraint.data() + 2, Constraint.size() - 2); 514 unsigned Index = atoi(Suffix.c_str()); 515 if (Index < 16 && Map[Index]) 516 return std::make_pair(Map[Index], RC); 517 } 518 return std::make_pair(0U, nullptr); 519 } 520 521 std::pair<unsigned, const TargetRegisterClass *> 522 SystemZTargetLowering::getRegForInlineAsmConstraint( 523 const TargetRegisterInfo *TRI, const std::string &Constraint, 524 MVT VT) const { 525 if (Constraint.size() == 1) { 526 // GCC Constraint Letters 527 switch (Constraint[0]) { 528 default: break; 529 case 'd': // Data register (equivalent to 'r') 530 case 'r': // General-purpose register 531 if (VT == MVT::i64) 532 return std::make_pair(0U, &SystemZ::GR64BitRegClass); 533 else if (VT == MVT::i128) 534 return std::make_pair(0U, &SystemZ::GR128BitRegClass); 535 return std::make_pair(0U, &SystemZ::GR32BitRegClass); 536 537 case 'a': // Address register 538 if (VT == MVT::i64) 539 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass); 540 else if (VT == MVT::i128) 541 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); 542 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); 543 544 case 'h': // High-part register (an LLVM extension) 545 return std::make_pair(0U, &SystemZ::GRH32BitRegClass); 546 547 case 'f': // Floating-point register 548 if (VT == MVT::f64) 549 return std::make_pair(0U, &SystemZ::FP64BitRegClass); 550 else if (VT == MVT::f128) 551 return std::make_pair(0U, &SystemZ::FP128BitRegClass); 552 return std::make_pair(0U, &SystemZ::FP32BitRegClass); 553 } 554 } 555 if (Constraint[0] == '{') { 556 // We need to override the default register parsing for GPRs and FPRs 557 // because the interpretation depends on VT. 
The internal names of 558 // the registers are also different from the external names 559 // (F0D and F0S instead of F0, etc.). 560 if (Constraint[1] == 'r') { 561 if (VT == MVT::i32) 562 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, 563 SystemZMC::GR32Regs); 564 if (VT == MVT::i128) 565 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, 566 SystemZMC::GR128Regs); 567 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, 568 SystemZMC::GR64Regs); 569 } 570 if (Constraint[1] == 'f') { 571 if (VT == MVT::f32) 572 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, 573 SystemZMC::FP32Regs); 574 if (VT == MVT::f128) 575 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, 576 SystemZMC::FP128Regs); 577 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, 578 SystemZMC::FP64Regs); 579 } 580 } 581 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 582 } 583 584 void SystemZTargetLowering:: 585 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, 586 std::vector<SDValue> &Ops, 587 SelectionDAG &DAG) const { 588 // Only support length 1 constraints for now. 
589 if (Constraint.length() == 1) { 590 switch (Constraint[0]) { 591 case 'I': // Unsigned 8-bit constant 592 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 593 if (isUInt<8>(C->getZExtValue())) 594 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 595 Op.getValueType())); 596 return; 597 598 case 'J': // Unsigned 12-bit constant 599 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 600 if (isUInt<12>(C->getZExtValue())) 601 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 602 Op.getValueType())); 603 return; 604 605 case 'K': // Signed 16-bit constant 606 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 607 if (isInt<16>(C->getSExtValue())) 608 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), 609 Op.getValueType())); 610 return; 611 612 case 'L': // Signed 20-bit displacement (on all targets we support) 613 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 614 if (isInt<20>(C->getSExtValue())) 615 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), 616 Op.getValueType())); 617 return; 618 619 case 'M': // 0x7fffffff 620 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 621 if (C->getZExtValue() == 0x7fffffff) 622 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 623 Op.getValueType())); 624 return; 625 } 626 } 627 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 628 } 629 630 //===----------------------------------------------------------------------===// 631 // Calling conventions 632 //===----------------------------------------------------------------------===// 633 634 #include "SystemZGenCallingConv.inc" 635 636 bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, 637 Type *ToType) const { 638 return isTruncateFree(FromType, ToType); 639 } 640 641 bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 642 if (!CI->isTailCall()) 643 return false; 644 return true; 645 } 646 647 // Value is a value that has been passed to us in the location described by 
VA 648 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining 649 // any loads onto Chain. 650 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL, 651 CCValAssign &VA, SDValue Chain, 652 SDValue Value) { 653 // If the argument has been promoted from a smaller type, insert an 654 // assertion to capture this. 655 if (VA.getLocInfo() == CCValAssign::SExt) 656 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value, 657 DAG.getValueType(VA.getValVT())); 658 else if (VA.getLocInfo() == CCValAssign::ZExt) 659 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value, 660 DAG.getValueType(VA.getValVT())); 661 662 if (VA.isExtInLoc()) 663 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value); 664 else if (VA.getLocInfo() == CCValAssign::Indirect) 665 Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value, 666 MachinePointerInfo(), false, false, false, 0); 667 else 668 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); 669 return Value; 670 } 671 672 // Value is a value of type VA.getValVT() that we need to copy into 673 // the location described by VA. Return a copy of Value converted to 674 // VA.getValVT(). The caller is responsible for handling indirect values. 
675 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL, 676 CCValAssign &VA, SDValue Value) { 677 switch (VA.getLocInfo()) { 678 case CCValAssign::SExt: 679 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value); 680 case CCValAssign::ZExt: 681 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); 682 case CCValAssign::AExt: 683 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); 684 case CCValAssign::Full: 685 return Value; 686 default: 687 llvm_unreachable("Unhandled getLocInfo()"); 688 } 689 } 690 691 SDValue SystemZTargetLowering:: 692 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 693 const SmallVectorImpl<ISD::InputArg> &Ins, 694 SDLoc DL, SelectionDAG &DAG, 695 SmallVectorImpl<SDValue> &InVals) const { 696 MachineFunction &MF = DAG.getMachineFunction(); 697 MachineFrameInfo *MFI = MF.getFrameInfo(); 698 MachineRegisterInfo &MRI = MF.getRegInfo(); 699 SystemZMachineFunctionInfo *FuncInfo = 700 MF.getInfo<SystemZMachineFunctionInfo>(); 701 auto *TFL = 702 static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering()); 703 704 // Assign locations to all of the incoming arguments. 705 SmallVector<CCValAssign, 16> ArgLocs; 706 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 707 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); 708 709 unsigned NumFixedGPRs = 0; 710 unsigned NumFixedFPRs = 0; 711 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 712 SDValue ArgValue; 713 CCValAssign &VA = ArgLocs[I]; 714 EVT LocVT = VA.getLocVT(); 715 if (VA.isRegLoc()) { 716 // Arguments passed in registers 717 const TargetRegisterClass *RC; 718 switch (LocVT.getSimpleVT().SimpleTy) { 719 default: 720 // Integers smaller than i64 should be promoted to i64. 
721 llvm_unreachable("Unexpected argument type"); 722 case MVT::i32: 723 NumFixedGPRs += 1; 724 RC = &SystemZ::GR32BitRegClass; 725 break; 726 case MVT::i64: 727 NumFixedGPRs += 1; 728 RC = &SystemZ::GR64BitRegClass; 729 break; 730 case MVT::f32: 731 NumFixedFPRs += 1; 732 RC = &SystemZ::FP32BitRegClass; 733 break; 734 case MVT::f64: 735 NumFixedFPRs += 1; 736 RC = &SystemZ::FP64BitRegClass; 737 break; 738 } 739 740 unsigned VReg = MRI.createVirtualRegister(RC); 741 MRI.addLiveIn(VA.getLocReg(), VReg); 742 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 743 } else { 744 assert(VA.isMemLoc() && "Argument not register or memory"); 745 746 // Create the frame index object for this incoming parameter. 747 int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8, 748 VA.getLocMemOffset(), true); 749 750 // Create the SelectionDAG nodes corresponding to a load 751 // from this parameter. Unpromoted ints and floats are 752 // passed as right-justified 8-byte values. 753 EVT PtrVT = getPointerTy(); 754 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 755 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) 756 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, 757 DAG.getIntPtrConstant(4, DL)); 758 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, 759 MachinePointerInfo::getFixedStack(FI), 760 false, false, false, 0); 761 } 762 763 // Convert the value of the argument register into the value that's 764 // being passed. 765 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); 766 } 767 768 if (IsVarArg) { 769 // Save the number of non-varargs registers for later use by va_start, etc. 770 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); 771 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); 772 773 // Likewise the address (in the form of a frame index) of where the 774 // first stack vararg would be. The 1-byte size here is arbitrary. 
775 int64_t StackSize = CCInfo.getNextStackOffset(); 776 FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true)); 777 778 // ...and a similar frame index for the caller-allocated save area 779 // that will be used to store the incoming registers. 780 int64_t RegSaveOffset = TFL->getOffsetOfLocalArea(); 781 unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true); 782 FuncInfo->setRegSaveFrameIndex(RegSaveIndex); 783 784 // Store the FPR varargs in the reserved frame slots. (We store the 785 // GPRs as part of the prologue.) 786 if (NumFixedFPRs < SystemZ::NumArgFPRs) { 787 SDValue MemOps[SystemZ::NumArgFPRs]; 788 for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) { 789 unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]); 790 int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true); 791 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 792 unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I], 793 &SystemZ::FP64BitRegClass); 794 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); 795 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, 796 MachinePointerInfo::getFixedStack(FI), 797 false, false, 0); 798 799 } 800 // Join the stores, which are independent of one another. 801 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 802 makeArrayRef(&MemOps[NumFixedFPRs], 803 SystemZ::NumArgFPRs-NumFixedFPRs)); 804 } 805 } 806 807 return Chain; 808 } 809 810 static bool canUseSiblingCall(const CCState &ArgCCInfo, 811 SmallVectorImpl<CCValAssign> &ArgLocs) { 812 // Punt if there are any indirect or stack arguments, or if the call 813 // needs the call-saved argument register R6. 
814 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 815 CCValAssign &VA = ArgLocs[I]; 816 if (VA.getLocInfo() == CCValAssign::Indirect) 817 return false; 818 if (!VA.isRegLoc()) 819 return false; 820 unsigned Reg = VA.getLocReg(); 821 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) 822 return false; 823 } 824 return true; 825 } 826 827 SDValue 828 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, 829 SmallVectorImpl<SDValue> &InVals) const { 830 SelectionDAG &DAG = CLI.DAG; 831 SDLoc &DL = CLI.DL; 832 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 833 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 834 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 835 SDValue Chain = CLI.Chain; 836 SDValue Callee = CLI.Callee; 837 bool &IsTailCall = CLI.IsTailCall; 838 CallingConv::ID CallConv = CLI.CallConv; 839 bool IsVarArg = CLI.IsVarArg; 840 MachineFunction &MF = DAG.getMachineFunction(); 841 EVT PtrVT = getPointerTy(); 842 843 // Analyze the operands of the call, assigning locations to each operand. 844 SmallVector<CCValAssign, 16> ArgLocs; 845 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 846 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); 847 848 // We don't support GuaranteedTailCallOpt, only automatically-detected 849 // sibling calls. 850 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs)) 851 IsTailCall = false; 852 853 // Get a count of how many bytes are to be pushed on the stack. 854 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 855 856 // Mark the start of the call. 857 if (!IsTailCall) 858 Chain = DAG.getCALLSEQ_START(Chain, 859 DAG.getConstant(NumBytes, DL, PtrVT, true), 860 DL); 861 862 // Copy argument values to their designated locations. 
863 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass; 864 SmallVector<SDValue, 8> MemOpChains; 865 SDValue StackPtr; 866 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 867 CCValAssign &VA = ArgLocs[I]; 868 SDValue ArgValue = OutVals[I]; 869 870 if (VA.getLocInfo() == CCValAssign::Indirect) { 871 // Store the argument in a stack slot and pass its address. 872 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); 873 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 874 MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot, 875 MachinePointerInfo::getFixedStack(FI), 876 false, false, 0)); 877 ArgValue = SpillSlot; 878 } else 879 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); 880 881 if (VA.isRegLoc()) 882 // Queue up the argument copies and emit them at the end. 883 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 884 else { 885 assert(VA.isMemLoc() && "Argument not register or memory"); 886 887 // Work out the address of the stack slot. Unpromoted ints and 888 // floats are passed as right-justified 8-byte values. 889 if (!StackPtr.getNode()) 890 StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT); 891 unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset(); 892 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) 893 Offset += 4; 894 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 895 DAG.getIntPtrConstant(Offset, DL)); 896 897 // Emit the store. 898 MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address, 899 MachinePointerInfo(), 900 false, false, 0)); 901 } 902 } 903 904 // Join the stores, which are independent of one another. 905 if (!MemOpChains.empty()) 906 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 907 908 // Accept direct calls by converting symbolic call addresses to the 909 // associated Target* opcodes. Force %r1 to be used for indirect 910 // tail calls. 
911 SDValue Glue; 912 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 913 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); 914 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 915 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { 916 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); 917 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 918 } else if (IsTailCall) { 919 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue); 920 Glue = Chain.getValue(1); 921 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType()); 922 } 923 924 // Build a sequence of copy-to-reg nodes, chained and glued together. 925 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { 926 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, 927 RegsToPass[I].second, Glue); 928 Glue = Chain.getValue(1); 929 } 930 931 // The first call operand is the chain and the second is the target address. 932 SmallVector<SDValue, 8> Ops; 933 Ops.push_back(Chain); 934 Ops.push_back(Callee); 935 936 // Add argument registers to the end of the list so that they are 937 // known live into the call. 938 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) 939 Ops.push_back(DAG.getRegister(RegsToPass[I].first, 940 RegsToPass[I].second.getValueType())); 941 942 // Add a register mask operand representing the call-preserved registers. 943 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 944 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 945 assert(Mask && "Missing call preserved mask for calling convention"); 946 Ops.push_back(DAG.getRegisterMask(Mask)); 947 948 // Glue the call to the argument copies, if any. 949 if (Glue.getNode()) 950 Ops.push_back(Glue); 951 952 // Emit the call. 
953 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 954 if (IsTailCall) 955 return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); 956 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); 957 Glue = Chain.getValue(1); 958 959 // Mark the end of the call, which is glued to the call itself. 960 Chain = DAG.getCALLSEQ_END(Chain, 961 DAG.getConstant(NumBytes, DL, PtrVT, true), 962 DAG.getConstant(0, DL, PtrVT, true), 963 Glue, DL); 964 Glue = Chain.getValue(1); 965 966 // Assign locations to each value returned by this call. 967 SmallVector<CCValAssign, 16> RetLocs; 968 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); 969 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); 970 971 // Copy all of the result registers out of their specified physreg. 972 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { 973 CCValAssign &VA = RetLocs[I]; 974 975 // Copy the value out, gluing the copy to the end of the call sequence. 976 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), 977 VA.getLocVT(), Glue); 978 Chain = RetValue.getValue(1); 979 Glue = RetValue.getValue(2); 980 981 // Convert the value of the return register into the value that's 982 // being returned. 983 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue)); 984 } 985 986 return Chain; 987 } 988 989 SDValue 990 SystemZTargetLowering::LowerReturn(SDValue Chain, 991 CallingConv::ID CallConv, bool IsVarArg, 992 const SmallVectorImpl<ISD::OutputArg> &Outs, 993 const SmallVectorImpl<SDValue> &OutVals, 994 SDLoc DL, SelectionDAG &DAG) const { 995 MachineFunction &MF = DAG.getMachineFunction(); 996 997 // Assign locations to each returned value. 
998 SmallVector<CCValAssign, 16> RetLocs; 999 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); 1000 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); 1001 1002 // Quick exit for void returns 1003 if (RetLocs.empty()) 1004 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain); 1005 1006 // Copy the result values into the output registers. 1007 SDValue Glue; 1008 SmallVector<SDValue, 4> RetOps; 1009 RetOps.push_back(Chain); 1010 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { 1011 CCValAssign &VA = RetLocs[I]; 1012 SDValue RetValue = OutVals[I]; 1013 1014 // Make the return register live on exit. 1015 assert(VA.isRegLoc() && "Can only return in registers!"); 1016 1017 // Promote the value as required. 1018 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); 1019 1020 // Chain and glue the copies together. 1021 unsigned Reg = VA.getLocReg(); 1022 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); 1023 Glue = Chain.getValue(1); 1024 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); 1025 } 1026 1027 // Update chain and glue. 1028 RetOps[0] = Chain; 1029 if (Glue.getNode()) 1030 RetOps.push_back(Glue); 1031 1032 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps); 1033 } 1034 1035 SDValue SystemZTargetLowering:: 1036 prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const { 1037 return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); 1038 } 1039 1040 // Return true if Op is an intrinsic node with chain that returns the CC value 1041 // as its only (other) argument. Provide the associated SystemZISD opcode and 1042 // the mask of valid CC values if so. 
1043 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, 1044 unsigned &CCValid) { 1045 unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 1046 switch (Id) { 1047 case Intrinsic::s390_tbegin: 1048 Opcode = SystemZISD::TBEGIN; 1049 CCValid = SystemZ::CCMASK_TBEGIN; 1050 return true; 1051 1052 case Intrinsic::s390_tbegin_nofloat: 1053 Opcode = SystemZISD::TBEGIN_NOFLOAT; 1054 CCValid = SystemZ::CCMASK_TBEGIN; 1055 return true; 1056 1057 case Intrinsic::s390_tend: 1058 Opcode = SystemZISD::TEND; 1059 CCValid = SystemZ::CCMASK_TEND; 1060 return true; 1061 1062 default: 1063 return false; 1064 } 1065 } 1066 1067 // Emit an intrinsic with chain with a glued value instead of its CC result. 1068 static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, 1069 unsigned Opcode) { 1070 // Copy all operands except the intrinsic ID. 1071 unsigned NumOps = Op.getNumOperands(); 1072 SmallVector<SDValue, 6> Ops; 1073 Ops.reserve(NumOps - 1); 1074 Ops.push_back(Op.getOperand(0)); 1075 for (unsigned I = 2; I < NumOps; ++I) 1076 Ops.push_back(Op.getOperand(I)); 1077 1078 assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); 1079 SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1080 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); 1081 SDValue OldChain = SDValue(Op.getNode(), 1); 1082 SDValue NewChain = SDValue(Intr.getNode(), 0); 1083 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); 1084 return Intr; 1085 } 1086 1087 // CC is a comparison that will be implemented using an integer or 1088 // floating-point comparison. Return the condition code mask for 1089 // a branch on true. In the integer case, CCMASK_CMP_UO is set for 1090 // unsigned comparisons and clear for signed ones. In the floating-point 1091 // case, CCMASK_CMP_UO has its normal mask meaning (unordered). 
1092 static unsigned CCMaskForCondCode(ISD::CondCode CC) { 1093 #define CONV(X) \ 1094 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ 1095 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ 1096 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X 1097 1098 switch (CC) { 1099 default: 1100 llvm_unreachable("Invalid integer condition!"); 1101 1102 CONV(EQ); 1103 CONV(NE); 1104 CONV(GT); 1105 CONV(GE); 1106 CONV(LT); 1107 CONV(LE); 1108 1109 case ISD::SETO: return SystemZ::CCMASK_CMP_O; 1110 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; 1111 } 1112 #undef CONV 1113 } 1114 1115 // Return a sequence for getting a 1 from an IPM result when CC has a 1116 // value in CCMask and a 0 when CC has a value in CCValid & ~CCMask. 1117 // The handling of CC values outside CCValid doesn't matter. 1118 static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { 1119 // Deal with cases where the result can be taken directly from a bit 1120 // of the IPM result. 1121 if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3))) 1122 return IPMConversion(0, 0, SystemZ::IPM_CC); 1123 if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3))) 1124 return IPMConversion(0, 0, SystemZ::IPM_CC + 1); 1125 1126 // Deal with cases where we can add a value to force the sign bit 1127 // to contain the right value. Putting the bit in 31 means we can 1128 // use SRL rather than RISBG(L), and also makes it easier to get a 1129 // 0/-1 value, so it has priority over the other tests below. 1130 // 1131 // These sequences rely on the fact that the upper two bits of the 1132 // IPM result are zero. 
1133 uint64_t TopBit = uint64_t(1) << 31; 1134 if (CCMask == (CCValid & SystemZ::CCMASK_0)) 1135 return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31); 1136 if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1))) 1137 return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31); 1138 if (CCMask == (CCValid & (SystemZ::CCMASK_0 1139 | SystemZ::CCMASK_1 1140 | SystemZ::CCMASK_2))) 1141 return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31); 1142 if (CCMask == (CCValid & SystemZ::CCMASK_3)) 1143 return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31); 1144 if (CCMask == (CCValid & (SystemZ::CCMASK_1 1145 | SystemZ::CCMASK_2 1146 | SystemZ::CCMASK_3))) 1147 return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31); 1148 1149 // Next try inverting the value and testing a bit. 0/1 could be 1150 // handled this way too, but we dealt with that case above. 1151 if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2))) 1152 return IPMConversion(-1, 0, SystemZ::IPM_CC); 1153 1154 // Handle cases where adding a value forces a non-sign bit to contain 1155 // the right value. 1156 if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2))) 1157 return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1); 1158 if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3))) 1159 return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1); 1160 1161 // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All these are 1162 // can be done by inverting the low CC bit and applying one of the 1163 // sign-based extractions above. 
1164 if (CCMask == (CCValid & SystemZ::CCMASK_1)) 1165 return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31); 1166 if (CCMask == (CCValid & SystemZ::CCMASK_2)) 1167 return IPMConversion(1 << SystemZ::IPM_CC, 1168 TopBit - (3 << SystemZ::IPM_CC), 31); 1169 if (CCMask == (CCValid & (SystemZ::CCMASK_0 1170 | SystemZ::CCMASK_1 1171 | SystemZ::CCMASK_3))) 1172 return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31); 1173 if (CCMask == (CCValid & (SystemZ::CCMASK_0 1174 | SystemZ::CCMASK_2 1175 | SystemZ::CCMASK_3))) 1176 return IPMConversion(1 << SystemZ::IPM_CC, 1177 TopBit - (1 << SystemZ::IPM_CC), 31); 1178 1179 llvm_unreachable("Unexpected CC combination"); 1180 } 1181 1182 // If C can be converted to a comparison against zero, adjust the operands 1183 // as necessary. 1184 static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { 1185 if (C.ICmpType == SystemZICMP::UnsignedOnly) 1186 return; 1187 1188 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode()); 1189 if (!ConstOp1) 1190 return; 1191 1192 int64_t Value = ConstOp1->getSExtValue(); 1193 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) || 1194 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) || 1195 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) || 1196 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) { 1197 C.CCMask ^= SystemZ::CCMASK_CMP_EQ; 1198 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType()); 1199 } 1200 } 1201 1202 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, 1203 // adjust the operands as necessary. 1204 static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { 1205 // For us to make any changes, it must a comparison between a single-use 1206 // load and a constant. 1207 if (!C.Op0.hasOneUse() || 1208 C.Op0.getOpcode() != ISD::LOAD || 1209 C.Op1.getOpcode() != ISD::Constant) 1210 return; 1211 1212 // We must have an 8- or 16-bit load. 
1213 auto *Load = cast<LoadSDNode>(C.Op0); 1214 unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits(); 1215 if (NumBits != 8 && NumBits != 16) 1216 return; 1217 1218 // The load must be an extending one and the constant must be within the 1219 // range of the unextended value. 1220 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1); 1221 uint64_t Value = ConstOp1->getZExtValue(); 1222 uint64_t Mask = (1 << NumBits) - 1; 1223 if (Load->getExtensionType() == ISD::SEXTLOAD) { 1224 // Make sure that ConstOp1 is in range of C.Op0. 1225 int64_t SignedValue = ConstOp1->getSExtValue(); 1226 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask) 1227 return; 1228 if (C.ICmpType != SystemZICMP::SignedOnly) { 1229 // Unsigned comparison between two sign-extended values is equivalent 1230 // to unsigned comparison between two zero-extended values. 1231 Value &= Mask; 1232 } else if (NumBits == 8) { 1233 // Try to treat the comparison as unsigned, so that we can use CLI. 1234 // Adjust CCMask and Value as necessary. 1235 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT) 1236 // Test whether the high bit of the byte is set. 1237 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT; 1238 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE) 1239 // Test whether the high bit of the byte is clear. 1240 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT; 1241 else 1242 // No instruction exists for this combination. 1243 return; 1244 C.ICmpType = SystemZICMP::UnsignedOnly; 1245 } 1246 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { 1247 if (Value > Mask) 1248 return; 1249 assert(C.ICmpType == SystemZICMP::Any && 1250 "Signedness shouldn't matter here."); 1251 } else 1252 return; 1253 1254 // Make sure that the first operand is an i32 of the right extension type. 1255 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ? 
1256 ISD::SEXTLOAD : 1257 ISD::ZEXTLOAD); 1258 if (C.Op0.getValueType() != MVT::i32 || 1259 Load->getExtensionType() != ExtType) 1260 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, 1261 Load->getChain(), Load->getBasePtr(), 1262 Load->getPointerInfo(), Load->getMemoryVT(), 1263 Load->isVolatile(), Load->isNonTemporal(), 1264 Load->isInvariant(), Load->getAlignment()); 1265 1266 // Make sure that the second operand is an i32 with the right value. 1267 if (C.Op1.getValueType() != MVT::i32 || 1268 Value != ConstOp1->getZExtValue()) 1269 C.Op1 = DAG.getConstant(Value, DL, MVT::i32); 1270 } 1271 1272 // Return true if Op is either an unextended load, or a load suitable 1273 // for integer register-memory comparisons of type ICmpType. 1274 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { 1275 auto *Load = dyn_cast<LoadSDNode>(Op.getNode()); 1276 if (Load) { 1277 // There are no instructions to compare a register with a memory byte. 1278 if (Load->getMemoryVT() == MVT::i8) 1279 return false; 1280 // Otherwise decide on extension type. 1281 switch (Load->getExtensionType()) { 1282 case ISD::NON_EXTLOAD: 1283 return true; 1284 case ISD::SEXTLOAD: 1285 return ICmpType != SystemZICMP::UnsignedOnly; 1286 case ISD::ZEXTLOAD: 1287 return ICmpType != SystemZICMP::SignedOnly; 1288 default: 1289 break; 1290 } 1291 } 1292 return false; 1293 } 1294 1295 // Return true if it is better to swap the operands of C. 1296 static bool shouldSwapCmpOperands(const Comparison &C) { 1297 // Leave f128 comparisons alone, since they have no memory forms. 1298 if (C.Op0.getValueType() == MVT::f128) 1299 return false; 1300 1301 // Always keep a floating-point constant second, since comparisons with 1302 // zero can use LOAD TEST and comparisons with other constants make a 1303 // natural memory operand. 1304 if (isa<ConstantFPSDNode>(C.Op1)) 1305 return false; 1306 1307 // Never swap comparisons with zero since there are many ways to optimize 1308 // those later. 
1309 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); 1310 if (ConstOp1 && ConstOp1->getZExtValue() == 0) 1311 return false; 1312 1313 // Also keep natural memory operands second if the loaded value is 1314 // only used here. Several comparisons have memory forms. 1315 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse()) 1316 return false; 1317 1318 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't. 1319 // In that case we generally prefer the memory to be second. 1320 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) { 1321 // The only exceptions are when the second operand is a constant and 1322 // we can use things like CHHSI. 1323 if (!ConstOp1) 1324 return true; 1325 // The unsigned memory-immediate instructions can handle 16-bit 1326 // unsigned integers. 1327 if (C.ICmpType != SystemZICMP::SignedOnly && 1328 isUInt<16>(ConstOp1->getZExtValue())) 1329 return false; 1330 // The signed memory-immediate instructions can handle 16-bit 1331 // signed integers. 1332 if (C.ICmpType != SystemZICMP::UnsignedOnly && 1333 isInt<16>(ConstOp1->getSExtValue())) 1334 return false; 1335 return true; 1336 } 1337 1338 // Try to promote the use of CGFR and CLGFR. 1339 unsigned Opcode0 = C.Op0.getOpcode(); 1340 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND) 1341 return true; 1342 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND) 1343 return true; 1344 if (C.ICmpType != SystemZICMP::SignedOnly && 1345 Opcode0 == ISD::AND && 1346 C.Op0.getOperand(1).getOpcode() == ISD::Constant && 1347 cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff) 1348 return true; 1349 1350 return false; 1351 } 1352 1353 // Return a version of comparison CC mask CCMask in which the LT and GT 1354 // actions are swapped. 1355 static unsigned reverseCCMask(unsigned CCMask) { 1356 return ((CCMask & SystemZ::CCMASK_CMP_EQ) | 1357 (CCMask & SystemZ::CCMASK_CMP_GT ? 
SystemZ::CCMASK_CMP_LT : 0) | 1358 (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) | 1359 (CCMask & SystemZ::CCMASK_CMP_UO)); 1360 } 1361 1362 // Check whether C tests for equality between X and Y and whether X - Y 1363 // or Y - X is also computed. In that case it's better to compare the 1364 // result of the subtraction against zero. 1365 static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) { 1366 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 1367 C.CCMask == SystemZ::CCMASK_CMP_NE) { 1368 for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { 1369 SDNode *N = *I; 1370 if (N->getOpcode() == ISD::SUB && 1371 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) || 1372 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) { 1373 C.Op0 = SDValue(N, 0); 1374 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0)); 1375 return; 1376 } 1377 } 1378 } 1379 } 1380 1381 // Check whether C compares a floating-point value with zero and if that 1382 // floating-point value is also negated. In this case we can use the 1383 // negation to set CC, so avoiding separate LOAD AND TEST and 1384 // LOAD (NEGATIVE/COMPLEMENT) instructions. 1385 static void adjustForFNeg(Comparison &C) { 1386 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1); 1387 if (C1 && C1->isZero()) { 1388 for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { 1389 SDNode *N = *I; 1390 if (N->getOpcode() == ISD::FNEG) { 1391 C.Op0 = SDValue(N, 0); 1392 C.CCMask = reverseCCMask(C.CCMask); 1393 return; 1394 } 1395 } 1396 } 1397 } 1398 1399 // Check whether C compares (shl X, 32) with 0 and whether X is 1400 // also sign-extended. In that case it is better to test the result 1401 // of the sign extension using LTGFR. 1402 // 1403 // This case is important because InstCombine transforms a comparison 1404 // with (sext (trunc X)) into a comparison with (shl X, 32). 
1405 static void adjustForLTGFR(Comparison &C) { 1406 // Check for a comparison between (shl X, 32) and 0. 1407 if (C.Op0.getOpcode() == ISD::SHL && 1408 C.Op0.getValueType() == MVT::i64 && 1409 C.Op1.getOpcode() == ISD::Constant && 1410 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) { 1411 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); 1412 if (C1 && C1->getZExtValue() == 32) { 1413 SDValue ShlOp0 = C.Op0.getOperand(0); 1414 // See whether X has any SIGN_EXTEND_INREG uses. 1415 for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) { 1416 SDNode *N = *I; 1417 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG && 1418 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) { 1419 C.Op0 = SDValue(N, 0); 1420 return; 1421 } 1422 } 1423 } 1424 } 1425 } 1426 1427 // If C compares the truncation of an extending load, try to compare 1428 // the untruncated value instead. This exposes more opportunities to 1429 // reuse CC. 1430 static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) { 1431 if (C.Op0.getOpcode() == ISD::TRUNCATE && 1432 C.Op0.getOperand(0).getOpcode() == ISD::LOAD && 1433 C.Op1.getOpcode() == ISD::Constant && 1434 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) { 1435 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0)); 1436 if (L->getMemoryVT().getStoreSizeInBits() 1437 <= C.Op0.getValueType().getSizeInBits()) { 1438 unsigned Type = L->getExtensionType(); 1439 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || 1440 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { 1441 C.Op0 = C.Op0.getOperand(0); 1442 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType()); 1443 } 1444 } 1445 } 1446 } 1447 1448 // Return true if shift operation N has an in-range constant shift value. 1449 // Store it in ShiftVal if so. 
1450 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) { 1451 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1452 if (!Shift) 1453 return false; 1454 1455 uint64_t Amount = Shift->getZExtValue(); 1456 if (Amount >= N.getValueType().getSizeInBits()) 1457 return false; 1458 1459 ShiftVal = Amount; 1460 return true; 1461 } 1462 1463 // Check whether an AND with Mask is suitable for a TEST UNDER MASK 1464 // instruction and whether the CC value is descriptive enough to handle 1465 // a comparison of type Opcode between the AND result and CmpVal. 1466 // CCMask says which comparison result is being tested and BitSize is 1467 // the number of bits in the operands. If TEST UNDER MASK can be used, 1468 // return the corresponding CC mask, otherwise return 0. 1469 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, 1470 uint64_t Mask, uint64_t CmpVal, 1471 unsigned ICmpType) { 1472 assert(Mask != 0 && "ANDs with zero should have been removed by now"); 1473 1474 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. 1475 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) && 1476 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask)) 1477 return 0; 1478 1479 // Work out the masks for the lowest and highest bits. 1480 unsigned HighShift = 63 - countLeadingZeros(Mask); 1481 uint64_t High = uint64_t(1) << HighShift; 1482 uint64_t Low = uint64_t(1) << countTrailingZeros(Mask); 1483 1484 // Signed ordered comparisons are effectively unsigned if the sign 1485 // bit is dropped. 1486 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly); 1487 1488 // Check for equality comparisons with 0, or the equivalent. 
1489 if (CmpVal == 0) { 1490 if (CCMask == SystemZ::CCMASK_CMP_EQ) 1491 return SystemZ::CCMASK_TM_ALL_0; 1492 if (CCMask == SystemZ::CCMASK_CMP_NE) 1493 return SystemZ::CCMASK_TM_SOME_1; 1494 } 1495 if (EffectivelyUnsigned && CmpVal <= Low) { 1496 if (CCMask == SystemZ::CCMASK_CMP_LT) 1497 return SystemZ::CCMASK_TM_ALL_0; 1498 if (CCMask == SystemZ::CCMASK_CMP_GE) 1499 return SystemZ::CCMASK_TM_SOME_1; 1500 } 1501 if (EffectivelyUnsigned && CmpVal < Low) { 1502 if (CCMask == SystemZ::CCMASK_CMP_LE) 1503 return SystemZ::CCMASK_TM_ALL_0; 1504 if (CCMask == SystemZ::CCMASK_CMP_GT) 1505 return SystemZ::CCMASK_TM_SOME_1; 1506 } 1507 1508 // Check for equality comparisons with the mask, or the equivalent. 1509 if (CmpVal == Mask) { 1510 if (CCMask == SystemZ::CCMASK_CMP_EQ) 1511 return SystemZ::CCMASK_TM_ALL_1; 1512 if (CCMask == SystemZ::CCMASK_CMP_NE) 1513 return SystemZ::CCMASK_TM_SOME_0; 1514 } 1515 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) { 1516 if (CCMask == SystemZ::CCMASK_CMP_GT) 1517 return SystemZ::CCMASK_TM_ALL_1; 1518 if (CCMask == SystemZ::CCMASK_CMP_LE) 1519 return SystemZ::CCMASK_TM_SOME_0; 1520 } 1521 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) { 1522 if (CCMask == SystemZ::CCMASK_CMP_GE) 1523 return SystemZ::CCMASK_TM_ALL_1; 1524 if (CCMask == SystemZ::CCMASK_CMP_LT) 1525 return SystemZ::CCMASK_TM_SOME_0; 1526 } 1527 1528 // Check for ordered comparisons with the top bit. 
1529 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) { 1530 if (CCMask == SystemZ::CCMASK_CMP_LE) 1531 return SystemZ::CCMASK_TM_MSB_0; 1532 if (CCMask == SystemZ::CCMASK_CMP_GT) 1533 return SystemZ::CCMASK_TM_MSB_1; 1534 } 1535 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) { 1536 if (CCMask == SystemZ::CCMASK_CMP_LT) 1537 return SystemZ::CCMASK_TM_MSB_0; 1538 if (CCMask == SystemZ::CCMASK_CMP_GE) 1539 return SystemZ::CCMASK_TM_MSB_1; 1540 } 1541 1542 // If there are just two bits, we can do equality checks for Low and High 1543 // as well. 1544 if (Mask == Low + High) { 1545 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low) 1546 return SystemZ::CCMASK_TM_MIXED_MSB_0; 1547 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low) 1548 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY; 1549 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High) 1550 return SystemZ::CCMASK_TM_MIXED_MSB_1; 1551 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High) 1552 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY; 1553 } 1554 1555 // Looks like we've exhausted our options. 1556 return 0; 1557 } 1558 1559 // See whether C can be implemented as a TEST UNDER MASK instruction. 1560 // Update the arguments with the TM version if so. 1561 static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) { 1562 // Check that we have a comparison with a constant. 1563 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); 1564 if (!ConstOp1) 1565 return; 1566 uint64_t CmpVal = ConstOp1->getZExtValue(); 1567 1568 // Check whether the nonconstant input is an AND with a constant mask. 
1569 Comparison NewC(C); 1570 uint64_t MaskVal; 1571 ConstantSDNode *Mask = nullptr; 1572 if (C.Op0.getOpcode() == ISD::AND) { 1573 NewC.Op0 = C.Op0.getOperand(0); 1574 NewC.Op1 = C.Op0.getOperand(1); 1575 Mask = dyn_cast<ConstantSDNode>(NewC.Op1); 1576 if (!Mask) 1577 return; 1578 MaskVal = Mask->getZExtValue(); 1579 } else { 1580 // There is no instruction to compare with a 64-bit immediate 1581 // so use TMHH instead if possible. We need an unsigned ordered 1582 // comparison with an i64 immediate. 1583 if (NewC.Op0.getValueType() != MVT::i64 || 1584 NewC.CCMask == SystemZ::CCMASK_CMP_EQ || 1585 NewC.CCMask == SystemZ::CCMASK_CMP_NE || 1586 NewC.ICmpType == SystemZICMP::SignedOnly) 1587 return; 1588 // Convert LE and GT comparisons into LT and GE. 1589 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE || 1590 NewC.CCMask == SystemZ::CCMASK_CMP_GT) { 1591 if (CmpVal == uint64_t(-1)) 1592 return; 1593 CmpVal += 1; 1594 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ; 1595 } 1596 // If the low N bits of Op1 are zero than the low N bits of Op0 can 1597 // be masked off without changing the result. 1598 MaskVal = -(CmpVal & -CmpVal); 1599 NewC.ICmpType = SystemZICMP::UnsignedOnly; 1600 } 1601 if (!MaskVal) 1602 return; 1603 1604 // Check whether the combination of mask, comparison value and comparison 1605 // type are suitable. 
1606 unsigned BitSize = NewC.Op0.getValueType().getSizeInBits(); 1607 unsigned NewCCMask, ShiftVal; 1608 if (NewC.ICmpType != SystemZICMP::SignedOnly && 1609 NewC.Op0.getOpcode() == ISD::SHL && 1610 isSimpleShift(NewC.Op0, ShiftVal) && 1611 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, 1612 MaskVal >> ShiftVal, 1613 CmpVal >> ShiftVal, 1614 SystemZICMP::Any))) { 1615 NewC.Op0 = NewC.Op0.getOperand(0); 1616 MaskVal >>= ShiftVal; 1617 } else if (NewC.ICmpType != SystemZICMP::SignedOnly && 1618 NewC.Op0.getOpcode() == ISD::SRL && 1619 isSimpleShift(NewC.Op0, ShiftVal) && 1620 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, 1621 MaskVal << ShiftVal, 1622 CmpVal << ShiftVal, 1623 SystemZICMP::UnsignedOnly))) { 1624 NewC.Op0 = NewC.Op0.getOperand(0); 1625 MaskVal <<= ShiftVal; 1626 } else { 1627 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal, 1628 NewC.ICmpType); 1629 if (!NewCCMask) 1630 return; 1631 } 1632 1633 // Go ahead and make the change. 1634 C.Opcode = SystemZISD::TM; 1635 C.Op0 = NewC.Op0; 1636 if (Mask && Mask->getZExtValue() == MaskVal) 1637 C.Op1 = SDValue(Mask, 0); 1638 else 1639 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType()); 1640 C.CCValid = SystemZ::CCMASK_TM; 1641 C.CCMask = NewCCMask; 1642 } 1643 1644 // Return a Comparison that tests the condition-code result of intrinsic 1645 // node Call against constant integer CC using comparison code Cond. 1646 // Opcode is the opcode of the SystemZISD operation for the intrinsic 1647 // and CCValid is the set of possible condition-code results. 1648 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, 1649 SDValue Call, unsigned CCValid, uint64_t CC, 1650 ISD::CondCode Cond) { 1651 Comparison C(Call, SDValue()); 1652 C.Opcode = Opcode; 1653 C.CCValid = CCValid; 1654 if (Cond == ISD::SETEQ) 1655 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. 1656 C.CCMask = CC < 4 ? 
1 << (3 - CC) : 0; 1657 else if (Cond == ISD::SETNE) 1658 // ...and the inverse of that. 1659 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; 1660 else if (Cond == ISD::SETLT || Cond == ISD::SETULT) 1661 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, 1662 // always true for CC>3. 1663 C.CCMask = CC < 4 ? -1 << (4 - CC) : -1; 1664 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) 1665 // ...and the inverse of that. 1666 C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0; 1667 else if (Cond == ISD::SETLE || Cond == ISD::SETULE) 1668 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), 1669 // always true for CC>3. 1670 C.CCMask = CC < 4 ? -1 << (3 - CC) : -1; 1671 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) 1672 // ...and the inverse of that. 1673 C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0; 1674 else 1675 llvm_unreachable("Unexpected integer comparison type"); 1676 C.CCMask &= CCValid; 1677 return C; 1678 } 1679 1680 // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. 1681 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, 1682 ISD::CondCode Cond, SDLoc DL) { 1683 if (CmpOp1.getOpcode() == ISD::Constant) { 1684 uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue(); 1685 unsigned Opcode, CCValid; 1686 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && 1687 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && 1688 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) 1689 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); 1690 } 1691 Comparison C(CmpOp0, CmpOp1); 1692 C.CCMask = CCMaskForCondCode(Cond); 1693 if (C.Op0.getValueType().isFloatingPoint()) { 1694 C.CCValid = SystemZ::CCMASK_FCMP; 1695 C.Opcode = SystemZISD::FCMP; 1696 adjustForFNeg(C); 1697 } else { 1698 C.CCValid = SystemZ::CCMASK_ICMP; 1699 C.Opcode = SystemZISD::ICMP; 1700 // Choose the type of comparison. 
Equality and inequality tests can 1701 // use either signed or unsigned comparisons. The choice also doesn't 1702 // matter if both sign bits are known to be clear. In those cases we 1703 // want to give the main isel code the freedom to choose whichever 1704 // form fits best. 1705 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 1706 C.CCMask == SystemZ::CCMASK_CMP_NE || 1707 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1))) 1708 C.ICmpType = SystemZICMP::Any; 1709 else if (C.CCMask & SystemZ::CCMASK_CMP_UO) 1710 C.ICmpType = SystemZICMP::UnsignedOnly; 1711 else 1712 C.ICmpType = SystemZICMP::SignedOnly; 1713 C.CCMask &= ~SystemZ::CCMASK_CMP_UO; 1714 adjustZeroCmp(DAG, DL, C); 1715 adjustSubwordCmp(DAG, DL, C); 1716 adjustForSubtraction(DAG, DL, C); 1717 adjustForLTGFR(C); 1718 adjustICmpTruncate(DAG, DL, C); 1719 } 1720 1721 if (shouldSwapCmpOperands(C)) { 1722 std::swap(C.Op0, C.Op1); 1723 C.CCMask = reverseCCMask(C.CCMask); 1724 } 1725 1726 adjustForTestUnderMask(DAG, DL, C); 1727 return C; 1728 } 1729 1730 // Emit the comparison instruction described by C. 
1731 static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { 1732 if (!C.Op1.getNode()) { 1733 SDValue Op; 1734 switch (C.Op0.getOpcode()) { 1735 case ISD::INTRINSIC_W_CHAIN: 1736 Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); 1737 break; 1738 default: 1739 llvm_unreachable("Invalid comparison operands"); 1740 } 1741 return SDValue(Op.getNode(), Op->getNumValues() - 1); 1742 } 1743 if (C.Opcode == SystemZISD::ICMP) 1744 return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, 1745 DAG.getConstant(C.ICmpType, DL, MVT::i32)); 1746 if (C.Opcode == SystemZISD::TM) { 1747 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != 1748 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); 1749 return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1, 1750 DAG.getConstant(RegisterOnly, DL, MVT::i32)); 1751 } 1752 return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1); 1753 } 1754 1755 // Implement a 32-bit *MUL_LOHI operation by extending both operands to 1756 // 64 bits. Extend is the extension type to use. Store the high part 1757 // in Hi and the low part in Lo. 1758 static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL, 1759 unsigned Extend, SDValue Op0, SDValue Op1, 1760 SDValue &Hi, SDValue &Lo) { 1761 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); 1762 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); 1763 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); 1764 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, 1765 DAG.getConstant(32, DL, MVT::i64)); 1766 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); 1767 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); 1768 } 1769 1770 // Lower a binary operation that produces two VT results, one in each 1771 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, 1772 // Extend extends Op0 to a GR128, and Opcode performs the GR128 operation 1773 // on the extended Op0 and (unextended) Op1. 
Store the even register result 1774 // in Even and the odd register result in Odd. 1775 static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT, 1776 unsigned Extend, unsigned Opcode, 1777 SDValue Op0, SDValue Op1, 1778 SDValue &Even, SDValue &Odd) { 1779 SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0); 1780 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, 1781 SDValue(In128, 0), Op1); 1782 bool Is32Bit = is32Bit(VT); 1783 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); 1784 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); 1785 } 1786 1787 // Return an i32 value that is 1 if the CC value produced by Glue is 1788 // in the mask CCMask and 0 otherwise. CC is known to have a value 1789 // in CCValid, so other values can be ignored. 1790 static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue, 1791 unsigned CCValid, unsigned CCMask) { 1792 IPMConversion Conversion = getIPMConversion(CCValid, CCMask); 1793 SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); 1794 1795 if (Conversion.XORValue) 1796 Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result, 1797 DAG.getConstant(Conversion.XORValue, DL, MVT::i32)); 1798 1799 if (Conversion.AddValue) 1800 Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result, 1801 DAG.getConstant(Conversion.AddValue, DL, MVT::i32)); 1802 1803 // The SHR/AND sequence should get optimized to an RISBG. 
1804 Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result, 1805 DAG.getConstant(Conversion.Bit, DL, MVT::i32)); 1806 if (Conversion.Bit != 31) 1807 Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, 1808 DAG.getConstant(1, DL, MVT::i32)); 1809 return Result; 1810 } 1811 1812 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, 1813 SelectionDAG &DAG) const { 1814 SDValue CmpOp0 = Op.getOperand(0); 1815 SDValue CmpOp1 = Op.getOperand(1); 1816 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 1817 SDLoc DL(Op); 1818 1819 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 1820 SDValue Glue = emitCmp(DAG, DL, C); 1821 return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); 1822 } 1823 1824 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 1825 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 1826 SDValue CmpOp0 = Op.getOperand(2); 1827 SDValue CmpOp1 = Op.getOperand(3); 1828 SDValue Dest = Op.getOperand(4); 1829 SDLoc DL(Op); 1830 1831 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 1832 SDValue Glue = emitCmp(DAG, DL, C); 1833 return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), 1834 Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32), 1835 DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue); 1836 } 1837 1838 // Return true if Pos is CmpOp and Neg is the negative of CmpOp, 1839 // allowing Pos and Neg to be wider than CmpOp. 1840 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { 1841 return (Neg.getOpcode() == ISD::SUB && 1842 Neg.getOperand(0).getOpcode() == ISD::Constant && 1843 cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 && 1844 Neg.getOperand(1) == Pos && 1845 (Pos == CmpOp || 1846 (Pos.getOpcode() == ISD::SIGN_EXTEND && 1847 Pos.getOperand(0) == CmpOp))); 1848 } 1849 1850 // Return the absolute or negative absolute of Op; IsNegative decides which. 
1851 static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op, 1852 bool IsNegative) { 1853 Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op); 1854 if (IsNegative) 1855 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), 1856 DAG.getConstant(0, DL, Op.getValueType()), Op); 1857 return Op; 1858 } 1859 1860 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, 1861 SelectionDAG &DAG) const { 1862 SDValue CmpOp0 = Op.getOperand(0); 1863 SDValue CmpOp1 = Op.getOperand(1); 1864 SDValue TrueOp = Op.getOperand(2); 1865 SDValue FalseOp = Op.getOperand(3); 1866 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 1867 SDLoc DL(Op); 1868 1869 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 1870 1871 // Check for absolute and negative-absolute selections, including those 1872 // where the comparison value is sign-extended (for LPGFR and LNGFR). 1873 // This check supplements the one in DAGCombiner. 1874 if (C.Opcode == SystemZISD::ICMP && 1875 C.CCMask != SystemZ::CCMASK_CMP_EQ && 1876 C.CCMask != SystemZ::CCMASK_CMP_NE && 1877 C.Op1.getOpcode() == ISD::Constant && 1878 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) { 1879 if (isAbsolute(C.Op0, TrueOp, FalseOp)) 1880 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT); 1881 if (isAbsolute(C.Op0, FalseOp, TrueOp)) 1882 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT); 1883 } 1884 1885 SDValue Glue = emitCmp(DAG, DL, C); 1886 1887 // Special case for handling -1/0 results. The shifts we use here 1888 // should get optimized with the IPM conversion sequence. 1889 auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp); 1890 auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp); 1891 if (TrueC && FalseC) { 1892 int64_t TrueVal = TrueC->getSExtValue(); 1893 int64_t FalseVal = FalseC->getSExtValue(); 1894 if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) { 1895 // Invert the condition if we want -1 on false. 
1896 if (TrueVal == 0) 1897 C.CCMask ^= C.CCValid; 1898 SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); 1899 EVT VT = Op.getValueType(); 1900 // Extend the result to VT. Upper bits are ignored. 1901 if (!is32Bit(VT)) 1902 Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result); 1903 // Sign-extend from the low bit. 1904 SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32); 1905 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt); 1906 return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt); 1907 } 1908 } 1909 1910 SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32), 1911 DAG.getConstant(C.CCMask, DL, MVT::i32), Glue}; 1912 1913 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); 1914 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); 1915 } 1916 1917 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, 1918 SelectionDAG &DAG) const { 1919 SDLoc DL(Node); 1920 const GlobalValue *GV = Node->getGlobal(); 1921 int64_t Offset = Node->getOffset(); 1922 EVT PtrVT = getPointerTy(); 1923 Reloc::Model RM = DAG.getTarget().getRelocationModel(); 1924 CodeModel::Model CM = DAG.getTarget().getCodeModel(); 1925 1926 SDValue Result; 1927 if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) { 1928 // Assign anchors at 1<<12 byte boundaries. 1929 uint64_t Anchor = Offset & ~uint64_t(0xfff); 1930 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor); 1931 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 1932 1933 // The offset can be folded into the address if it is aligned to a halfword. 
1934 Offset -= Anchor; 1935 if (Offset != 0 && (Offset & 1) == 0) { 1936 SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset); 1937 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result); 1938 Offset = 0; 1939 } 1940 } else { 1941 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); 1942 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 1943 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, 1944 MachinePointerInfo::getGOT(), false, false, false, 0); 1945 } 1946 1947 // If there was a non-zero offset that we didn't fold, create an explicit 1948 // addition for it. 1949 if (Offset != 0) 1950 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, 1951 DAG.getConstant(Offset, DL, PtrVT)); 1952 1953 return Result; 1954 } 1955 1956 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, 1957 SelectionDAG &DAG, 1958 unsigned Opcode, 1959 SDValue GOTOffset) const { 1960 SDLoc DL(Node); 1961 EVT PtrVT = getPointerTy(); 1962 SDValue Chain = DAG.getEntryNode(); 1963 SDValue Glue; 1964 1965 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. 1966 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 1967 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); 1968 Glue = Chain.getValue(1); 1969 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); 1970 Glue = Chain.getValue(1); 1971 1972 // The first call operand is the chain and the second is the TLS symbol. 1973 SmallVector<SDValue, 8> Ops; 1974 Ops.push_back(Chain); 1975 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, 1976 Node->getValueType(0), 1977 0, 0)); 1978 1979 // Add argument registers to the end of the list so that they are 1980 // known live into the call. 1981 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); 1982 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); 1983 1984 // Add a register mask operand representing the call-preserved registers. 
1985 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1986 const uint32_t *Mask = 1987 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); 1988 assert(Mask && "Missing call preserved mask for calling convention"); 1989 Ops.push_back(DAG.getRegisterMask(Mask)); 1990 1991 // Glue the call to the argument copies. 1992 Ops.push_back(Glue); 1993 1994 // Emit the call. 1995 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1996 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); 1997 Glue = Chain.getValue(1); 1998 1999 // Copy the return value from %r2. 2000 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); 2001 } 2002 2003 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, 2004 SelectionDAG &DAG) const { 2005 SDLoc DL(Node); 2006 const GlobalValue *GV = Node->getGlobal(); 2007 EVT PtrVT = getPointerTy(); 2008 TLSModel::Model model = DAG.getTarget().getTLSModel(GV); 2009 2010 // The high part of the thread pointer is in access register 0. 2011 SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, 2012 DAG.getConstant(0, DL, MVT::i32)); 2013 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); 2014 2015 // The low part of the thread pointer is in access register 1. 2016 SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, 2017 DAG.getConstant(1, DL, MVT::i32)); 2018 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); 2019 2020 // Merge them into a single 64-bit address. 2021 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, 2022 DAG.getConstant(32, DL, PtrVT)); 2023 SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); 2024 2025 // Get the offset of GA from the thread pointer, based on the TLS model. 2026 SDValue Offset; 2027 switch (model) { 2028 case TLSModel::GeneralDynamic: { 2029 // Load the GOT offset of the tls_index (module ID / per-symbol offset). 
2030 SystemZConstantPoolValue *CPV = 2031 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); 2032 2033 Offset = DAG.getConstantPool(CPV, PtrVT, 8); 2034 Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), 2035 Offset, MachinePointerInfo::getConstantPool(), 2036 false, false, false, 0); 2037 2038 // Call __tls_get_offset to retrieve the offset. 2039 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); 2040 break; 2041 } 2042 2043 case TLSModel::LocalDynamic: { 2044 // Load the GOT offset of the module ID. 2045 SystemZConstantPoolValue *CPV = 2046 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); 2047 2048 Offset = DAG.getConstantPool(CPV, PtrVT, 8); 2049 Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), 2050 Offset, MachinePointerInfo::getConstantPool(), 2051 false, false, false, 0); 2052 2053 // Call __tls_get_offset to retrieve the module base offset. 2054 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); 2055 2056 // Note: The SystemZLDCleanupPass will remove redundant computations 2057 // of the module base offset. Count total number of local-dynamic 2058 // accesses to trigger execution of that pass. 2059 SystemZMachineFunctionInfo* MFI = 2060 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>(); 2061 MFI->incNumLocalDynamicTLSAccesses(); 2062 2063 // Add the per-symbol offset. 2064 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); 2065 2066 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); 2067 DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), 2068 DTPOffset, MachinePointerInfo::getConstantPool(), 2069 false, false, false, 0); 2070 2071 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); 2072 break; 2073 } 2074 2075 case TLSModel::InitialExec: { 2076 // Load the offset from the GOT. 
2077 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 2078 SystemZII::MO_INDNTPOFF); 2079 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); 2080 Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), 2081 Offset, MachinePointerInfo::getGOT(), 2082 false, false, false, 0); 2083 break; 2084 } 2085 2086 case TLSModel::LocalExec: { 2087 // Force the offset into the constant pool and load it from there. 2088 SystemZConstantPoolValue *CPV = 2089 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); 2090 2091 Offset = DAG.getConstantPool(CPV, PtrVT, 8); 2092 Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), 2093 Offset, MachinePointerInfo::getConstantPool(), 2094 false, false, false, 0); 2095 break; 2096 } 2097 } 2098 2099 // Add the base and offset together. 2100 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); 2101 } 2102 2103 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, 2104 SelectionDAG &DAG) const { 2105 SDLoc DL(Node); 2106 const BlockAddress *BA = Node->getBlockAddress(); 2107 int64_t Offset = Node->getOffset(); 2108 EVT PtrVT = getPointerTy(); 2109 2110 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); 2111 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 2112 return Result; 2113 } 2114 2115 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, 2116 SelectionDAG &DAG) const { 2117 SDLoc DL(JT); 2118 EVT PtrVT = getPointerTy(); 2119 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 2120 2121 // Use LARL to load the address of the table. 
2122 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 2123 } 2124 2125 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, 2126 SelectionDAG &DAG) const { 2127 SDLoc DL(CP); 2128 EVT PtrVT = getPointerTy(); 2129 2130 SDValue Result; 2131 if (CP->isMachineConstantPoolEntry()) 2132 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 2133 CP->getAlignment()); 2134 else 2135 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 2136 CP->getAlignment(), CP->getOffset()); 2137 2138 // Use LARL to load the address of the constant pool entry. 2139 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 2140 } 2141 2142 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, 2143 SelectionDAG &DAG) const { 2144 SDLoc DL(Op); 2145 SDValue In = Op.getOperand(0); 2146 EVT InVT = In.getValueType(); 2147 EVT ResVT = Op.getValueType(); 2148 2149 if (InVT == MVT::i32 && ResVT == MVT::f32) { 2150 SDValue In64; 2151 if (Subtarget.hasHighWord()) { 2152 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, 2153 MVT::i64); 2154 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, 2155 MVT::i64, SDValue(U64, 0), In); 2156 } else { 2157 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); 2158 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, 2159 DAG.getConstant(32, DL, MVT::i64)); 2160 } 2161 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); 2162 return DAG.getTargetExtractSubreg(SystemZ::subreg_r32, 2163 DL, MVT::f32, Out64); 2164 } 2165 if (InVT == MVT::f32 && ResVT == MVT::i32) { 2166 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); 2167 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL, 2168 MVT::f64, SDValue(U64, 0), In); 2169 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); 2170 if (Subtarget.hasHighWord()) 2171 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, 2172 MVT::i32, Out64); 2173 SDValue Shift = 
DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, 2174 DAG.getConstant(32, DL, MVT::i64)); 2175 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); 2176 } 2177 llvm_unreachable("Unexpected bitcast combination"); 2178 } 2179 2180 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, 2181 SelectionDAG &DAG) const { 2182 MachineFunction &MF = DAG.getMachineFunction(); 2183 SystemZMachineFunctionInfo *FuncInfo = 2184 MF.getInfo<SystemZMachineFunctionInfo>(); 2185 EVT PtrVT = getPointerTy(); 2186 2187 SDValue Chain = Op.getOperand(0); 2188 SDValue Addr = Op.getOperand(1); 2189 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2190 SDLoc DL(Op); 2191 2192 // The initial values of each field. 2193 const unsigned NumFields = 4; 2194 SDValue Fields[NumFields] = { 2195 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), 2196 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), 2197 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), 2198 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) 2199 }; 2200 2201 // Store each field into its respective slot. 
2202 SDValue MemOps[NumFields]; 2203 unsigned Offset = 0; 2204 for (unsigned I = 0; I < NumFields; ++I) { 2205 SDValue FieldAddr = Addr; 2206 if (Offset != 0) 2207 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, 2208 DAG.getIntPtrConstant(Offset, DL)); 2209 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, 2210 MachinePointerInfo(SV, Offset), 2211 false, false, 0); 2212 Offset += 8; 2213 } 2214 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); 2215 } 2216 2217 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, 2218 SelectionDAG &DAG) const { 2219 SDValue Chain = Op.getOperand(0); 2220 SDValue DstPtr = Op.getOperand(1); 2221 SDValue SrcPtr = Op.getOperand(2); 2222 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); 2223 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 2224 SDLoc DL(Op); 2225 2226 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL), 2227 /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, 2228 /*isTailCall*/false, 2229 MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); 2230 } 2231 2232 SDValue SystemZTargetLowering:: 2233 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { 2234 SDValue Chain = Op.getOperand(0); 2235 SDValue Size = Op.getOperand(1); 2236 SDLoc DL(Op); 2237 2238 unsigned SPReg = getStackPointerRegisterToSaveRestore(); 2239 2240 // Get a reference to the stack pointer. 2241 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); 2242 2243 // Get the new stack pointer value. 2244 SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size); 2245 2246 // Copy the new stack pointer back. 2247 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); 2248 2249 // The allocated data lives above the 160 bytes allocated for the standard 2250 // frame, plus any outgoing stack arguments. We don't know how much that 2251 // amounts to yet, so emit a special ADJDYNALLOC placeholder. 
2252 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); 2253 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); 2254 2255 SDValue Ops[2] = { Result, Chain }; 2256 return DAG.getMergeValues(Ops, DL); 2257 } 2258 2259 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, 2260 SelectionDAG &DAG) const { 2261 EVT VT = Op.getValueType(); 2262 SDLoc DL(Op); 2263 SDValue Ops[2]; 2264 if (is32Bit(VT)) 2265 // Just do a normal 64-bit multiplication and extract the results. 2266 // We define this so that it can be used for constant division. 2267 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), 2268 Op.getOperand(1), Ops[1], Ops[0]); 2269 else { 2270 // Do a full 128-bit multiplication based on UMUL_LOHI64: 2271 // 2272 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) 2273 // 2274 // but using the fact that the upper halves are either all zeros 2275 // or all ones: 2276 // 2277 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) 2278 // 2279 // and grouping the right terms together since they are quicker than the 2280 // multiplication: 2281 // 2282 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) 2283 SDValue C63 = DAG.getConstant(63, DL, MVT::i64); 2284 SDValue LL = Op.getOperand(0); 2285 SDValue RL = Op.getOperand(1); 2286 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); 2287 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); 2288 // UMUL_LOHI64 returns the low result in the odd register and the high 2289 // result in the even register. SMUL_LOHI is defined to return the 2290 // low half first, so the results are in reverse order. 
2291 lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, 2292 LL, RL, Ops[1], Ops[0]); 2293 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); 2294 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); 2295 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); 2296 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); 2297 } 2298 return DAG.getMergeValues(Ops, DL); 2299 } 2300 2301 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, 2302 SelectionDAG &DAG) const { 2303 EVT VT = Op.getValueType(); 2304 SDLoc DL(Op); 2305 SDValue Ops[2]; 2306 if (is32Bit(VT)) 2307 // Just do a normal 64-bit multiplication and extract the results. 2308 // We define this so that it can be used for constant division. 2309 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), 2310 Op.getOperand(1), Ops[1], Ops[0]); 2311 else 2312 // UMUL_LOHI64 returns the low result in the odd register and the high 2313 // result in the even register. UMUL_LOHI is defined to return the 2314 // low half first, so the results are in reverse order. 2315 lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, 2316 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 2317 return DAG.getMergeValues(Ops, DL); 2318 } 2319 2320 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, 2321 SelectionDAG &DAG) const { 2322 SDValue Op0 = Op.getOperand(0); 2323 SDValue Op1 = Op.getOperand(1); 2324 EVT VT = Op.getValueType(); 2325 SDLoc DL(Op); 2326 unsigned Opcode; 2327 2328 // We use DSGF for 32-bit division. 
2329 if (is32Bit(VT)) { 2330 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); 2331 Opcode = SystemZISD::SDIVREM32; 2332 } else if (DAG.ComputeNumSignBits(Op1) > 32) { 2333 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); 2334 Opcode = SystemZISD::SDIVREM32; 2335 } else 2336 Opcode = SystemZISD::SDIVREM64; 2337 2338 // DSG(F) takes a 64-bit dividend, so the even register in the GR128 2339 // input is "don't care". The instruction returns the remainder in 2340 // the even register and the quotient in the odd register. 2341 SDValue Ops[2]; 2342 lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode, 2343 Op0, Op1, Ops[1], Ops[0]); 2344 return DAG.getMergeValues(Ops, DL); 2345 } 2346 2347 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, 2348 SelectionDAG &DAG) const { 2349 EVT VT = Op.getValueType(); 2350 SDLoc DL(Op); 2351 2352 // DL(G) uses a double-width dividend, so we need to clear the even 2353 // register in the GR128 input. The instruction returns the remainder 2354 // in the even register and the quotient in the odd register. 2355 SDValue Ops[2]; 2356 if (is32Bit(VT)) 2357 lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32, 2358 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 2359 else 2360 lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64, 2361 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 2362 return DAG.getMergeValues(Ops, DL); 2363 } 2364 2365 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { 2366 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation"); 2367 2368 // Get the known-zero masks for each operand. 2369 SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) }; 2370 APInt KnownZero[2], KnownOne[2]; 2371 DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]); 2372 DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]); 2373 2374 // See if the upper 32 bits of one operand and the lower 32 bits of the 2375 // other are known zero. 
They are the low and high operands respectively. 2376 uint64_t Masks[] = { KnownZero[0].getZExtValue(), 2377 KnownZero[1].getZExtValue() }; 2378 unsigned High, Low; 2379 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) 2380 High = 1, Low = 0; 2381 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff) 2382 High = 0, Low = 1; 2383 else 2384 return Op; 2385 2386 SDValue LowOp = Ops[Low]; 2387 SDValue HighOp = Ops[High]; 2388 2389 // If the high part is a constant, we're better off using IILH. 2390 if (HighOp.getOpcode() == ISD::Constant) 2391 return Op; 2392 2393 // If the low part is a constant that is outside the range of LHI, 2394 // then we're better off using IILF. 2395 if (LowOp.getOpcode() == ISD::Constant) { 2396 int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue()); 2397 if (!isInt<16>(Value)) 2398 return Op; 2399 } 2400 2401 // Check whether the high part is an AND that doesn't change the 2402 // high 32 bits and just masks out low bits. We can skip it if so. 2403 if (HighOp.getOpcode() == ISD::AND && 2404 HighOp.getOperand(1).getOpcode() == ISD::Constant) { 2405 SDValue HighOp0 = HighOp.getOperand(0); 2406 uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue(); 2407 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff)))) 2408 HighOp = HighOp0; 2409 } 2410 2411 // Take advantage of the fact that all GR32 operations only change the 2412 // low 32 bits by truncating Low to an i32 and inserting it directly 2413 // using a subreg. The interesting cases are those where the truncation 2414 // can be folded. 
2415 SDLoc DL(Op); 2416 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp); 2417 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL, 2418 MVT::i64, HighOp, Low32); 2419 } 2420 2421 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, 2422 SelectionDAG &DAG) const { 2423 EVT VT = Op.getValueType(); 2424 int64_t OrigBitSize = VT.getSizeInBits(); 2425 SDLoc DL(Op); 2426 2427 // Get the known-zero mask for the operand. 2428 Op = Op.getOperand(0); 2429 APInt KnownZero, KnownOne; 2430 DAG.computeKnownBits(Op, KnownZero, KnownOne); 2431 unsigned NumSignificantBits = (~KnownZero).getActiveBits(); 2432 if (NumSignificantBits == 0) 2433 return DAG.getConstant(0, DL, VT); 2434 2435 // Skip known-zero high parts of the operand. 2436 int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits); 2437 BitSize = std::min(BitSize, OrigBitSize); 2438 2439 // The POPCNT instruction counts the number of bits in each byte. 2440 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); 2441 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); 2442 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); 2443 2444 // Add up per-byte counts in a binary tree. All bits of Op at 2445 // position larger than BitSize remain zero throughout. 2446 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { 2447 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT)); 2448 if (BitSize != OrigBitSize) 2449 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, 2450 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT)); 2451 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); 2452 } 2453 2454 // Extract overall result from high byte. 2455 if (BitSize > 8) 2456 Op = DAG.getNode(ISD::SRL, DL, VT, Op, 2457 DAG.getConstant(BitSize - 8, DL, VT)); 2458 2459 return Op; 2460 } 2461 2462 // Op is an atomic load. Lower it into a normal volatile load. 
2463 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, 2464 SelectionDAG &DAG) const { 2465 auto *Node = cast<AtomicSDNode>(Op.getNode()); 2466 return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(), 2467 Node->getChain(), Node->getBasePtr(), 2468 Node->getMemoryVT(), Node->getMemOperand()); 2469 } 2470 2471 // Op is an atomic store. Lower it into a normal volatile store followed 2472 // by a serialization. 2473 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op, 2474 SelectionDAG &DAG) const { 2475 auto *Node = cast<AtomicSDNode>(Op.getNode()); 2476 SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(), 2477 Node->getBasePtr(), Node->getMemoryVT(), 2478 Node->getMemOperand()); 2479 return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other, 2480 Chain), 0); 2481 } 2482 2483 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first 2484 // two into the fullword ATOMIC_LOADW_* operation given by Opcode. 2485 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, 2486 SelectionDAG &DAG, 2487 unsigned Opcode) const { 2488 auto *Node = cast<AtomicSDNode>(Op.getNode()); 2489 2490 // 32-bit operations need no code outside the main loop. 2491 EVT NarrowVT = Node->getMemoryVT(); 2492 EVT WideVT = MVT::i32; 2493 if (NarrowVT == WideVT) 2494 return Op; 2495 2496 int64_t BitSize = NarrowVT.getSizeInBits(); 2497 SDValue ChainIn = Node->getChain(); 2498 SDValue Addr = Node->getBasePtr(); 2499 SDValue Src2 = Node->getVal(); 2500 MachineMemOperand *MMO = Node->getMemOperand(); 2501 SDLoc DL(Node); 2502 EVT PtrVT = Addr.getValueType(); 2503 2504 // Convert atomic subtracts of constants into additions. 2505 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) 2506 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) { 2507 Opcode = SystemZISD::ATOMIC_LOADW_ADD; 2508 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); 2509 } 2510 2511 // Get the address of the containing word. 
2512 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, 2513 DAG.getConstant(-4, DL, PtrVT)); 2514 2515 // Get the number of bits that the word must be rotated left in order 2516 // to bring the field to the top bits of a GR32. 2517 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, 2518 DAG.getConstant(3, DL, PtrVT)); 2519 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); 2520 2521 // Get the complementing shift amount, for rotating a field in the top 2522 // bits back to its proper position. 2523 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, 2524 DAG.getConstant(0, DL, WideVT), BitShift); 2525 2526 // Extend the source operand to 32 bits and prepare it for the inner loop. 2527 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other 2528 // operations require the source to be shifted in advance. (This shift 2529 // can be folded if the source is constant.) For AND and NAND, the lower 2530 // bits must be set, while for other opcodes they should be left clear. 2531 if (Opcode != SystemZISD::ATOMIC_SWAPW) 2532 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, 2533 DAG.getConstant(32 - BitSize, DL, WideVT)); 2534 if (Opcode == SystemZISD::ATOMIC_LOADW_AND || 2535 Opcode == SystemZISD::ATOMIC_LOADW_NAND) 2536 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, 2537 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT)); 2538 2539 // Construct the ATOMIC_LOADW_* node. 2540 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); 2541 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, 2542 DAG.getConstant(BitSize, DL, WideVT) }; 2543 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, 2544 NarrowVT, MMO); 2545 2546 // Rotate the result of the final CS so that the field is in the lower 2547 // bits of a GR32, then truncate it. 
2548 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, 2549 DAG.getConstant(BitSize, DL, WideVT)); 2550 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); 2551 2552 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; 2553 return DAG.getMergeValues(RetOps, DL); 2554 } 2555 2556 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations 2557 // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit 2558 // operations into additions. 2559 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, 2560 SelectionDAG &DAG) const { 2561 auto *Node = cast<AtomicSDNode>(Op.getNode()); 2562 EVT MemVT = Node->getMemoryVT(); 2563 if (MemVT == MVT::i32 || MemVT == MVT::i64) { 2564 // A full-width operation. 2565 assert(Op.getValueType() == MemVT && "Mismatched VTs"); 2566 SDValue Src2 = Node->getVal(); 2567 SDValue NegSrc2; 2568 SDLoc DL(Src2); 2569 2570 if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) { 2571 // Use an addition if the operand is constant and either LAA(G) is 2572 // available or the negative value is in the range of A(G)FHI. 2573 int64_t Value = (-Op2->getAPIntValue()).getSExtValue(); 2574 if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1()) 2575 NegSrc2 = DAG.getConstant(Value, DL, MemVT); 2576 } else if (Subtarget.hasInterlockedAccess1()) 2577 // Use LAA(G) if available. 2578 NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), 2579 Src2); 2580 2581 if (NegSrc2.getNode()) 2582 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT, 2583 Node->getChain(), Node->getBasePtr(), NegSrc2, 2584 Node->getMemOperand(), Node->getOrdering(), 2585 Node->getSynchScope()); 2586 2587 // Use the node as-is. 2588 return Op; 2589 } 2590 2591 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); 2592 } 2593 2594 // Node is an 8- or 16-bit ATOMIC_CMP_SWAP operation. Lower the first two 2595 // into a fullword ATOMIC_CMP_SWAPW operation. 
2596 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, 2597 SelectionDAG &DAG) const { 2598 auto *Node = cast<AtomicSDNode>(Op.getNode()); 2599 2600 // We have native support for 32-bit compare and swap. 2601 EVT NarrowVT = Node->getMemoryVT(); 2602 EVT WideVT = MVT::i32; 2603 if (NarrowVT == WideVT) 2604 return Op; 2605 2606 int64_t BitSize = NarrowVT.getSizeInBits(); 2607 SDValue ChainIn = Node->getOperand(0); 2608 SDValue Addr = Node->getOperand(1); 2609 SDValue CmpVal = Node->getOperand(2); 2610 SDValue SwapVal = Node->getOperand(3); 2611 MachineMemOperand *MMO = Node->getMemOperand(); 2612 SDLoc DL(Node); 2613 EVT PtrVT = Addr.getValueType(); 2614 2615 // Get the address of the containing word. 2616 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, 2617 DAG.getConstant(-4, DL, PtrVT)); 2618 2619 // Get the number of bits that the word must be rotated left in order 2620 // to bring the field to the top bits of a GR32. 2621 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, 2622 DAG.getConstant(3, DL, PtrVT)); 2623 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); 2624 2625 // Get the complementing shift amount, for rotating a field in the top 2626 // bits back to its proper position. 2627 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, 2628 DAG.getConstant(0, DL, WideVT), BitShift); 2629 2630 // Construct the ATOMIC_CMP_SWAPW node. 
2631 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); 2632 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, 2633 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; 2634 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, 2635 VTList, Ops, NarrowVT, MMO); 2636 return AtomicOp; 2637 } 2638 2639 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, 2640 SelectionDAG &DAG) const { 2641 MachineFunction &MF = DAG.getMachineFunction(); 2642 MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); 2643 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op), 2644 SystemZ::R15D, Op.getValueType()); 2645 } 2646 2647 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, 2648 SelectionDAG &DAG) const { 2649 MachineFunction &MF = DAG.getMachineFunction(); 2650 MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); 2651 return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op), 2652 SystemZ::R15D, Op.getOperand(1)); 2653 } 2654 2655 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, 2656 SelectionDAG &DAG) const { 2657 bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); 2658 if (!IsData) 2659 // Just preserve the chain. 2660 return Op.getOperand(0); 2661 2662 SDLoc DL(Op); 2663 bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); 2664 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; 2665 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode()); 2666 SDValue Ops[] = { 2667 Op.getOperand(0), 2668 DAG.getConstant(Code, DL, MVT::i32), 2669 Op.getOperand(1) 2670 }; 2671 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, 2672 Node->getVTList(), Ops, 2673 Node->getMemoryVT(), Node->getMemOperand()); 2674 } 2675 2676 // Return an i32 that contains the value of CC immediately after After, 2677 // whose final operand must be MVT::Glue. 
2678 static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) { 2679 SDLoc DL(After); 2680 SDValue Glue = SDValue(After, After->getNumValues() - 1); 2681 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); 2682 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, 2683 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); 2684 } 2685 2686 SDValue 2687 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 2688 SelectionDAG &DAG) const { 2689 unsigned Opcode, CCValid; 2690 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { 2691 assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); 2692 SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); 2693 SDValue CC = getCCResult(DAG, Glued.getNode()); 2694 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); 2695 return SDValue(); 2696 } 2697 2698 return SDValue(); 2699 } 2700 2701 SDValue SystemZTargetLowering::LowerOperation(SDValue Op, 2702 SelectionDAG &DAG) const { 2703 switch (Op.getOpcode()) { 2704 case ISD::BR_CC: 2705 return lowerBR_CC(Op, DAG); 2706 case ISD::SELECT_CC: 2707 return lowerSELECT_CC(Op, DAG); 2708 case ISD::SETCC: 2709 return lowerSETCC(Op, DAG); 2710 case ISD::GlobalAddress: 2711 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG); 2712 case ISD::GlobalTLSAddress: 2713 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG); 2714 case ISD::BlockAddress: 2715 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG); 2716 case ISD::JumpTable: 2717 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG); 2718 case ISD::ConstantPool: 2719 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG); 2720 case ISD::BITCAST: 2721 return lowerBITCAST(Op, DAG); 2722 case ISD::VASTART: 2723 return lowerVASTART(Op, DAG); 2724 case ISD::VACOPY: 2725 return lowerVACOPY(Op, DAG); 2726 case ISD::DYNAMIC_STACKALLOC: 2727 return lowerDYNAMIC_STACKALLOC(Op, DAG); 2728 case ISD::SMUL_LOHI: 2729 return lowerSMUL_LOHI(Op, DAG); 2730 case ISD::UMUL_LOHI: 
2731 return lowerUMUL_LOHI(Op, DAG); 2732 case ISD::SDIVREM: 2733 return lowerSDIVREM(Op, DAG); 2734 case ISD::UDIVREM: 2735 return lowerUDIVREM(Op, DAG); 2736 case ISD::OR: 2737 return lowerOR(Op, DAG); 2738 case ISD::CTPOP: 2739 return lowerCTPOP(Op, DAG); 2740 case ISD::ATOMIC_SWAP: 2741 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); 2742 case ISD::ATOMIC_STORE: 2743 return lowerATOMIC_STORE(Op, DAG); 2744 case ISD::ATOMIC_LOAD: 2745 return lowerATOMIC_LOAD(Op, DAG); 2746 case ISD::ATOMIC_LOAD_ADD: 2747 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); 2748 case ISD::ATOMIC_LOAD_SUB: 2749 return lowerATOMIC_LOAD_SUB(Op, DAG); 2750 case ISD::ATOMIC_LOAD_AND: 2751 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); 2752 case ISD::ATOMIC_LOAD_OR: 2753 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); 2754 case ISD::ATOMIC_LOAD_XOR: 2755 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); 2756 case ISD::ATOMIC_LOAD_NAND: 2757 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); 2758 case ISD::ATOMIC_LOAD_MIN: 2759 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); 2760 case ISD::ATOMIC_LOAD_MAX: 2761 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); 2762 case ISD::ATOMIC_LOAD_UMIN: 2763 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); 2764 case ISD::ATOMIC_LOAD_UMAX: 2765 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); 2766 case ISD::ATOMIC_CMP_SWAP: 2767 return lowerATOMIC_CMP_SWAP(Op, DAG); 2768 case ISD::STACKSAVE: 2769 return lowerSTACKSAVE(Op, DAG); 2770 case ISD::STACKRESTORE: 2771 return lowerSTACKRESTORE(Op, DAG); 2772 case ISD::PREFETCH: 2773 return lowerPREFETCH(Op, DAG); 2774 case ISD::INTRINSIC_W_CHAIN: 2775 return lowerINTRINSIC_W_CHAIN(Op, DAG); 2776 default: 2777 llvm_unreachable("Unexpected node to lower"); 2778 } 2779 } 2780 2781 const char 
*SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { 2782 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME 2783 switch (Opcode) { 2784 OPCODE(RET_FLAG); 2785 OPCODE(CALL); 2786 OPCODE(SIBCALL); 2787 OPCODE(TLS_GDCALL); 2788 OPCODE(TLS_LDCALL); 2789 OPCODE(PCREL_WRAPPER); 2790 OPCODE(PCREL_OFFSET); 2791 OPCODE(IABS); 2792 OPCODE(ICMP); 2793 OPCODE(FCMP); 2794 OPCODE(TM); 2795 OPCODE(BR_CCMASK); 2796 OPCODE(SELECT_CCMASK); 2797 OPCODE(ADJDYNALLOC); 2798 OPCODE(EXTRACT_ACCESS); 2799 OPCODE(POPCNT); 2800 OPCODE(UMUL_LOHI64); 2801 OPCODE(SDIVREM32); 2802 OPCODE(SDIVREM64); 2803 OPCODE(UDIVREM32); 2804 OPCODE(UDIVREM64); 2805 OPCODE(MVC); 2806 OPCODE(MVC_LOOP); 2807 OPCODE(NC); 2808 OPCODE(NC_LOOP); 2809 OPCODE(OC); 2810 OPCODE(OC_LOOP); 2811 OPCODE(XC); 2812 OPCODE(XC_LOOP); 2813 OPCODE(CLC); 2814 OPCODE(CLC_LOOP); 2815 OPCODE(STPCPY); 2816 OPCODE(STRCMP); 2817 OPCODE(SEARCH_STRING); 2818 OPCODE(IPM); 2819 OPCODE(SERIALIZE); 2820 OPCODE(TBEGIN); 2821 OPCODE(TBEGIN_NOFLOAT); 2822 OPCODE(TEND); 2823 OPCODE(ATOMIC_SWAPW); 2824 OPCODE(ATOMIC_LOADW_ADD); 2825 OPCODE(ATOMIC_LOADW_SUB); 2826 OPCODE(ATOMIC_LOADW_AND); 2827 OPCODE(ATOMIC_LOADW_OR); 2828 OPCODE(ATOMIC_LOADW_XOR); 2829 OPCODE(ATOMIC_LOADW_NAND); 2830 OPCODE(ATOMIC_LOADW_MIN); 2831 OPCODE(ATOMIC_LOADW_MAX); 2832 OPCODE(ATOMIC_LOADW_UMIN); 2833 OPCODE(ATOMIC_LOADW_UMAX); 2834 OPCODE(ATOMIC_CMP_SWAPW); 2835 OPCODE(PREFETCH); 2836 } 2837 return nullptr; 2838 #undef OPCODE 2839 } 2840 2841 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, 2842 DAGCombinerInfo &DCI) const { 2843 SelectionDAG &DAG = DCI.DAG; 2844 unsigned Opcode = N->getOpcode(); 2845 if (Opcode == ISD::SIGN_EXTEND) { 2846 // Convert (sext (ashr (shl X, C1), C2)) to 2847 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as 2848 // cheap as narrower ones. 
2849 SDValue N0 = N->getOperand(0); 2850 EVT VT = N->getValueType(0); 2851 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { 2852 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 2853 SDValue Inner = N0.getOperand(0); 2854 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { 2855 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) { 2856 unsigned Extra = (VT.getSizeInBits() - 2857 N0.getValueType().getSizeInBits()); 2858 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; 2859 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; 2860 EVT ShiftVT = N0.getOperand(1).getValueType(); 2861 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, 2862 Inner.getOperand(0)); 2863 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, 2864 DAG.getConstant(NewShlAmt, SDLoc(Inner), 2865 ShiftVT)); 2866 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, 2867 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); 2868 } 2869 } 2870 } 2871 } 2872 return SDValue(); 2873 } 2874 2875 //===----------------------------------------------------------------------===// 2876 // Custom insertion 2877 //===----------------------------------------------------------------------===// 2878 2879 // Create a new basic block after MBB. 2880 static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { 2881 MachineFunction &MF = *MBB->getParent(); 2882 MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); 2883 MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB); 2884 return NewMBB; 2885 } 2886 2887 // Split MBB after MI and return the new block (the one that contains 2888 // instructions after MI). 
2889 static MachineBasicBlock *splitBlockAfter(MachineInstr *MI, 2890 MachineBasicBlock *MBB) { 2891 MachineBasicBlock *NewMBB = emitBlockAfter(MBB); 2892 NewMBB->splice(NewMBB->begin(), MBB, 2893 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 2894 NewMBB->transferSuccessorsAndUpdatePHIs(MBB); 2895 return NewMBB; 2896 } 2897 2898 // Split MBB before MI and return the new block (the one that contains MI). 2899 static MachineBasicBlock *splitBlockBefore(MachineInstr *MI, 2900 MachineBasicBlock *MBB) { 2901 MachineBasicBlock *NewMBB = emitBlockAfter(MBB); 2902 NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); 2903 NewMBB->transferSuccessorsAndUpdatePHIs(MBB); 2904 return NewMBB; 2905 } 2906 2907 // Force base value Base into a register before MI. Return the register. 2908 static unsigned forceReg(MachineInstr *MI, MachineOperand &Base, 2909 const SystemZInstrInfo *TII) { 2910 if (Base.isReg()) 2911 return Base.getReg(); 2912 2913 MachineBasicBlock *MBB = MI->getParent(); 2914 MachineFunction &MF = *MBB->getParent(); 2915 MachineRegisterInfo &MRI = MF.getRegInfo(); 2916 2917 unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 2918 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg) 2919 .addOperand(Base).addImm(0).addReg(0); 2920 return Reg; 2921 } 2922 2923 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. 
2924 MachineBasicBlock * 2925 SystemZTargetLowering::emitSelect(MachineInstr *MI, 2926 MachineBasicBlock *MBB) const { 2927 const SystemZInstrInfo *TII = 2928 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); 2929 2930 unsigned DestReg = MI->getOperand(0).getReg(); 2931 unsigned TrueReg = MI->getOperand(1).getReg(); 2932 unsigned FalseReg = MI->getOperand(2).getReg(); 2933 unsigned CCValid = MI->getOperand(3).getImm(); 2934 unsigned CCMask = MI->getOperand(4).getImm(); 2935 DebugLoc DL = MI->getDebugLoc(); 2936 2937 MachineBasicBlock *StartMBB = MBB; 2938 MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); 2939 MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); 2940 2941 // StartMBB: 2942 // BRC CCMask, JoinMBB 2943 // # fallthrough to FalseMBB 2944 MBB = StartMBB; 2945 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 2946 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); 2947 MBB->addSuccessor(JoinMBB); 2948 MBB->addSuccessor(FalseMBB); 2949 2950 // FalseMBB: 2951 // # fallthrough to JoinMBB 2952 MBB = FalseMBB; 2953 MBB->addSuccessor(JoinMBB); 2954 2955 // JoinMBB: 2956 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] 2957 // ... 2958 MBB = JoinMBB; 2959 BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg) 2960 .addReg(TrueReg).addMBB(StartMBB) 2961 .addReg(FalseReg).addMBB(FalseMBB); 2962 2963 MI->eraseFromParent(); 2964 return JoinMBB; 2965 } 2966 2967 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. 2968 // StoreOpcode is the store to use and Invert says whether the store should 2969 // happen when the condition is false rather than true. If a STORE ON 2970 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. 
MachineBasicBlock *
SystemZTargetLowering::emitCondStore(MachineInstr *MI,
                                     MachineBasicBlock *MBB,
                                     unsigned StoreOpcode, unsigned STOCOpcode,
                                     bool Invert) const {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());

  // Operands of the pseudo: value to store, base + displacement + index
  // address, and the CC valid/mask pair describing the condition.
  unsigned SrcReg = MI->getOperand(0).getReg();
  MachineOperand Base = MI->getOperand(1);
  int64_t Disp = MI->getOperand(2).getImm();
  unsigned IndexReg = MI->getOperand(3).getReg();
  unsigned CCValid = MI->getOperand(4).getImm();
  unsigned CCMask = MI->getOperand(5).getImm();
  DebugLoc DL = MI->getDebugLoc();

  // Pick the form of the store that matches the displacement.
  StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);

  // Use STOCOpcode if possible.  We could use different store patterns in
  // order to avoid matching the index register, but the performance trade-offs
  // might be more complicated in that case.
  if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
    // The store-on-condition is given the mask for which the store should
    // happen, so the mask is flipped only for the inverted form.
    if (Invert)
      CCMask ^= CCValid;
    BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
      .addReg(SrcReg).addOperand(Base).addImm(Disp)
      .addImm(CCValid).addImm(CCMask);
    MI->eraseFromParent();
    return MBB;
  }

  // Get the condition needed to branch around the store.
  if (!Invert)
    CCMask ^= CCValid;

  // MI and everything after it move to JoinMBB; FalseMBB will hold the
  // store.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  //  FalseMBB:
  //   store %SrcReg, %Disp(%Index,%Base)
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  BuildMI(MBB, DL, TII->get(StoreOpcode))
    .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
  MBB->addSuccessor(JoinMBB);

  MI->eraseFromParent();
  return JoinMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
// or ATOMIC_SWAP{,W} instruction MI.  BinOpcode is the instruction that
// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
// BitSize is the width of the field in bits, or 0 if this is a partword
// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
// is one of the operands.  Invert says whether the field should be
// inverted after performing BinOpcode (e.g. for NAND).
//
// The expansion is a load followed by a compare-and-swap loop; the block
// that follows the loop (DoneMBB) is returned.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
                                            MachineBasicBlock *MBB,
                                            unsigned BinOpcode,
                                            unsigned BitSize,
                                            bool Invert) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  // A BitSize below 32 (0 in practice, per the contract above) marks the
  // partword form, whose shift amounts and real field width are operands
  // 4-6 of MI.
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  // Src2 can be a register or immediate.
  unsigned Dest = MI->getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
  int64_t Disp = MI->getOperand(2).getImm();
  MachineOperand Src2 = earlyUseOperand(MI->getOperand(3));
  unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
  DebugLoc DL = MI->getDebugLoc();
  if (IsSubWord)
    BitSize = MI->getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.  A full-width swap
  // (no BinOpcode, not subword) stores Src2 directly, so NewVal is Src2's
  // register.  The Rotated* registers are distinct only for subword
  // operations.
  unsigned OrigVal       = MRI.createVirtualRegister(RC);
  unsigned OldVal        = MRI.createVirtualRegister(RC);
  unsigned NewVal        = (BinOpcode || IsSubWord ?
                            MRI.createVirtualRegister(RC) : Src2.getReg());
  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert a basic block for the main loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
    .addOperand(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   %RotatedNewVal = OP %RotatedOldVal, %Src2
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(LoopMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  if (Invert) {
    // Perform the operation normally and then invert every bit of the field.
    unsigned Tmp = MRI.createVirtualRegister(RC);
    BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
      .addReg(RotatedOldVal).addOperand(Src2);
    if (BitSize <= 32)
      // XILF with the upper BitSize bits set.
      BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
        .addReg(Tmp).addImm(-1U << (32 - BitSize));
    else {
      // Use LCGR and add -1 to the result, which is more compact than
      // an XILF, XILH pair.  (Presumably LCGR negates, so that -x - 1
      // gives the bitwise complement -- confirm against the ISA manual.)
      unsigned Tmp2 = MRI.createVirtualRegister(RC);
      BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
      BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
        .addReg(Tmp2).addImm(-1);
    }
  } else if (BinOpcode)
    // A simple binary operation.
    BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
      .addReg(RotatedOldVal).addOperand(Src2);
  else if (IsSubWord)
    // Use RISBG to rotate Src2 into position and use it to replace the
    // field in RotatedOldVal.
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
      .addReg(RotatedOldVal).addReg(Src2.getReg())
      .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
  // Retry for as long as the compare-and-swap reports a mismatch.
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI->eraseFromParent();
  return DoneMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo
// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
// instruction that should be used to compare the current field with the
// minimum or maximum value.  KeepOldMask is the BRC condition-code mask
// for when the current field should be kept.  BitSize is the width of
// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
//
// The expansion is a load followed by a compare-and-swap loop; the block
// that follows the loop (DoneMBB) is returned.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
                                            MachineBasicBlock *MBB,
                                            unsigned CompareOpcode,
                                            unsigned KeepOldMask,
                                            unsigned BitSize) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  // A BitSize below 32 (0 in practice, per the contract above) marks the
  // partword form, whose shift amounts and real field width are operands
  // 4-6 of MI.
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  unsigned Dest = MI->getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
  int64_t  Disp = MI->getOperand(2).getImm();
  unsigned Src2 = MI->getOperand(3).getReg();
  unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
  DebugLoc DL = MI->getDebugLoc();
  if (IsSubWord)
    BitSize = MI->getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.  For full-width
  // operations the Rotated* names alias OldVal, Src2 and NewVal, since no
  // rotation is emitted.
  unsigned OrigVal       = MRI.createVirtualRegister(RC);
  unsigned OldVal        = MRI.createVirtualRegister(RC);
  unsigned NewVal        = MRI.createVirtualRegister(RC);
  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert 3 basic blocks for the loop (LoopMBB, UseAltMBB and UpdateMBB);
  // DoneMBB holds MI and everything that followed it.
  MachineBasicBlock *StartMBB  = MBB;
  MachineBasicBlock *DoneMBB   = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB   = emitBlockAfter(StartMBB);
  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal     = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
    .addOperand(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   CompareOpcode %RotatedOldVal, %Src2
  //   BRC KeepOldMask, UpdateMBB
  //   # fall through to UseAltMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(UpdateMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CompareOpcode))
    .addReg(RotatedOldVal).addReg(Src2);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
  MBB->addSuccessor(UpdateMBB);
  MBB->addSuccessor(UseAltMBB);

  //  UseAltMBB:
  //   %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
  //   # fall through to UpdateMBB
  // (For full-width operations the block is empty and %RotatedAltVal is
  // simply %Src2.)
  MBB = UseAltMBB;
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
      .addReg(RotatedOldVal).addReg(Src2)
      .addImm(32).addImm(31 + BitSize).addImm(0);
  MBB->addSuccessor(UpdateMBB);

  //  UpdateMBB:
  //   %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
  //                        [ %RotatedAltVal, UseAltMBB ]
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = UpdateMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
    .addReg(RotatedOldVal).addMBB(LoopMBB)
    .addReg(RotatedAltVal).addMBB(UseAltMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
  // Retry for as long as the compare-and-swap reports a mismatch.
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI->eraseFromParent();
  return DoneMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
// instruction MI.  The expansion is a load followed by a loop that
// compares the field and, on a match, tries to store the new field with a
// fullword compare-and-swap.  The block that follows the loop (DoneMBB)
// is returned.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
                                          MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands.  Base can be a register or a frame index.
  unsigned Dest        = MI->getOperand(0).getReg();
  MachineOperand Base  = earlyUseOperand(MI->getOperand(1));
  int64_t  Disp        = MI->getOperand(2).getImm();
  unsigned OrigCmpVal  = MI->getOperand(3).getReg();
  unsigned OrigSwapVal = MI->getOperand(4).getReg();
  unsigned BitShift    = MI->getOperand(5).getReg();
  unsigned NegBitShift = MI->getOperand(6).getReg();
  int64_t  BitSize     = MI->getOperand(7).getImm();
  DebugLoc DL          = MI->getDebugLoc();

  const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;

  // Get the right opcodes for the displacement.
  unsigned LOpcode  = TII->getOpcodeForOffset(SystemZ::L,  Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  unsigned OrigOldVal   = MRI.createVirtualRegister(RC);
  unsigned OldVal       = MRI.createVirtualRegister(RC);
  unsigned CmpVal       = MRI.createVirtualRegister(RC);
  unsigned SwapVal      = MRI.createVirtualRegister(RC);
  unsigned StoreVal     = MRI.createVirtualRegister(RC);
  unsigned RetryOldVal  = MRI.createVirtualRegister(RC);
  unsigned RetryCmpVal  = MRI.createVirtualRegister(RC);
  unsigned RetrySwapVal = MRI.createVirtualRegister(RC);

  // Insert 2 basic blocks for the loop (LoopMBB and SetMBB); DoneMBB holds
  // MI and everything that followed it.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);
  MachineBasicBlock *SetMBB   = emitBlockAfter(LoopMBB);

  //  StartMBB:
  //   ...
  //   %OrigOldVal     = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
    .addOperand(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigOldVal, StartMBB ], [ %RetryOldVal, SetMBB ]
  //   %CmpVal        = phi [ %OrigCmpVal, StartMBB ], [ %RetryCmpVal, SetMBB ]
  //   %SwapVal       = phi [ %OrigSwapVal, StartMBB ], [ %RetrySwapVal, SetMBB ]
  //   %Dest          = RLL %OldVal, BitSize(%BitShift)
  //                      ^^ The low BitSize bits contain the field
  //                         of interest.
  //   %RetryCmpVal   = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
  //                      ^^ Replace the upper 32-BitSize bits of the
  //                         comparison value with those that we loaded,
  //                         so that we can use a full word comparison.
  //   CR %Dest, %RetryCmpVal
  //   JNE DoneMBB
  //   # Fall through to SetMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigOldVal).addMBB(StartMBB)
    .addReg(RetryOldVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
    .addReg(OrigCmpVal).addMBB(StartMBB)
    .addReg(RetryCmpVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
    .addReg(OrigSwapVal).addMBB(StartMBB)
    .addReg(RetrySwapVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
    .addReg(OldVal).addReg(BitShift).addImm(BitSize);
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
    .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::CR))
    .addReg(Dest).addReg(RetryCmpVal);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP)
    .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
  MBB->addSuccessor(DoneMBB);
  MBB->addSuccessor(SetMBB);

  //  SetMBB:
  //   %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
  //                      ^^ Replace the upper 32-BitSize bits of the new
  //                         value with those that we loaded.
  //   %StoreVal    = RLL %RetrySwapVal, -BitSize(%NegBitShift)
  //                      ^^ Rotate the new field to its proper position.
  //   %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = SetMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
    .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
    .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
  BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
    .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
  // Retry for as long as the compare-and-swap reports a mismatch.
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI->eraseFromParent();
  return DoneMBB;
}

// Emit an extension from a GR32 or GR64 to a GR128.  ClearEven is true
// if the high register of the GR128 value must be cleared or false if
// it's "don't care".  SubReg is subreg_l32 when extending a GR32
// and subreg_l64 when extending a GR64.
3389 MachineBasicBlock * 3390 SystemZTargetLowering::emitExt128(MachineInstr *MI, 3391 MachineBasicBlock *MBB, 3392 bool ClearEven, unsigned SubReg) const { 3393 MachineFunction &MF = *MBB->getParent(); 3394 const SystemZInstrInfo *TII = 3395 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); 3396 MachineRegisterInfo &MRI = MF.getRegInfo(); 3397 DebugLoc DL = MI->getDebugLoc(); 3398 3399 unsigned Dest = MI->getOperand(0).getReg(); 3400 unsigned Src = MI->getOperand(1).getReg(); 3401 unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); 3402 3403 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); 3404 if (ClearEven) { 3405 unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); 3406 unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); 3407 3408 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) 3409 .addImm(0); 3410 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128) 3411 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64); 3412 In128 = NewIn128; 3413 } 3414 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) 3415 .addReg(In128).addReg(Src).addImm(SubReg); 3416 3417 MI->eraseFromParent(); 3418 return MBB; 3419 } 3420 3421 MachineBasicBlock * 3422 SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, 3423 MachineBasicBlock *MBB, 3424 unsigned Opcode) const { 3425 MachineFunction &MF = *MBB->getParent(); 3426 const SystemZInstrInfo *TII = 3427 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); 3428 MachineRegisterInfo &MRI = MF.getRegInfo(); 3429 DebugLoc DL = MI->getDebugLoc(); 3430 3431 MachineOperand DestBase = earlyUseOperand(MI->getOperand(0)); 3432 uint64_t DestDisp = MI->getOperand(1).getImm(); 3433 MachineOperand SrcBase = earlyUseOperand(MI->getOperand(2)); 3434 uint64_t SrcDisp = MI->getOperand(3).getImm(); 3435 uint64_t Length = MI->getOperand(4).getImm(); 3436 3437 // When generating more than one CLC, all 
but the last will need to 3438 // branch to the end when a difference is found. 3439 MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ? 3440 splitBlockAfter(MI, MBB) : nullptr); 3441 3442 // Check for the loop form, in which operand 5 is the trip count. 3443 if (MI->getNumExplicitOperands() > 5) { 3444 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); 3445 3446 uint64_t StartCountReg = MI->getOperand(5).getReg(); 3447 uint64_t StartSrcReg = forceReg(MI, SrcBase, TII); 3448 uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg : 3449 forceReg(MI, DestBase, TII)); 3450 3451 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; 3452 uint64_t ThisSrcReg = MRI.createVirtualRegister(RC); 3453 uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg : 3454 MRI.createVirtualRegister(RC)); 3455 uint64_t NextSrcReg = MRI.createVirtualRegister(RC); 3456 uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg : 3457 MRI.createVirtualRegister(RC)); 3458 3459 RC = &SystemZ::GR64BitRegClass; 3460 uint64_t ThisCountReg = MRI.createVirtualRegister(RC); 3461 uint64_t NextCountReg = MRI.createVirtualRegister(RC); 3462 3463 MachineBasicBlock *StartMBB = MBB; 3464 MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); 3465 MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); 3466 MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB); 3467 3468 // StartMBB: 3469 // # fall through to LoopMMB 3470 MBB->addSuccessor(LoopMBB); 3471 3472 // LoopMBB: 3473 // %ThisDestReg = phi [ %StartDestReg, StartMBB ], 3474 // [ %NextDestReg, NextMBB ] 3475 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ], 3476 // [ %NextSrcReg, NextMBB ] 3477 // %ThisCountReg = phi [ %StartCountReg, StartMBB ], 3478 // [ %NextCountReg, NextMBB ] 3479 // ( PFD 2, 768+DestDisp(%ThisDestReg) ) 3480 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg) 3481 // ( JLH EndMBB ) 3482 // 3483 // The prefetch is used only for MVC. The JLH is used only for CLC. 
3484 MBB = LoopMBB; 3485 3486 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg) 3487 .addReg(StartDestReg).addMBB(StartMBB) 3488 .addReg(NextDestReg).addMBB(NextMBB); 3489 if (!HaveSingleBase) 3490 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg) 3491 .addReg(StartSrcReg).addMBB(StartMBB) 3492 .addReg(NextSrcReg).addMBB(NextMBB); 3493 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg) 3494 .addReg(StartCountReg).addMBB(StartMBB) 3495 .addReg(NextCountReg).addMBB(NextMBB); 3496 if (Opcode == SystemZ::MVC) 3497 BuildMI(MBB, DL, TII->get(SystemZ::PFD)) 3498 .addImm(SystemZ::PFD_WRITE) 3499 .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0); 3500 BuildMI(MBB, DL, TII->get(Opcode)) 3501 .addReg(ThisDestReg).addImm(DestDisp).addImm(256) 3502 .addReg(ThisSrcReg).addImm(SrcDisp); 3503 if (EndMBB) { 3504 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 3505 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) 3506 .addMBB(EndMBB); 3507 MBB->addSuccessor(EndMBB); 3508 MBB->addSuccessor(NextMBB); 3509 } 3510 3511 // NextMBB: 3512 // %NextDestReg = LA 256(%ThisDestReg) 3513 // %NextSrcReg = LA 256(%ThisSrcReg) 3514 // %NextCountReg = AGHI %ThisCountReg, -1 3515 // CGHI %NextCountReg, 0 3516 // JLH LoopMBB 3517 // # fall through to DoneMMB 3518 // 3519 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. 
3520 MBB = NextMBB; 3521 3522 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg) 3523 .addReg(ThisDestReg).addImm(256).addReg(0); 3524 if (!HaveSingleBase) 3525 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg) 3526 .addReg(ThisSrcReg).addImm(256).addReg(0); 3527 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg) 3528 .addReg(ThisCountReg).addImm(-1); 3529 BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) 3530 .addReg(NextCountReg).addImm(0); 3531 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 3532 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) 3533 .addMBB(LoopMBB); 3534 MBB->addSuccessor(LoopMBB); 3535 MBB->addSuccessor(DoneMBB); 3536 3537 DestBase = MachineOperand::CreateReg(NextDestReg, false); 3538 SrcBase = MachineOperand::CreateReg(NextSrcReg, false); 3539 Length &= 255; 3540 MBB = DoneMBB; 3541 } 3542 // Handle any remaining bytes with straight-line code. 3543 while (Length > 0) { 3544 uint64_t ThisLength = std::min(Length, uint64_t(256)); 3545 // The previous iteration might have created out-of-range displacements. 3546 // Apply them using LAY if so. 
3547 if (!isUInt<12>(DestDisp)) { 3548 unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 3549 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg) 3550 .addOperand(DestBase).addImm(DestDisp).addReg(0); 3551 DestBase = MachineOperand::CreateReg(Reg, false); 3552 DestDisp = 0; 3553 } 3554 if (!isUInt<12>(SrcDisp)) { 3555 unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 3556 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg) 3557 .addOperand(SrcBase).addImm(SrcDisp).addReg(0); 3558 SrcBase = MachineOperand::CreateReg(Reg, false); 3559 SrcDisp = 0; 3560 } 3561 BuildMI(*MBB, MI, DL, TII->get(Opcode)) 3562 .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength) 3563 .addOperand(SrcBase).addImm(SrcDisp); 3564 DestDisp += ThisLength; 3565 SrcDisp += ThisLength; 3566 Length -= ThisLength; 3567 // If there's another CLC to go, branch to the end if a difference 3568 // was found. 3569 if (EndMBB && Length > 0) { 3570 MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB); 3571 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 3572 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) 3573 .addMBB(EndMBB); 3574 MBB->addSuccessor(EndMBB); 3575 MBB->addSuccessor(NextMBB); 3576 MBB = NextMBB; 3577 } 3578 } 3579 if (EndMBB) { 3580 MBB->addSuccessor(EndMBB); 3581 MBB = EndMBB; 3582 MBB->addLiveIn(SystemZ::CC); 3583 } 3584 3585 MI->eraseFromParent(); 3586 return MBB; 3587 } 3588 3589 // Decompose string pseudo-instruction MI into a loop that continually performs 3590 // Opcode until CC != 3. 
3591 MachineBasicBlock * 3592 SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, 3593 MachineBasicBlock *MBB, 3594 unsigned Opcode) const { 3595 MachineFunction &MF = *MBB->getParent(); 3596 const SystemZInstrInfo *TII = 3597 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); 3598 MachineRegisterInfo &MRI = MF.getRegInfo(); 3599 DebugLoc DL = MI->getDebugLoc(); 3600 3601 uint64_t End1Reg = MI->getOperand(0).getReg(); 3602 uint64_t Start1Reg = MI->getOperand(1).getReg(); 3603 uint64_t Start2Reg = MI->getOperand(2).getReg(); 3604 uint64_t CharReg = MI->getOperand(3).getReg(); 3605 3606 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass; 3607 uint64_t This1Reg = MRI.createVirtualRegister(RC); 3608 uint64_t This2Reg = MRI.createVirtualRegister(RC); 3609 uint64_t End2Reg = MRI.createVirtualRegister(RC); 3610 3611 MachineBasicBlock *StartMBB = MBB; 3612 MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); 3613 MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); 3614 3615 // StartMBB: 3616 // # fall through to LoopMMB 3617 MBB->addSuccessor(LoopMBB); 3618 3619 // LoopMBB: 3620 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ] 3621 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ] 3622 // R0L = %CharReg 3623 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L 3624 // JO LoopMBB 3625 // # fall through to DoneMMB 3626 // 3627 // The load of R0L can be hoisted by post-RA LICM. 
3628 MBB = LoopMBB; 3629 3630 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg) 3631 .addReg(Start1Reg).addMBB(StartMBB) 3632 .addReg(End1Reg).addMBB(LoopMBB); 3633 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg) 3634 .addReg(Start2Reg).addMBB(StartMBB) 3635 .addReg(End2Reg).addMBB(LoopMBB); 3636 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg); 3637 BuildMI(MBB, DL, TII->get(Opcode)) 3638 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define) 3639 .addReg(This1Reg).addReg(This2Reg); 3640 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 3641 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB); 3642 MBB->addSuccessor(LoopMBB); 3643 MBB->addSuccessor(DoneMBB); 3644 3645 DoneMBB->addLiveIn(SystemZ::CC); 3646 3647 MI->eraseFromParent(); 3648 return DoneMBB; 3649 } 3650 3651 // Update TBEGIN instruction with final opcode and register clobbers. 3652 MachineBasicBlock * 3653 SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, 3654 MachineBasicBlock *MBB, 3655 unsigned Opcode, 3656 bool NoFloat) const { 3657 MachineFunction &MF = *MBB->getParent(); 3658 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 3659 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 3660 3661 // Update opcode. 3662 MI->setDesc(TII->get(Opcode)); 3663 3664 // We cannot handle a TBEGIN that clobbers the stack or frame pointer. 3665 // Make sure to add the corresponding GRSM bits if they are missing. 3666 uint64_t Control = MI->getOperand(2).getImm(); 3667 static const unsigned GPRControlBit[16] = { 3668 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, 3669 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 3670 }; 3671 Control |= GPRControlBit[15]; 3672 if (TFI->hasFP(MF)) 3673 Control |= GPRControlBit[11]; 3674 MI->getOperand(2).setImm(Control); 3675 3676 // Add GPR clobbers. 
3677 for (int I = 0; I < 16; I++) { 3678 if ((Control & GPRControlBit[I]) == 0) { 3679 unsigned Reg = SystemZMC::GR64Regs[I]; 3680 MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); 3681 } 3682 } 3683 3684 // Add FPR clobbers. 3685 if (!NoFloat && (Control & 4) != 0) { 3686 for (int I = 0; I < 16; I++) { 3687 unsigned Reg = SystemZMC::FP64Regs[I]; 3688 MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); 3689 } 3690 } 3691 3692 return MBB; 3693 } 3694 3695 MachineBasicBlock *SystemZTargetLowering:: 3696 EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { 3697 switch (MI->getOpcode()) { 3698 case SystemZ::Select32Mux: 3699 case SystemZ::Select32: 3700 case SystemZ::SelectF32: 3701 case SystemZ::Select64: 3702 case SystemZ::SelectF64: 3703 case SystemZ::SelectF128: 3704 return emitSelect(MI, MBB); 3705 3706 case SystemZ::CondStore8Mux: 3707 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false); 3708 case SystemZ::CondStore8MuxInv: 3709 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true); 3710 case SystemZ::CondStore16Mux: 3711 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false); 3712 case SystemZ::CondStore16MuxInv: 3713 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true); 3714 case SystemZ::CondStore8: 3715 return emitCondStore(MI, MBB, SystemZ::STC, 0, false); 3716 case SystemZ::CondStore8Inv: 3717 return emitCondStore(MI, MBB, SystemZ::STC, 0, true); 3718 case SystemZ::CondStore16: 3719 return emitCondStore(MI, MBB, SystemZ::STH, 0, false); 3720 case SystemZ::CondStore16Inv: 3721 return emitCondStore(MI, MBB, SystemZ::STH, 0, true); 3722 case SystemZ::CondStore32: 3723 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false); 3724 case SystemZ::CondStore32Inv: 3725 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true); 3726 case SystemZ::CondStore64: 3727 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false); 3728 case SystemZ::CondStore64Inv: 3729 return emitCondStore(MI, 
MBB, SystemZ::STG, SystemZ::STOCG, true); 3730 case SystemZ::CondStoreF32: 3731 return emitCondStore(MI, MBB, SystemZ::STE, 0, false); 3732 case SystemZ::CondStoreF32Inv: 3733 return emitCondStore(MI, MBB, SystemZ::STE, 0, true); 3734 case SystemZ::CondStoreF64: 3735 return emitCondStore(MI, MBB, SystemZ::STD, 0, false); 3736 case SystemZ::CondStoreF64Inv: 3737 return emitCondStore(MI, MBB, SystemZ::STD, 0, true); 3738 3739 case SystemZ::AEXT128_64: 3740 return emitExt128(MI, MBB, false, SystemZ::subreg_l64); 3741 case SystemZ::ZEXT128_32: 3742 return emitExt128(MI, MBB, true, SystemZ::subreg_l32); 3743 case SystemZ::ZEXT128_64: 3744 return emitExt128(MI, MBB, true, SystemZ::subreg_l64); 3745 3746 case SystemZ::ATOMIC_SWAPW: 3747 return emitAtomicLoadBinary(MI, MBB, 0, 0); 3748 case SystemZ::ATOMIC_SWAP_32: 3749 return emitAtomicLoadBinary(MI, MBB, 0, 32); 3750 case SystemZ::ATOMIC_SWAP_64: 3751 return emitAtomicLoadBinary(MI, MBB, 0, 64); 3752 3753 case SystemZ::ATOMIC_LOADW_AR: 3754 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0); 3755 case SystemZ::ATOMIC_LOADW_AFI: 3756 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0); 3757 case SystemZ::ATOMIC_LOAD_AR: 3758 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32); 3759 case SystemZ::ATOMIC_LOAD_AHI: 3760 return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32); 3761 case SystemZ::ATOMIC_LOAD_AFI: 3762 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32); 3763 case SystemZ::ATOMIC_LOAD_AGR: 3764 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64); 3765 case SystemZ::ATOMIC_LOAD_AGHI: 3766 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64); 3767 case SystemZ::ATOMIC_LOAD_AGFI: 3768 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64); 3769 3770 case SystemZ::ATOMIC_LOADW_SR: 3771 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0); 3772 case SystemZ::ATOMIC_LOAD_SR: 3773 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32); 3774 case SystemZ::ATOMIC_LOAD_SGR: 3775 return 
emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64); 3776 3777 case SystemZ::ATOMIC_LOADW_NR: 3778 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0); 3779 case SystemZ::ATOMIC_LOADW_NILH: 3780 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0); 3781 case SystemZ::ATOMIC_LOAD_NR: 3782 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32); 3783 case SystemZ::ATOMIC_LOAD_NILL: 3784 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32); 3785 case SystemZ::ATOMIC_LOAD_NILH: 3786 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32); 3787 case SystemZ::ATOMIC_LOAD_NILF: 3788 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32); 3789 case SystemZ::ATOMIC_LOAD_NGR: 3790 return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64); 3791 case SystemZ::ATOMIC_LOAD_NILL64: 3792 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64); 3793 case SystemZ::ATOMIC_LOAD_NILH64: 3794 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64); 3795 case SystemZ::ATOMIC_LOAD_NIHL64: 3796 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64); 3797 case SystemZ::ATOMIC_LOAD_NIHH64: 3798 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64); 3799 case SystemZ::ATOMIC_LOAD_NILF64: 3800 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64); 3801 case SystemZ::ATOMIC_LOAD_NIHF64: 3802 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64); 3803 3804 case SystemZ::ATOMIC_LOADW_OR: 3805 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0); 3806 case SystemZ::ATOMIC_LOADW_OILH: 3807 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0); 3808 case SystemZ::ATOMIC_LOAD_OR: 3809 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32); 3810 case SystemZ::ATOMIC_LOAD_OILL: 3811 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32); 3812 case SystemZ::ATOMIC_LOAD_OILH: 3813 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32); 3814 case SystemZ::ATOMIC_LOAD_OILF: 3815 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32); 3816 case SystemZ::ATOMIC_LOAD_OGR: 3817 return 
emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64); 3818 case SystemZ::ATOMIC_LOAD_OILL64: 3819 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64); 3820 case SystemZ::ATOMIC_LOAD_OILH64: 3821 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64); 3822 case SystemZ::ATOMIC_LOAD_OIHL64: 3823 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64); 3824 case SystemZ::ATOMIC_LOAD_OIHH64: 3825 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64); 3826 case SystemZ::ATOMIC_LOAD_OILF64: 3827 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64); 3828 case SystemZ::ATOMIC_LOAD_OIHF64: 3829 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64); 3830 3831 case SystemZ::ATOMIC_LOADW_XR: 3832 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0); 3833 case SystemZ::ATOMIC_LOADW_XILF: 3834 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0); 3835 case SystemZ::ATOMIC_LOAD_XR: 3836 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32); 3837 case SystemZ::ATOMIC_LOAD_XILF: 3838 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32); 3839 case SystemZ::ATOMIC_LOAD_XGR: 3840 return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64); 3841 case SystemZ::ATOMIC_LOAD_XILF64: 3842 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64); 3843 case SystemZ::ATOMIC_LOAD_XIHF64: 3844 return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64); 3845 3846 case SystemZ::ATOMIC_LOADW_NRi: 3847 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true); 3848 case SystemZ::ATOMIC_LOADW_NILHi: 3849 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true); 3850 case SystemZ::ATOMIC_LOAD_NRi: 3851 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true); 3852 case SystemZ::ATOMIC_LOAD_NILLi: 3853 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true); 3854 case SystemZ::ATOMIC_LOAD_NILHi: 3855 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true); 3856 case SystemZ::ATOMIC_LOAD_NILFi: 3857 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true); 
3858 case SystemZ::ATOMIC_LOAD_NGRi: 3859 return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true); 3860 case SystemZ::ATOMIC_LOAD_NILL64i: 3861 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true); 3862 case SystemZ::ATOMIC_LOAD_NILH64i: 3863 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true); 3864 case SystemZ::ATOMIC_LOAD_NIHL64i: 3865 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true); 3866 case SystemZ::ATOMIC_LOAD_NIHH64i: 3867 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true); 3868 case SystemZ::ATOMIC_LOAD_NILF64i: 3869 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true); 3870 case SystemZ::ATOMIC_LOAD_NIHF64i: 3871 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true); 3872 3873 case SystemZ::ATOMIC_LOADW_MIN: 3874 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, 3875 SystemZ::CCMASK_CMP_LE, 0); 3876 case SystemZ::ATOMIC_LOAD_MIN_32: 3877 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, 3878 SystemZ::CCMASK_CMP_LE, 32); 3879 case SystemZ::ATOMIC_LOAD_MIN_64: 3880 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, 3881 SystemZ::CCMASK_CMP_LE, 64); 3882 3883 case SystemZ::ATOMIC_LOADW_MAX: 3884 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, 3885 SystemZ::CCMASK_CMP_GE, 0); 3886 case SystemZ::ATOMIC_LOAD_MAX_32: 3887 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, 3888 SystemZ::CCMASK_CMP_GE, 32); 3889 case SystemZ::ATOMIC_LOAD_MAX_64: 3890 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, 3891 SystemZ::CCMASK_CMP_GE, 64); 3892 3893 case SystemZ::ATOMIC_LOADW_UMIN: 3894 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, 3895 SystemZ::CCMASK_CMP_LE, 0); 3896 case SystemZ::ATOMIC_LOAD_UMIN_32: 3897 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, 3898 SystemZ::CCMASK_CMP_LE, 32); 3899 case SystemZ::ATOMIC_LOAD_UMIN_64: 3900 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, 3901 SystemZ::CCMASK_CMP_LE, 64); 3902 3903 case SystemZ::ATOMIC_LOADW_UMAX: 3904 return 
emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, 3905 SystemZ::CCMASK_CMP_GE, 0); 3906 case SystemZ::ATOMIC_LOAD_UMAX_32: 3907 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, 3908 SystemZ::CCMASK_CMP_GE, 32); 3909 case SystemZ::ATOMIC_LOAD_UMAX_64: 3910 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, 3911 SystemZ::CCMASK_CMP_GE, 64); 3912 3913 case SystemZ::ATOMIC_CMP_SWAPW: 3914 return emitAtomicCmpSwapW(MI, MBB); 3915 case SystemZ::MVCSequence: 3916 case SystemZ::MVCLoop: 3917 return emitMemMemWrapper(MI, MBB, SystemZ::MVC); 3918 case SystemZ::NCSequence: 3919 case SystemZ::NCLoop: 3920 return emitMemMemWrapper(MI, MBB, SystemZ::NC); 3921 case SystemZ::OCSequence: 3922 case SystemZ::OCLoop: 3923 return emitMemMemWrapper(MI, MBB, SystemZ::OC); 3924 case SystemZ::XCSequence: 3925 case SystemZ::XCLoop: 3926 return emitMemMemWrapper(MI, MBB, SystemZ::XC); 3927 case SystemZ::CLCSequence: 3928 case SystemZ::CLCLoop: 3929 return emitMemMemWrapper(MI, MBB, SystemZ::CLC); 3930 case SystemZ::CLSTLoop: 3931 return emitStringWrapper(MI, MBB, SystemZ::CLST); 3932 case SystemZ::MVSTLoop: 3933 return emitStringWrapper(MI, MBB, SystemZ::MVST); 3934 case SystemZ::SRSTLoop: 3935 return emitStringWrapper(MI, MBB, SystemZ::SRST); 3936 case SystemZ::TBEGIN: 3937 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); 3938 case SystemZ::TBEGIN_nofloat: 3939 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); 3940 case SystemZ::TBEGINC: 3941 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); 3942 default: 3943 llvm_unreachable("Unexpected instr type to insert"); 3944 } 3945 } 3946