//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "systemz-lower"

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"

using namespace llvm;

// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
  : TargetLowering(tm, new TargetLoweringObjectFileELF()),
    Subtarget(*tm.getSubtargetImpl()), TM(tm) {
  MVT PtrVT = getPointerTy();

  // Set up the register classes.
  addRegisterClass(MVT::i32,  &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64,  &SystemZ::GR64BitRegClass);
  addRegisterClass(MVT::f32,  &SystemZ::FP32BitRegClass);
  addRegisterClass(MVT::f64,  &SystemZ::FP64BitRegClass);
  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties();

  // Set up special registers.
  setExceptionPointerRegister(SystemZ::R6D);
  setExceptionSelectorRegister(SystemZ::R7D);
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(2);

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand SETCC(X, Y, COND) into SELECT_CC(X, Y, 1, 0, COND).
      setOperationAction(ISD::SETCC, VT, Expand);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC, VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Expand ATOMIC_LOAD and ATOMIC_STORE using ATOMIC_CMP_SWAP.
      // FIXME: probably much too conservative.
      setOperationAction(ISD::ATOMIC_LOAD, VT, Expand);
      setOperationAction(ISD::ATOMIC_STORE, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible and a wider multiplication otherwise.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);

      // We have instructions for signed but not unsigned FP conversion.
      setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    }
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Custom);

  // We have instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // The architecture has 32-bit SMUL_LOHI and UMUL_LOHI (MR and MLR),
  // but they aren't really worth using. There is no 64-bit SMUL_LOHI,
  // but there is a 64-bit UMUL_LOHI: MLGR.
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool,     PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress,    PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress,     PtrVT, Custom);
  setOperationAction(ISD::JumpTable,        PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);

  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
    }
  }

  // We have fused multiply-addition for f32 and f64 but not f128.
  setOperationAction(ISD::FMA, MVT::f32,  Legal);
  setOperationAction(ISD::FMA, MVT::f64,  Legal);
  setOperationAction(ISD::FMA, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64,  MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  setOperationAction(ISD::BITCAST, MVT::i32, Custom);
  setOperationAction(ISD::BITCAST, MVT::f32, Custom);

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY,  MVT::Other, Custom);
  setOperationAction(ISD::VAEND,   MVT::Other, Expand);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC". Handle the choice in target-specific code instead.
  MaxStoresPerMemset = 0;
  MaxStoresPerMemsetOptSize = 0;
}

bool
SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return false;
  default:
    break;
  }

  return false;
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  return Imm.isZero() || Imm.isNegZero();
}

bool SystemZTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
                                                          bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = true;
  return true;
}

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'r': // General-purpose register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Other;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (CallOperandVal == NULL)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'r': // General-purpose register
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;

  case 'f': // Floating-point register
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;

  case 'I': // Unsigned 8-bit constant
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}

// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified. RC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(const std::string &Constraint,
                    const TargetRegisterClass *RC, const unsigned *Map) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    std::string Suffix(Constraint.data() + 2, Constraint.size() - 2);
    unsigned Index = atoi(Suffix.c_str());
    if (Index < 16 && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
}

std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'f': // Floating-point register
      if (VT == MVT::f64)
        return std::make_pair(0U, &SystemZ::FP64BitRegClass);
      else if (VT == MVT::f128)
        return std::make_pair(0U, &SystemZ::FP128BitRegClass);
      return std::make_pair(0U, &SystemZ::FP32BitRegClass);
    }
  }
  if (Constraint[0] == '{') {
    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT. The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (VT == MVT::i32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs);
      if (VT == MVT::i128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs);
    }
    if (Constraint[1] == 'f') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs);
      if (VT == MVT::f128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                             std::vector<SDValue> &Ops,
                             SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"

// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::Indirect)
    Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
                        MachinePointerInfo(), false, false, false, 0);
  else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}

// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA. Return a copy of Value converted to
// VA.getLocVT(). The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}

SDValue SystemZTargetLowering::
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                     const SmallVectorImpl<ISD::InputArg> &Ins,
                     SDLoc DL, SelectionDAG &DAG,
                     SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
    MF.getInfo<SystemZMachineFunctionInfo>();
  const SystemZFrameLowering *TFL =
    static_cast<const SystemZFrameLowering *>(TM.getFrameLowering());

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);

  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      }

      unsigned VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
                                      VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter. Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      EVT PtrVT = getPointerTy();
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(FI),
                             false, false, false, 0);
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be. The 1-byte size here is arbitrary.
    int64_t StackSize = CCInfo.getNextStackOffset();
    FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
    unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots. (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::NumArgFPRs) {
      SDValue MemOps[SystemZ::NumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
        int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
        unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(FI),
                                 false, false, 0);
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          &MemOps[NumFixedFPRs],
                          SystemZ::NumArgFPRs - NumFixedFPRs);
    }
  }

  return Chain;
}

SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy();

  // SystemZ target does not yet support tail call optimization.
  isTailCall = false;

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Mark the start of the call.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true),
                               DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                                         MachinePointerInfo::getFixedStack(FI),
                                         false, false, 0));
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc())
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot. Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
      unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                                    DAG.getIntPtrConstant(Offset));

      // Emit the store.
      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
                                         MachinePointerInfo(),
                                         false, false, 0));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  SDValue Glue;
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, Glue);
    Glue = Chain.getValue(1);
  }

  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, PtrVT, true),
                             DAG.getConstant(0, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];

    // Copy the value out, gluing the copy to the end of the call sequence.
    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
                                          VA.getLocVT(), Glue);
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // Convert the value of the return register into the value that's
    // being returned.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
  }

  return Chain;
}

SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain,
                                   CallingConv::ID CallConv, bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   SDLoc DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);

  // Copy the result values into the output registers.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);

    // Chain and glue the copies together.
    unsigned Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other,
                     RetOps.data(), RetOps.size());
}

// CC is a comparison that will be implemented using an integer or
// floating-point comparison. Return the condition code mask for
// a branch on true. In the integer case, CCMASK_CMP_UO is set for
// unsigned comparisons and clear for signed ones. In the floating-point
// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#define CONV(X) \
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X

  switch (CC) {
  default:
    llvm_unreachable("Invalid integer condition!");

  CONV(EQ);
  CONV(NE);
  CONV(GT);
  CONV(GE);
  CONV(LT);
  CONV(LE);

  case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
  }
#undef CONV
}

// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1
// is suitable for CLI(Y), CHHSI or CLHHSI, adjust the operands as necessary.
static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned,
                             SDValue &CmpOp0, SDValue &CmpOp1,
                             unsigned &CCMask) {
  // For us to make any changes, it must be a comparison between a single-use
  // load and a constant.
  if (!CmpOp0.hasOneUse() ||
      CmpOp0.getOpcode() != ISD::LOAD ||
      CmpOp1.getOpcode() != ISD::Constant)
    return;

  // We must have an 8- or 16-bit load.
  LoadSDNode *Load = cast<LoadSDNode>(CmpOp0);
  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
  if (NumBits != 8 && NumBits != 16)
    return;

  // The load must be an extending one and the constant must be within the
  // range of the unextended value.
  ConstantSDNode *Constant = cast<ConstantSDNode>(CmpOp1);
  uint64_t Value = Constant->getZExtValue();
  uint64_t Mask = (1 << NumBits) - 1;
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
    int64_t SignedValue = Constant->getSExtValue();
    if (uint64_t(SignedValue) + (1ULL << (NumBits - 1)) > Mask)
      return;
    // Unsigned comparison between two sign-extended values is equivalent
    // to unsigned comparison between two zero-extended values.
    if (IsUnsigned)
      Value &= Mask;
    else if (CCMask == SystemZ::CCMASK_CMP_EQ ||
             CCMask == SystemZ::CCMASK_CMP_NE)
      // Any choice of IsUnsigned is OK for equality comparisons.
      // We could use either CHHSI or CLHHSI for 16-bit comparisons,
      // but since we use CLHHSI for zero extensions, it seems better
      // to be consistent and do the same here.
      Value &= Mask, IsUnsigned = true;
    else if (NumBits == 8) {
      // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
      if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_LT)
        // Test whether the high bit of the byte is set.
        Value = 127, CCMask = SystemZ::CCMASK_CMP_GT, IsUnsigned = true;
      else if (SignedValue == -1 && CCMask == SystemZ::CCMASK_CMP_GT)
        // Test whether the high bit of the byte is clear.
        Value = 128, CCMask = SystemZ::CCMASK_CMP_LT, IsUnsigned = true;
      else
        // No instruction exists for this combination.
        return;
    }
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
    if (Value > Mask)
      return;
    // Signed comparison between two zero-extended values is equivalent
    // to unsigned comparison.
    IsUnsigned = true;
  } else
    return;

  // Make sure that the first operand is an i32 of the right extension type.
  ISD::LoadExtType ExtType = IsUnsigned ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
  if (CmpOp0.getValueType() != MVT::i32 ||
      Load->getExtensionType() != ExtType)
    CmpOp0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32,
                            Load->getChain(), Load->getBasePtr(),
                            Load->getPointerInfo(), Load->getMemoryVT(),
                            Load->isVolatile(), Load->isNonTemporal(),
                            Load->getAlignment());

  // Make sure that the second operand is an i32 with the right value.
  if (CmpOp1.getValueType() != MVT::i32 ||
      Value != Constant->getZExtValue())
    CmpOp1 = DAG.getConstant(Value, MVT::i32);
}

// Return true if a comparison described by CCMask, CmpOp0 and CmpOp1
// is an equality comparison that is better implemented using unsigned
// rather than signed comparison instructions.
static bool preferUnsignedComparison(SelectionDAG &DAG, SDValue CmpOp0,
                                     SDValue CmpOp1, unsigned CCMask) {
  // The test must be for equality or inequality.
  if (CCMask != SystemZ::CCMASK_CMP_EQ && CCMask != SystemZ::CCMASK_CMP_NE)
    return false;

  if (CmpOp1.getOpcode() == ISD::Constant) {
    uint64_t Value = cast<ConstantSDNode>(CmpOp1)->getSExtValue();

    // If we're comparing with memory, prefer unsigned comparisons for
    // values that are in the unsigned 16-bit range but not the signed
    // 16-bit range. We want to use CLFHSI and CLGHSI.
    if (CmpOp0.hasOneUse() &&
        ISD::isNormalLoad(CmpOp0.getNode()) &&
        (Value >= 32768 && Value < 65536))
      return true;

    // Use unsigned comparisons for values that are in the CLGFI range
    // but not in the CGFI range.
    if (CmpOp0.getValueType() == MVT::i64 && (Value >> 31) == 1)
      return true;

    return false;
  }

  // Prefer CL for zero-extended loads.
  if (CmpOp1.getOpcode() == ISD::ZERO_EXTEND ||
      ISD::isZEXTLoad(CmpOp1.getNode()))
    return true;

  // ...and for "in-register" zero extensions.
  if (CmpOp1.getOpcode() == ISD::AND && CmpOp1.getValueType() == MVT::i64) {
    SDValue Mask = CmpOp1.getOperand(1);
    if (Mask.getOpcode() == ISD::Constant &&
        cast<ConstantSDNode>(Mask)->getZExtValue() == 0xffffffff)
      return true;
  }

  return false;
}

// Return a target node that compares CmpOp0 and CmpOp1. Set CCMask to the
// 4-bit condition-code mask for CC.
static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
                       ISD::CondCode CC, unsigned &CCMask) {
  bool IsUnsigned = false;
  CCMask = CCMaskForCondCode(CC);
  if (!CmpOp0.getValueType().isFloatingPoint()) {
    IsUnsigned = CCMask & SystemZ::CCMASK_CMP_UO;
    CCMask &= ~SystemZ::CCMASK_CMP_UO;
    adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask);
    if (preferUnsignedComparison(DAG, CmpOp0, CmpOp1, CCMask))
      IsUnsigned = true;
  }

  SDLoc DL(CmpOp0);
  return DAG.getNode((IsUnsigned ? SystemZISD::UCMP : SystemZISD::CMP),
                     DL, MVT::Glue, CmpOp0, CmpOp1);
}

// Lower a binary operation that produces two VT results, one in each
// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
// on the extended Op0 and (unextended) Op1. Store the even register result
// in Even and the odd register result in Odd.
static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
                             unsigned Extend, unsigned Opcode,
                             SDValue Op0, SDValue Op1,
                             SDValue &Even, SDValue &Odd) {
  SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
                               SDValue(In128, 0), Op1);
  bool Is32Bit = is32Bit(VT);
  SDValue SubReg0 = DAG.getTargetConstant(SystemZ::even128(Is32Bit), VT);
  SDValue SubReg1 = DAG.getTargetConstant(SystemZ::odd128(Is32Bit), VT);
  SDNode *Reg0 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                    VT, Result, SubReg0);
  SDNode *Reg1 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                    VT, Result, SubReg1);
  Even = SDValue(Reg0, 0);
  Odd = SDValue(Reg1, 0);
}

SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue CmpOp0 = Op.getOperand(2);
  SDValue CmpOp1 = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  unsigned CCMask;
  SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask);
  return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
                     Chain, DAG.getConstant(CCMask, MVT::i32), Dest, Flags);
}

SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDValue CmpOp0 = Op.getOperand(0);
  SDValue CmpOp1 = Op.getOperand(1);
  SDValue TrueOp = Op.getOperand(2);
  SDValue FalseOp = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  unsigned CCMask;
  SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask);

  SmallVector<SDValue, 4> Ops;
  Ops.push_back(TrueOp);
  Ops.push_back(FalseOp);
  Ops.push_back(DAG.getConstant(CCMask, MVT::i32));
  Ops.push_back(Flags);

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size());
}

SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy();
  Reloc::Model RM = TM.getRelocationModel();
  CodeModel::Model CM = TM.getCodeModel();

  SDValue Result;
  if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
    // Make sure that the offset is aligned to a halfword. If it isn't,
    // create an "anchor" at the previous 12-bit boundary.
    // FIXME check whether there is a better way of handling this.
    if (Offset & 1) {
      Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                          Offset & ~uint64_t(0xfff));
      Offset &= 0xfff;
    } else {
      Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset);
      Offset = 0;
    }
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
  } else {
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(), false, false, false, 0);
  }

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
                         DAG.getConstant(Offset, PtrVT));

  return Result;
}

SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  EVT PtrVT = getPointerTy();
  TLSModel::Model model = TM.getTLSModel(GV);

  if (model != TLSModel::LocalExec)
    llvm_unreachable("only local-exec TLS mode supported");

  // The high part of the thread pointer is in access register 0.
  SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
                             DAG.getConstant(0, MVT::i32));
  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);

  // The low part of the thread pointer is in access register 1.
  SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
                             DAG.getConstant(1, MVT::i32));
  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);

  // Merge them into a single 64-bit address.
  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
                                    DAG.getConstant(32, PtrVT));
  SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);

  // Get the offset of GA from the thread pointer.
  SystemZConstantPoolValue *CPV =
    SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);

  // Force the offset into the constant pool and load it from there.
  SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8);
  SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
                               CPAddr, MachinePointerInfo::getConstantPool(),
                               false, false, false, 0);

  // Add the base and offset together.
  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
}

SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const BlockAddress *BA = Node->getBlockAddress();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy();

  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
  return Result;
}

SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
                                              SelectionDAG &DAG) const {
  SDLoc DL(JT);
  EVT PtrVT = getPointerTy();
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);

  // Use LARL to load the address of the table.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(CP);
  EVT PtrVT = getPointerTy();

  SDValue Result;
  if (CP->isMachineConstantPoolEntry())
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                       CP->getAlignment());
  else
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                       CP->getAlignment(), CP->getOffset());

  // Use LARL to load the address of the constant pool entry.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue In = Op.getOperand(0);
  EVT InVT = In.getValueType();
  EVT ResVT = Op.getValueType();

  SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
  SDValue Shift32 = DAG.getConstant(32, MVT::i64);
  if (InVT == MVT::i32 && ResVT == MVT::f32) {
    SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
    SDValue Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, Shift32);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shift);
    SDNode *Out = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                     MVT::f32, Out64, SubReg32);
    return SDValue(Out, 0);
  }
  if (InVT == MVT::f32 && ResVT == MVT::i32) {
    SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
    SDNode *In64 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                      MVT::f64, SDValue(U64, 0), In, SubReg32);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, SDValue(In64, 0));
    SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, Shift32);
    SDValue Out = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
    return Out;
  }
  llvm_unreachable("Unexpected bitcast combination");
}

SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SystemZMachineFunctionInfo *FuncInfo =
    MF.getInfo<SystemZMachineFunctionInfo>();
  EVT PtrVT = getPointerTy();

  SDValue Chain = Op.getOperand(0);
  SDValue Addr = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  // The initial values of each field.
  const unsigned NumFields = 4;
  SDValue Fields[NumFields] = {
    DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), PtrVT),
    DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), PtrVT),
    DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
    DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
  };

  // Store each field into its respective slot.
  SDValue MemOps[NumFields];
  unsigned Offset = 0;
  for (unsigned I = 0; I < NumFields; ++I) {
    SDValue FieldAddr = Addr;
    if (Offset != 0)
      FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
                              DAG.getIntPtrConstant(Offset));
    MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
                             MachinePointerInfo(SV, Offset),
                             false, false, 0);
    Offset += 8;
  }
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps, NumFields);
}

SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue DstPtr = Op.getOperand(1);
  SDValue SrcPtr = Op.getOperand(2);
  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);

  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32),
                       /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
                       MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}

SDValue SystemZTargetLowering::
lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDLoc DL(Op);

  unsigned SPReg = getStackPointerRegisterToSaveRestore();

  // Get a reference to the stack pointer.
  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);

  // Get the new stack pointer value.
  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size);

  // Copy the new stack pointer back.
  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);

  // The allocated data lives above the 160 bytes allocated for the standard
  // frame, plus any outgoing stack arguments. We don't know how much that
  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);

  SDValue Ops[2] = { Result, Chain };
  return DAG.getMergeValues(Ops, 2, DL);
}

SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  assert(!is32Bit(VT) && "Only support 64-bit UMUL_LOHI");

  // UMUL_LOHI64 returns the low result in the odd register and the high
  // result in the even register. UMUL_LOHI is defined to return the
  // low half first, so the results are in reverse order.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
                   Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, 2, DL);
}

SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Opcode;

  // We use DSGF for 32-bit division.
  if (is32Bit(VT)) {
    Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
    Opcode = SystemZISD::SDIVREM32;
  } else if (DAG.ComputeNumSignBits(Op1) > 32) {
    Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
    Opcode = SystemZISD::SDIVREM32;
  } else
    Opcode = SystemZISD::SDIVREM64;

  // DSG(F) takes a 64-bit dividend, so the even register in the GR128
  // input is "don't care". The instruction returns the remainder in
  // the even register and the quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode,
                   Op0, Op1, Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, 2, DL);
}

SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // DL(G) uses a double-width dividend, so we need to clear the even
  // register in the GR128 input. The instruction returns the remainder
  // in the even register and the quotient in the odd register.
  SDValue Ops[2];
  if (is32Bit(VT))
    lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  else
    lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, 2, DL);
}

SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");

  // Get the known-zero masks for each operand.
  SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
  APInt KnownZero[2], KnownOne[2];
  DAG.ComputeMaskedBits(Ops[0], KnownZero[0], KnownOne[0]);
  DAG.ComputeMaskedBits(Ops[1], KnownZero[1], KnownOne[1]);

  // See if the upper 32 bits of one operand and the lower 32 bits of the
  // other are known zero. They are the low and high operands respectively.
  uint64_t Masks[] = { KnownZero[0].getZExtValue(),
                       KnownZero[1].getZExtValue() };
  unsigned High, Low;
  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
    High = 1, Low = 0;
  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
    High = 0, Low = 1;
  else
    return Op;

  SDValue LowOp = Ops[Low];
  SDValue HighOp = Ops[High];

  // If the high part is a constant, we're better off using IILH.
  if (HighOp.getOpcode() == ISD::Constant)
    return Op;

  // If the low part is a constant that is outside the range of LHI,
  // then we're better off using IILF.
  if (LowOp.getOpcode() == ISD::Constant) {
    int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
    if (!isInt<16>(Value))
      return Op;
  }

  // Check whether the high part is an AND that doesn't change the
  // high 32 bits and just masks out low bits. We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
    ConstantSDNode *MaskNode = cast<ConstantSDNode>(HighOp.getOperand(1));
    uint64_t Mask = MaskNode->getZExtValue() | Masks[High];
    if ((Mask >> 32) == 0xffffffff)
      HighOp = HighOp.getOperand(0);
  }

  // Take advantage of the fact that all GR32 operations only change the
  // low 32 bits by truncating Low to an i32 and inserting it directly
  // using a subreg. The interesting cases are those where the truncation
  // can be folded.
  SDLoc DL(Op);
  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
  SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
  SDNode *Result = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                      MVT::i64, HighOp, Low32, SubReg32);
  return SDValue(Result, 0);
}

// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
                                                SelectionDAG &DAG,
                                                unsigned Opcode) const {
  AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());

  // 32-bit operations need no code outside the main loop.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getChain();
  SDValue Addr = Node->getBasePtr();
  SDValue Src2 = Node->getVal();
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);
  EVT PtrVT = Addr.getValueType();

  // Convert atomic subtracts of constants into additions.
  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
    if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Src2)) {
      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
      Src2 = DAG.getConstant(-Const->getSExtValue(), Src2.getValueType());
    }

  // Get the address of the containing word.
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, WideVT), BitShift);

  // Extend the source operand to 32 bits and prepare it for the inner loop.
  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
  // operations require the source to be shifted in advance. (This shift
  // can be folded if the source is constant.) For AND and NAND, the lower
  // bits must be set, while for other opcodes they should be left clear.
  if (Opcode != SystemZISD::ATOMIC_SWAPW)
    Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
                       DAG.getConstant(32 - BitSize, WideVT));
  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
      Opcode == SystemZISD::ATOMIC_LOADW_NAND)
    Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
                       DAG.getConstant(uint32_t(-1) >> BitSize, WideVT));

  // Construct the ATOMIC_LOADW_* node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
                    DAG.getConstant(BitSize, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
                                             array_lengthof(Ops),
                                             NarrowVT, MMO);

  // Rotate the result of the final CS so that the field is in the lower
  // bits of a GR32, then truncate it.
1504 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, 1505 DAG.getConstant(BitSize, WideVT)); 1506 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); 1507 1508 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; 1509 return DAG.getMergeValues(RetOps, 2, DL); 1510 } 1511 1512 // Node is an 8- or 16-bit ATOMIC_CMP_SWAP operation. Lower the first two 1513 // into a fullword ATOMIC_CMP_SWAPW operation. 1514 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, 1515 SelectionDAG &DAG) const { 1516 AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode()); 1517 1518 // We have native support for 32-bit compare and swap. 1519 EVT NarrowVT = Node->getMemoryVT(); 1520 EVT WideVT = MVT::i32; 1521 if (NarrowVT == WideVT) 1522 return Op; 1523 1524 int64_t BitSize = NarrowVT.getSizeInBits(); 1525 SDValue ChainIn = Node->getOperand(0); 1526 SDValue Addr = Node->getOperand(1); 1527 SDValue CmpVal = Node->getOperand(2); 1528 SDValue SwapVal = Node->getOperand(3); 1529 MachineMemOperand *MMO = Node->getMemOperand(); 1530 SDLoc DL(Node); 1531 EVT PtrVT = Addr.getValueType(); 1532 1533 // Get the address of the containing word. 1534 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, 1535 DAG.getConstant(-4, PtrVT)); 1536 1537 // Get the number of bits that the word must be rotated left in order 1538 // to bring the field to the top bits of a GR32. 1539 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, 1540 DAG.getConstant(3, PtrVT)); 1541 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); 1542 1543 // Get the complementing shift amount, for rotating a field in the top 1544 // bits back to its proper position. 1545 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, 1546 DAG.getConstant(0, WideVT), BitShift); 1547 1548 // Construct the ATOMIC_CMP_SWAPW node. 
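// The operands are (chain, aligned address, compare value, swap value,
// BitShift, NegBitShift, field size in bits); the corresponding
// ATOMIC_CMP_SWAPW pseudo instruction is expanded into a CS retry loop
// by emitAtomicCmpSwapW() below.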
1549 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); 1550 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, 1551 NegBitShift, DAG.getConstant(BitSize, WideVT) }; 1552 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, 1553 VTList, Ops, array_lengthof(Ops), 1554 NarrowVT, MMO); 1555 return AtomicOp; 1556 } 1557 1558 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, 1559 SelectionDAG &DAG) const { 1560 MachineFunction &MF = DAG.getMachineFunction(); 1561 MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); 1562 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op), 1563 SystemZ::R15D, Op.getValueType()); 1564 } 1565 1566 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, 1567 SelectionDAG &DAG) const { 1568 MachineFunction &MF = DAG.getMachineFunction(); 1569 MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); 1570 return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op), 1571 SystemZ::R15D, Op.getOperand(1)); 1572 } 1573 1574 SDValue SystemZTargetLowering::LowerOperation(SDValue Op, 1575 SelectionDAG &DAG) const { 1576 switch (Op.getOpcode()) { 1577 case ISD::BR_CC: 1578 return lowerBR_CC(Op, DAG); 1579 case ISD::SELECT_CC: 1580 return lowerSELECT_CC(Op, DAG); 1581 case ISD::GlobalAddress: 1582 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG); 1583 case ISD::GlobalTLSAddress: 1584 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG); 1585 case ISD::BlockAddress: 1586 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG); 1587 case ISD::JumpTable: 1588 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG); 1589 case ISD::ConstantPool: 1590 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG); 1591 case ISD::BITCAST: 1592 return lowerBITCAST(Op, DAG); 1593 case ISD::VASTART: 1594 return lowerVASTART(Op, DAG); 1595 case ISD::VACOPY: 1596 return lowerVACOPY(Op, DAG); 1597 case ISD::DYNAMIC_STACKALLOC: 1598 return lowerDYNAMIC_STACKALLOC(Op, DAG); 1599 case ISD::UMUL_LOHI: 1600 return lowerUMUL_LOHI(Op, DAG); 1601 case ISD::SDIVREM: 1602 return lowerSDIVREM(Op, DAG); 1603 case ISD::UDIVREM: 1604 return lowerUDIVREM(Op, DAG); 1605 case ISD::OR: 1606 return lowerOR(Op, DAG); 1607 case ISD::ATOMIC_SWAP: 1608 return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_SWAPW); 1609 case ISD::ATOMIC_LOAD_ADD: 1610 return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); 1611 case ISD::ATOMIC_LOAD_SUB: 1612 return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); 1613 case ISD::ATOMIC_LOAD_AND: 1614 return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); 1615 case ISD::ATOMIC_LOAD_OR: 1616 return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); 1617 case ISD::ATOMIC_LOAD_XOR: 1618 return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); 1619 case ISD::ATOMIC_LOAD_NAND: 1620 return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); 1621 case ISD::ATOMIC_LOAD_MIN: 1622 return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); 1623 case ISD::ATOMIC_LOAD_MAX: 1624 return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); 1625 case ISD::ATOMIC_LOAD_UMIN: 1626 return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); 1627 case ISD::ATOMIC_LOAD_UMAX: 1628 return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); 1629 case ISD::ATOMIC_CMP_SWAP: 1630 return lowerATOMIC_CMP_SWAP(Op, DAG); 1631 case ISD::STACKSAVE: 1632 return lowerSTACKSAVE(Op, DAG); 1633 case ISD::STACKRESTORE: 1634 return lowerSTACKRESTORE(Op, DAG); 1635 
default: 1636 llvm_unreachable("Unexpected node to lower"); 1637 } 1638 } 1639 1640 const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { 1641 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME 1642 switch (Opcode) { 1643 OPCODE(RET_FLAG); 1644 OPCODE(CALL); 1645 OPCODE(PCREL_WRAPPER); 1646 OPCODE(CMP); 1647 OPCODE(UCMP); 1648 OPCODE(BR_CCMASK); 1649 OPCODE(SELECT_CCMASK); 1650 OPCODE(ADJDYNALLOC); 1651 OPCODE(EXTRACT_ACCESS); 1652 OPCODE(UMUL_LOHI64); 1653 OPCODE(SDIVREM64); 1654 OPCODE(UDIVREM32); 1655 OPCODE(UDIVREM64); 1656 OPCODE(MVC); 1657 OPCODE(ATOMIC_SWAPW); 1658 OPCODE(ATOMIC_LOADW_ADD); 1659 OPCODE(ATOMIC_LOADW_SUB); 1660 OPCODE(ATOMIC_LOADW_AND); 1661 OPCODE(ATOMIC_LOADW_OR); 1662 OPCODE(ATOMIC_LOADW_XOR); 1663 OPCODE(ATOMIC_LOADW_NAND); 1664 OPCODE(ATOMIC_LOADW_MIN); 1665 OPCODE(ATOMIC_LOADW_MAX); 1666 OPCODE(ATOMIC_LOADW_UMIN); 1667 OPCODE(ATOMIC_LOADW_UMAX); 1668 OPCODE(ATOMIC_CMP_SWAPW); 1669 } 1670 return NULL; 1671 #undef OPCODE 1672 } 1673 1674 //===----------------------------------------------------------------------===// 1675 // Custom insertion 1676 //===----------------------------------------------------------------------===// 1677 1678 // Create a new basic block after MBB. 1679 static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { 1680 MachineFunction &MF = *MBB->getParent(); 1681 MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); 1682 MF.insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB); 1683 return NewMBB; 1684 } 1685 1686 // Split MBB after MI and return the new block (the one that contains 1687 // instructions after MI). 1688 static MachineBasicBlock *splitBlockAfter(MachineInstr *MI, 1689 MachineBasicBlock *MBB) { 1690 MachineBasicBlock *NewMBB = emitBlockAfter(MBB); 1691 NewMBB->splice(NewMBB->begin(), MBB, 1692 llvm::next(MachineBasicBlock::iterator(MI)), 1693 MBB->end()); 1694 NewMBB->transferSuccessorsAndUpdatePHIs(MBB); 1695 return NewMBB; 1696 } 1697 1698 bool SystemZTargetLowering:: 1699 convertPrevCompareToBranch(MachineBasicBlock *MBB, 1700 MachineBasicBlock::iterator MBBI, 1701 unsigned CCMask, MachineBasicBlock *Target) const { 1702 MachineBasicBlock::iterator Compare = MBBI; 1703 MachineBasicBlock::iterator Begin = MBB->begin(); 1704 do 1705 { 1706 if (Compare == Begin) 1707 return false; 1708 --Compare; 1709 } 1710 while (Compare->isDebugValue()); 1711 1712 const SystemZInstrInfo *TII = TM.getInstrInfo(); 1713 unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(), 1714 Compare); 1715 if (!FusedOpcode) 1716 return false; 1717 1718 DebugLoc DL = Compare->getDebugLoc(); 1719 BuildMI(*MBB, MBBI, DL, TII->get(FusedOpcode)) 1720 .addOperand(Compare->getOperand(0)).addOperand(Compare->getOperand(1)) 1721 .addImm(CCMask).addMBB(Target); 1722 Compare->removeFromParent(); 1723 return true; 1724 } 1725 1726 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. 
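// The pseudo has the semantics:
//   %DestReg = (CC in CCMask) ? %TrueReg : %FalseReg
// and is expanded into a conditional branch around a fall-through block,
// with a PHI in the join block selecting the result, as laid out below.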
1727 MachineBasicBlock * 1728 SystemZTargetLowering::emitSelect(MachineInstr *MI, 1729 MachineBasicBlock *MBB) const { 1730 const SystemZInstrInfo *TII = TM.getInstrInfo(); 1731 1732 unsigned DestReg = MI->getOperand(0).getReg(); 1733 unsigned TrueReg = MI->getOperand(1).getReg(); 1734 unsigned FalseReg = MI->getOperand(2).getReg(); 1735 unsigned CCMask = MI->getOperand(3).getImm(); 1736 DebugLoc DL = MI->getDebugLoc(); 1737 1738 MachineBasicBlock *StartMBB = MBB; 1739 MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB); 1740 MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); 1741 1742 // StartMBB: 1743 // BRC CCMask, JoinMBB 1744 // # fallthrough to FalseMBB 1745 // 1746 // The original DAG glues comparisons to their uses, both to ensure 1747 // that no CC-clobbering instructions are inserted between them, and 1748 // to ensure that comparison results are not reused. This means that 1749 // this Select is the sole user of any preceding comparison instruction 1750 // and that we can try to use a fused compare and branch instead. 1751 MBB = StartMBB; 1752 if (!convertPrevCompareToBranch(MBB, MI, CCMask, JoinMBB)) 1753 BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(CCMask).addMBB(JoinMBB); 1754 MBB->addSuccessor(JoinMBB); 1755 MBB->addSuccessor(FalseMBB); 1756 1757 // FalseMBB: 1758 // # fallthrough to JoinMBB 1759 MBB = FalseMBB; 1760 MBB->addSuccessor(JoinMBB); 1761 1762 // JoinMBB: 1763 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] 1764 // ... 1765 MBB = JoinMBB; 1766 BuildMI(*MBB, MBB->begin(), DL, TII->get(SystemZ::PHI), DestReg) 1767 .addReg(TrueReg).addMBB(StartMBB) 1768 .addReg(FalseReg).addMBB(FalseMBB); 1769 1770 MI->eraseFromParent(); 1771 return JoinMBB; 1772 } 1773 1774 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. 1775 // StoreOpcode is the store to use and Invert says whether the store should 1776 // happen when the condition is false rather than true. 1777 MachineBasicBlock * 1778 SystemZTargetLowering::emitCondStore(MachineInstr *MI, 1779 MachineBasicBlock *MBB, 1780 unsigned StoreOpcode, bool Invert) const { 1781 const SystemZInstrInfo *TII = TM.getInstrInfo(); 1782 1783 MachineOperand Base = MI->getOperand(0); 1784 int64_t Disp = MI->getOperand(1).getImm(); 1785 unsigned IndexReg = MI->getOperand(2).getReg(); 1786 unsigned SrcReg = MI->getOperand(3).getReg(); 1787 unsigned CCMask = MI->getOperand(4).getImm(); 1788 DebugLoc DL = MI->getDebugLoc(); 1789 1790 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); 1791 1792 // Get the condition needed to branch around the store. 1793 if (!Invert) 1794 CCMask = CCMask ^ SystemZ::CCMASK_ANY; 1795 1796 MachineBasicBlock *StartMBB = MBB; 1797 MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB); 1798 MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); 1799 1800 // StartMBB: 1801 // BRC CCMask, JoinMBB 1802 // # fallthrough to FalseMBB 1803 // 1804 // The original DAG glues comparisons to their uses, both to ensure 1805 // that no CC-clobbering instructions are inserted between them, and 1806 // to ensure that comparison results are not reused. This means that 1807 // this CondStore is the sole user of any preceding comparison instruction 1808 // and that we can try to use a fused compare and branch instead. 
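// Because CCMask was complemented above when Invert is false, the branch to
// JoinMBB is taken exactly when the store should not happen; otherwise
// control falls through to FalseMBB, which performs the store.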
1809 MBB = StartMBB; 1810 if (!convertPrevCompareToBranch(MBB, MI, CCMask, JoinMBB)) 1811 BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(CCMask).addMBB(JoinMBB); 1812 MBB->addSuccessor(JoinMBB); 1813 MBB->addSuccessor(FalseMBB); 1814 1815 // FalseMBB: 1816 // store %SrcReg, %Disp(%Index,%Base) 1817 // # fallthrough to JoinMBB 1818 MBB = FalseMBB; 1819 BuildMI(MBB, DL, TII->get(StoreOpcode)) 1820 .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg); 1821 MBB->addSuccessor(JoinMBB); 1822 1823 MI->eraseFromParent(); 1824 return JoinMBB; 1825 } 1826 1827 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_* 1828 // or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that 1829 // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}. 1830 // BitSize is the width of the field in bits, or 0 if this is a partword 1831 // ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize 1832 // is one of the operands. Invert says whether the field should be 1833 // inverted after performing BinOpcode (e.g. for NAND). 1834 MachineBasicBlock * 1835 SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, 1836 MachineBasicBlock *MBB, 1837 unsigned BinOpcode, 1838 unsigned BitSize, 1839 bool Invert) const { 1840 const SystemZInstrInfo *TII = TM.getInstrInfo(); 1841 MachineFunction &MF = *MBB->getParent(); 1842 MachineRegisterInfo &MRI = MF.getRegInfo(); 1843 unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); 1844 bool IsSubWord = (BitSize < 32); 1845 1846 // Extract the operands. Base can be a register or a frame index. 1847 // Src2 can be a register or immediate. 1848 unsigned Dest = MI->getOperand(0).getReg(); 1849 MachineOperand Base = earlyUseOperand(MI->getOperand(1)); 1850 int64_t Disp = MI->getOperand(2).getImm(); 1851 MachineOperand Src2 = earlyUseOperand(MI->getOperand(3)); 1852 unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0); 1853 unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0); 1854 DebugLoc DL = MI->getDebugLoc(); 1855 if (IsSubWord) 1856 BitSize = MI->getOperand(6).getImm(); 1857 1858 // Subword operations use 32-bit registers. 1859 const TargetRegisterClass *RC = (BitSize <= 32 ? 1860 &SystemZ::GR32BitRegClass : 1861 &SystemZ::GR64BitRegClass); 1862 unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG; 1863 unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG; 1864 1865 // Get the right opcodes for the displacement. 1866 LOpcode = TII->getOpcodeForOffset(LOpcode, Disp); 1867 CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp); 1868 assert(LOpcode && CSOpcode && "Displacement out of range"); 1869 1870 // Create virtual registers for temporary results. 1871 unsigned OrigVal = MRI.createVirtualRegister(RC); 1872 unsigned OldVal = MRI.createVirtualRegister(RC); 1873 unsigned NewVal = (BinOpcode || IsSubWord ? 1874 MRI.createVirtualRegister(RC) : Src2.getReg()); 1875 unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); 1876 unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); 1877 1878 // Insert a basic block for the main loop. 1879 MachineBasicBlock *StartMBB = MBB; 1880 MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); 1881 MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); 1882 1883 // StartMBB: 1884 // ... 
1885 // %OrigVal = L Disp(%Base)
1886 // # fall through to LoopMBB
1887 MBB = StartMBB;
1888 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
1889 .addOperand(Base).addImm(Disp).addReg(0);
1890 MBB->addSuccessor(LoopMBB);
1891
1892 // LoopMBB:
1893 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
1894 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
1895 // %RotatedNewVal = OP %RotatedOldVal, %Src2
1896 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
1897 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
1898 // JNE LoopMBB
1899 // # fall through to DoneMBB
1900 MBB = LoopMBB;
1901 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
1902 .addReg(OrigVal).addMBB(StartMBB)
1903 .addReg(Dest).addMBB(LoopMBB);
1904 if (IsSubWord)
1905 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
1906 .addReg(OldVal).addReg(BitShift).addImm(0);
1907 if (Invert) {
1908 // Perform the operation normally and then invert every bit of the field.
1909 unsigned Tmp = MRI.createVirtualRegister(RC);
1910 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
1911 .addReg(RotatedOldVal).addOperand(Src2);
1912 if (BitSize < 32)
1913 // XILF with the upper BitSize bits set.
1914 BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
1915 .addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize)));
1916 else if (BitSize == 32)
1917 // XILF with every bit set.
1918 BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
1919 .addReg(Tmp).addImm(~uint32_t(0));
1920 else {
1921 // Use LCGR and add -1 to the result, which is more compact than
1922 // an XILF, XILH pair.
1923 unsigned Tmp2 = MRI.createVirtualRegister(RC);
1924 BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
1925 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
1926 .addReg(Tmp2).addImm(-1);
1927 }
1928 } else if (BinOpcode)
1929 // A simple binary operation.
1930 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
1931 .addReg(RotatedOldVal).addOperand(Src2);
1932 else if (IsSubWord)
1933 // Use RISBG to rotate Src2 into position and use it to replace the
1934 // field in RotatedOldVal.
1935 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
1936 .addReg(RotatedOldVal).addReg(Src2.getReg())
1937 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
1938 if (IsSubWord)
1939 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
1940 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
1941 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
1942 .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
1943 BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB);
1944 MBB->addSuccessor(LoopMBB);
1945 MBB->addSuccessor(DoneMBB);
1946
1947 MI->eraseFromParent();
1948 return DoneMBB;
1949 }
1950
1951 // Implement EmitInstrWithCustomInserter for pseudo
1952 // ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
1953 // instruction that should be used to compare the current field with the
1954 // minimum or maximum value. KeepOldMask is the BRC condition-code mask
1955 // for when the current field should be kept. BitSize is the width of
1956 // the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
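// For example, ATOMIC_LOAD_MIN_32 passes SystemZ::CR with
// SystemZ::CCMASK_CMP_LE, so the loaded value is kept whenever it is already
// less than or equal to Src2 and is replaced by Src2 otherwise; the CS loop
// below then retries until the swap succeeds.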
1957 MachineBasicBlock * 1958 SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, 1959 MachineBasicBlock *MBB, 1960 unsigned CompareOpcode, 1961 unsigned KeepOldMask, 1962 unsigned BitSize) const { 1963 const SystemZInstrInfo *TII = TM.getInstrInfo(); 1964 MachineFunction &MF = *MBB->getParent(); 1965 MachineRegisterInfo &MRI = MF.getRegInfo(); 1966 unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); 1967 bool IsSubWord = (BitSize < 32); 1968 1969 // Extract the operands. Base can be a register or a frame index. 1970 unsigned Dest = MI->getOperand(0).getReg(); 1971 MachineOperand Base = earlyUseOperand(MI->getOperand(1)); 1972 int64_t Disp = MI->getOperand(2).getImm(); 1973 unsigned Src2 = MI->getOperand(3).getReg(); 1974 unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0); 1975 unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0); 1976 DebugLoc DL = MI->getDebugLoc(); 1977 if (IsSubWord) 1978 BitSize = MI->getOperand(6).getImm(); 1979 1980 // Subword operations use 32-bit registers. 1981 const TargetRegisterClass *RC = (BitSize <= 32 ? 1982 &SystemZ::GR32BitRegClass : 1983 &SystemZ::GR64BitRegClass); 1984 unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG; 1985 unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG; 1986 1987 // Get the right opcodes for the displacement. 1988 LOpcode = TII->getOpcodeForOffset(LOpcode, Disp); 1989 CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp); 1990 assert(LOpcode && CSOpcode && "Displacement out of range"); 1991 1992 // Create virtual registers for temporary results. 1993 unsigned OrigVal = MRI.createVirtualRegister(RC); 1994 unsigned OldVal = MRI.createVirtualRegister(RC); 1995 unsigned NewVal = MRI.createVirtualRegister(RC); 1996 unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); 1997 unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2); 1998 unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); 1999 2000 // Insert 3 basic blocks for the loop. 2001 MachineBasicBlock *StartMBB = MBB; 2002 MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); 2003 MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); 2004 MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB); 2005 MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB); 2006 2007 // StartMBB: 2008 // ... 
2009 // %OrigVal = L Disp(%Base)
2010 // # fall through to LoopMBB
2011 MBB = StartMBB;
2012 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
2013 .addOperand(Base).addImm(Disp).addReg(0);
2014 MBB->addSuccessor(LoopMBB);
2015
2016 // LoopMBB:
2017 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
2018 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
2019 // CompareOpcode %RotatedOldVal, %Src2
2020 // BRC KeepOldMask, UpdateMBB
2021 MBB = LoopMBB;
2022 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
2023 .addReg(OrigVal).addMBB(StartMBB)
2024 .addReg(Dest).addMBB(UpdateMBB);
2025 if (IsSubWord)
2026 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
2027 .addReg(OldVal).addReg(BitShift).addImm(0);
2028 unsigned FusedOpcode = TII->getCompareAndBranch(CompareOpcode);
2029 if (FusedOpcode)
2030 BuildMI(MBB, DL, TII->get(FusedOpcode))
2031 .addReg(RotatedOldVal).addReg(Src2)
2032 .addImm(KeepOldMask).addMBB(UpdateMBB);
2033 else {
2034 BuildMI(MBB, DL, TII->get(CompareOpcode))
2035 .addReg(RotatedOldVal).addReg(Src2);
2036 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
2037 .addImm(KeepOldMask).addMBB(UpdateMBB);
2038 }
2039 MBB->addSuccessor(UpdateMBB);
2040 MBB->addSuccessor(UseAltMBB);
2041
2042 // UseAltMBB:
2043 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
2044 // # fall through to UpdateMBB
2045 MBB = UseAltMBB;
2046 if (IsSubWord)
2047 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
2048 .addReg(RotatedOldVal).addReg(Src2)
2049 .addImm(32).addImm(31 + BitSize).addImm(0);
2050 MBB->addSuccessor(UpdateMBB);
2051
2052 // UpdateMBB:
2053 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
2054 // [ %RotatedAltVal, UseAltMBB ]
2055 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
2056 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
2057 // JNE LoopMBB
2058 // # fall through to DoneMBB
2059 MBB = UpdateMBB;
2060 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
2061 .addReg(RotatedOldVal).addMBB(LoopMBB)
2062 .addReg(RotatedAltVal).addMBB(UseAltMBB);
2063 if (IsSubWord)
2064 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
2065 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
2066 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
2067 .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
2068 BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB);
2069 MBB->addSuccessor(LoopMBB);
2070 MBB->addSuccessor(DoneMBB);
2071
2072 MI->eraseFromParent();
2073 return DoneMBB;
2074 }
2075
2076 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
2077 // instruction MI.
2078 MachineBasicBlock *
2079 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
2080 MachineBasicBlock *MBB) const {
2081 const SystemZInstrInfo *TII = TM.getInstrInfo();
2082 MachineFunction &MF = *MBB->getParent();
2083 MachineRegisterInfo &MRI = MF.getRegInfo();
2084 unsigned MaskNE = CCMaskForCondCode(ISD::SETNE);
2085
2086 // Extract the operands. Base can be a register or a frame index.
2087 unsigned Dest = MI->getOperand(0).getReg();
2088 MachineOperand Base = earlyUseOperand(MI->getOperand(1));
2089 int64_t Disp = MI->getOperand(2).getImm();
2090 unsigned OrigCmpVal = MI->getOperand(3).getReg();
2091 unsigned OrigSwapVal = MI->getOperand(4).getReg();
2092 unsigned BitShift = MI->getOperand(5).getReg();
2093 unsigned NegBitShift = MI->getOperand(6).getReg();
2094 int64_t BitSize = MI->getOperand(7).getImm();
2095 DebugLoc DL = MI->getDebugLoc();
2096
2097 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
2098
2099 // Get the right opcodes for the displacement.
2100 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
2101 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
2102 assert(LOpcode && CSOpcode && "Displacement out of range");
2103
2104 // Create virtual registers for temporary results.
2105 unsigned OrigOldVal = MRI.createVirtualRegister(RC);
2106 unsigned OldVal = MRI.createVirtualRegister(RC);
2107 unsigned CmpVal = MRI.createVirtualRegister(RC);
2108 unsigned SwapVal = MRI.createVirtualRegister(RC);
2109 unsigned StoreVal = MRI.createVirtualRegister(RC);
2110 unsigned RetryOldVal = MRI.createVirtualRegister(RC);
2111 unsigned RetryCmpVal = MRI.createVirtualRegister(RC);
2112 unsigned RetrySwapVal = MRI.createVirtualRegister(RC);
2113
2114 // Insert 2 basic blocks for the loop.
2115 MachineBasicBlock *StartMBB = MBB;
2116 MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
2117 MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
2118 MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
2119
2120 // StartMBB:
2121 // ...
2122 // %OrigOldVal = L Disp(%Base)
2123 // # fall through to LoopMBB
2124 MBB = StartMBB;
2125 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
2126 .addOperand(Base).addImm(Disp).addReg(0);
2127 MBB->addSuccessor(LoopMBB);
2128
2129 // LoopMBB:
2130 // %OldVal = phi [ %OrigOldVal, StartMBB ], [ %RetryOldVal, SetMBB ]
2131 // %CmpVal = phi [ %OrigCmpVal, StartMBB ], [ %RetryCmpVal, SetMBB ]
2132 // %SwapVal = phi [ %OrigSwapVal, StartMBB ], [ %RetrySwapVal, SetMBB ]
2133 // %Dest = RLL %OldVal, BitSize(%BitShift)
2134 // ^^ The low BitSize bits contain the field
2135 // of interest.
2136 // %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
2137 // ^^ Replace the upper 32-BitSize bits of the
2138 // comparison value with those that we loaded,
2139 // so that we can use a full word comparison.
2140 // CRJNE %Dest, %RetryCmpVal, DoneMBB
2141 // # Fall through to SetMBB
2142 MBB = LoopMBB;
2143 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
2144 .addReg(OrigOldVal).addMBB(StartMBB)
2145 .addReg(RetryOldVal).addMBB(SetMBB);
2146 BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
2147 .addReg(OrigCmpVal).addMBB(StartMBB)
2148 .addReg(RetryCmpVal).addMBB(SetMBB);
2149 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
2150 .addReg(OrigSwapVal).addMBB(StartMBB)
2151 .addReg(RetrySwapVal).addMBB(SetMBB);
2152 BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
2153 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
2154 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
2155 .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
2156 BuildMI(MBB, DL, TII->get(SystemZ::CRJ))
2157 .addReg(Dest).addReg(RetryCmpVal)
2158 .addImm(MaskNE).addMBB(DoneMBB);
2159 MBB->addSuccessor(DoneMBB);
2160 MBB->addSuccessor(SetMBB);
2161
2162 // SetMBB:
2163 // %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
2164 // ^^ Replace the upper 32-BitSize bits of the new
2165 // value with those that we loaded.
2166 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
2167 // ^^ Rotate the new field to its proper position.
2168 // %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base)
2169 // JNE LoopMBB
2170 // # fall through to DoneMBB
2171 MBB = SetMBB;
2172 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
2173 .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
2174 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
2175 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
2176 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
2177 .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
2178 BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB);
2179 MBB->addSuccessor(LoopMBB);
2180 MBB->addSuccessor(DoneMBB);
2181
2182 MI->eraseFromParent();
2183 return DoneMBB;
2184 }
2185
2186 // Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true
2187 // if the high register of the GR128 value must be cleared or false if
2188 // it's "don't care". SubReg is subreg_low32 when extending a GR32
2189 // and subreg_low when extending a GR64.
2190 MachineBasicBlock *
2191 SystemZTargetLowering::emitExt128(MachineInstr *MI,
2192 MachineBasicBlock *MBB,
2193 bool ClearEven, unsigned SubReg) const {
2194 const SystemZInstrInfo *TII = TM.getInstrInfo();
2195 MachineFunction &MF = *MBB->getParent();
2196 MachineRegisterInfo &MRI = MF.getRegInfo();
2197 DebugLoc DL = MI->getDebugLoc();
2198
2199 unsigned Dest = MI->getOperand(0).getReg();
2200 unsigned Src = MI->getOperand(1).getReg();
2201 unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
2202
2203 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
2204 if (ClearEven) {
2205 unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
2206 unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
2207
2208 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
2209 .addImm(0);
2210 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
2211 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_high);
2212 In128 = NewIn128;
2213 }
2214 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
2215 .addReg(In128).addReg(Src).addImm(SubReg);
2216
2217 MI->eraseFromParent();
2218 return MBB;
2219 }
2220
2221 MachineBasicBlock *
2222 SystemZTargetLowering::emitMVCWrapper(MachineInstr *MI,
2223 MachineBasicBlock *MBB) const {
2224 const SystemZInstrInfo *TII = TM.getInstrInfo();
2225 DebugLoc DL = MI->getDebugLoc();
2226
2227 MachineOperand DestBase = MI->getOperand(0);
2228 uint64_t DestDisp = MI->getOperand(1).getImm();
2229 MachineOperand SrcBase = MI->getOperand(2);
2230 uint64_t SrcDisp = MI->getOperand(3).getImm();
2231 uint64_t Length = MI->getOperand(4).getImm();
2232
2233 BuildMI(*MBB, MI, DL, TII->get(SystemZ::MVC))
2234 .addOperand(DestBase).addImm(DestDisp).addImm(Length)
2235 .addOperand(SrcBase).addImm(SrcDisp);
2236
2237 MI->eraseFromParent();
2238 return MBB;
2239 }
2240
2241 MachineBasicBlock *SystemZTargetLowering::
2242 EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
2243 switch (MI->getOpcode()) {
2244 case SystemZ::Select32:
2245 case SystemZ::SelectF32:
2246 case SystemZ::Select64:
2247 case SystemZ::SelectF64:
2248 case SystemZ::SelectF128:
2249 return emitSelect(MI, MBB);
2250
2251 case SystemZ::CondStore8_32:
2252 return emitCondStore(MI, MBB, SystemZ::STC32, false);
2253 case SystemZ::CondStore8_32Inv:
2254 return emitCondStore(MI, MBB,
SystemZ::STC32, true); 2255 case SystemZ::CondStore16_32: 2256 return emitCondStore(MI, MBB, SystemZ::STH32, false); 2257 case SystemZ::CondStore16_32Inv: 2258 return emitCondStore(MI, MBB, SystemZ::STH32, true); 2259 case SystemZ::CondStore32_32: 2260 return emitCondStore(MI, MBB, SystemZ::ST32, false); 2261 case SystemZ::CondStore32_32Inv: 2262 return emitCondStore(MI, MBB, SystemZ::ST32, true); 2263 case SystemZ::CondStore8: 2264 return emitCondStore(MI, MBB, SystemZ::STC, false); 2265 case SystemZ::CondStore8Inv: 2266 return emitCondStore(MI, MBB, SystemZ::STC, true); 2267 case SystemZ::CondStore16: 2268 return emitCondStore(MI, MBB, SystemZ::STH, false); 2269 case SystemZ::CondStore16Inv: 2270 return emitCondStore(MI, MBB, SystemZ::STH, true); 2271 case SystemZ::CondStore32: 2272 return emitCondStore(MI, MBB, SystemZ::ST, false); 2273 case SystemZ::CondStore32Inv: 2274 return emitCondStore(MI, MBB, SystemZ::ST, true); 2275 case SystemZ::CondStore64: 2276 return emitCondStore(MI, MBB, SystemZ::STG, false); 2277 case SystemZ::CondStore64Inv: 2278 return emitCondStore(MI, MBB, SystemZ::STG, true); 2279 case SystemZ::CondStoreF32: 2280 return emitCondStore(MI, MBB, SystemZ::STE, false); 2281 case SystemZ::CondStoreF32Inv: 2282 return emitCondStore(MI, MBB, SystemZ::STE, true); 2283 case SystemZ::CondStoreF64: 2284 return emitCondStore(MI, MBB, SystemZ::STD, false); 2285 case SystemZ::CondStoreF64Inv: 2286 return emitCondStore(MI, MBB, SystemZ::STD, true); 2287 2288 case SystemZ::AEXT128_64: 2289 return emitExt128(MI, MBB, false, SystemZ::subreg_low); 2290 case SystemZ::ZEXT128_32: 2291 return emitExt128(MI, MBB, true, SystemZ::subreg_low32); 2292 case SystemZ::ZEXT128_64: 2293 return emitExt128(MI, MBB, true, SystemZ::subreg_low); 2294 2295 case SystemZ::ATOMIC_SWAPW: 2296 return emitAtomicLoadBinary(MI, MBB, 0, 0); 2297 case SystemZ::ATOMIC_SWAP_32: 2298 return emitAtomicLoadBinary(MI, MBB, 0, 32); 2299 case SystemZ::ATOMIC_SWAP_64: 2300 return emitAtomicLoadBinary(MI, MBB, 0, 64); 2301 2302 case SystemZ::ATOMIC_LOADW_AR: 2303 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0); 2304 case SystemZ::ATOMIC_LOADW_AFI: 2305 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0); 2306 case SystemZ::ATOMIC_LOAD_AR: 2307 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32); 2308 case SystemZ::ATOMIC_LOAD_AHI: 2309 return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32); 2310 case SystemZ::ATOMIC_LOAD_AFI: 2311 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32); 2312 case SystemZ::ATOMIC_LOAD_AGR: 2313 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64); 2314 case SystemZ::ATOMIC_LOAD_AGHI: 2315 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64); 2316 case SystemZ::ATOMIC_LOAD_AGFI: 2317 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64); 2318 2319 case SystemZ::ATOMIC_LOADW_SR: 2320 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0); 2321 case SystemZ::ATOMIC_LOAD_SR: 2322 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32); 2323 case SystemZ::ATOMIC_LOAD_SGR: 2324 return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64); 2325 2326 case SystemZ::ATOMIC_LOADW_NR: 2327 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0); 2328 case SystemZ::ATOMIC_LOADW_NILH: 2329 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0); 2330 case SystemZ::ATOMIC_LOAD_NR: 2331 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32); 2332 case SystemZ::ATOMIC_LOAD_NILL32: 2333 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32); 2334 case SystemZ::ATOMIC_LOAD_NILH32: 2335 return 
emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32); 2336 case SystemZ::ATOMIC_LOAD_NILF32: 2337 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32); 2338 case SystemZ::ATOMIC_LOAD_NGR: 2339 return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64); 2340 case SystemZ::ATOMIC_LOAD_NILL: 2341 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64); 2342 case SystemZ::ATOMIC_LOAD_NILH: 2343 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64); 2344 case SystemZ::ATOMIC_LOAD_NIHL: 2345 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64); 2346 case SystemZ::ATOMIC_LOAD_NIHH: 2347 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64); 2348 case SystemZ::ATOMIC_LOAD_NILF: 2349 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64); 2350 case SystemZ::ATOMIC_LOAD_NIHF: 2351 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64); 2352 2353 case SystemZ::ATOMIC_LOADW_OR: 2354 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0); 2355 case SystemZ::ATOMIC_LOADW_OILH: 2356 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 0); 2357 case SystemZ::ATOMIC_LOAD_OR: 2358 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32); 2359 case SystemZ::ATOMIC_LOAD_OILL32: 2360 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL32, 32); 2361 case SystemZ::ATOMIC_LOAD_OILH32: 2362 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 32); 2363 case SystemZ::ATOMIC_LOAD_OILF32: 2364 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF32, 32); 2365 case SystemZ::ATOMIC_LOAD_OGR: 2366 return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64); 2367 case SystemZ::ATOMIC_LOAD_OILL: 2368 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 64); 2369 case SystemZ::ATOMIC_LOAD_OILH: 2370 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 64); 2371 case SystemZ::ATOMIC_LOAD_OIHL: 2372 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL, 64); 2373 case SystemZ::ATOMIC_LOAD_OIHH: 2374 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH, 64); 2375 case SystemZ::ATOMIC_LOAD_OILF: 2376 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 64); 2377 case SystemZ::ATOMIC_LOAD_OIHF: 2378 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF, 64); 2379 2380 case SystemZ::ATOMIC_LOADW_XR: 2381 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0); 2382 case SystemZ::ATOMIC_LOADW_XILF: 2383 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 0); 2384 case SystemZ::ATOMIC_LOAD_XR: 2385 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32); 2386 case SystemZ::ATOMIC_LOAD_XILF32: 2387 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 32); 2388 case SystemZ::ATOMIC_LOAD_XGR: 2389 return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64); 2390 case SystemZ::ATOMIC_LOAD_XILF: 2391 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 64); 2392 case SystemZ::ATOMIC_LOAD_XIHF: 2393 return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF, 64); 2394 2395 case SystemZ::ATOMIC_LOADW_NRi: 2396 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true); 2397 case SystemZ::ATOMIC_LOADW_NILHi: 2398 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0, true); 2399 case SystemZ::ATOMIC_LOAD_NRi: 2400 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true); 2401 case SystemZ::ATOMIC_LOAD_NILL32i: 2402 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32, true); 2403 case SystemZ::ATOMIC_LOAD_NILH32i: 2404 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32, true); 2405 case SystemZ::ATOMIC_LOAD_NILF32i: 2406 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32, true); 2407 case SystemZ::ATOMIC_LOAD_NGRi: 2408 return 
emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true); 2409 case SystemZ::ATOMIC_LOAD_NILLi: 2410 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64, true); 2411 case SystemZ::ATOMIC_LOAD_NILHi: 2412 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64, true); 2413 case SystemZ::ATOMIC_LOAD_NIHLi: 2414 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64, true); 2415 case SystemZ::ATOMIC_LOAD_NIHHi: 2416 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64, true); 2417 case SystemZ::ATOMIC_LOAD_NILFi: 2418 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64, true); 2419 case SystemZ::ATOMIC_LOAD_NIHFi: 2420 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64, true); 2421 2422 case SystemZ::ATOMIC_LOADW_MIN: 2423 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, 2424 SystemZ::CCMASK_CMP_LE, 0); 2425 case SystemZ::ATOMIC_LOAD_MIN_32: 2426 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, 2427 SystemZ::CCMASK_CMP_LE, 32); 2428 case SystemZ::ATOMIC_LOAD_MIN_64: 2429 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, 2430 SystemZ::CCMASK_CMP_LE, 64); 2431 2432 case SystemZ::ATOMIC_LOADW_MAX: 2433 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, 2434 SystemZ::CCMASK_CMP_GE, 0); 2435 case SystemZ::ATOMIC_LOAD_MAX_32: 2436 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, 2437 SystemZ::CCMASK_CMP_GE, 32); 2438 case SystemZ::ATOMIC_LOAD_MAX_64: 2439 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, 2440 SystemZ::CCMASK_CMP_GE, 64); 2441 2442 case SystemZ::ATOMIC_LOADW_UMIN: 2443 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, 2444 SystemZ::CCMASK_CMP_LE, 0); 2445 case SystemZ::ATOMIC_LOAD_UMIN_32: 2446 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, 2447 SystemZ::CCMASK_CMP_LE, 32); 2448 case SystemZ::ATOMIC_LOAD_UMIN_64: 2449 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, 2450 SystemZ::CCMASK_CMP_LE, 64); 2451 2452 case SystemZ::ATOMIC_LOADW_UMAX: 2453 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, 2454 SystemZ::CCMASK_CMP_GE, 0); 2455 case SystemZ::ATOMIC_LOAD_UMAX_32: 2456 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, 2457 SystemZ::CCMASK_CMP_GE, 32); 2458 case SystemZ::ATOMIC_LOAD_UMAX_64: 2459 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, 2460 SystemZ::CCMASK_CMP_GE, 64); 2461 2462 case SystemZ::ATOMIC_CMP_SWAPW: 2463 return emitAtomicCmpSwapW(MI, MBB); 2464 case SystemZ::BRC: 2465 // The original DAG glues comparisons to their uses, both to ensure 2466 // that no CC-clobbering instructions are inserted between them, and 2467 // to ensure that comparison results are not reused. This means that 2468 // a BRC is the sole user of a preceding comparison and that we can 2469 // try to use a fused compare and branch instead. 2470 if (convertPrevCompareToBranch(MBB, MI, MI->getOperand(0).getImm(), 2471 MI->getOperand(1).getMBB())) 2472 MI->eraseFromParent(); 2473 return MBB; 2474 case SystemZ::MVCWrapper: 2475 return emitMVCWrapper(MI, MBB); 2476 default: 2477 llvm_unreachable("Unexpected instr type to insert"); 2478 } 2479 } 2480