//===- FastISel.cpp - Implementation of the FastISel class ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the FastISel class.
//
// "Fast" instruction selection is designed to emit very poor code quickly.
// Also, it is not designed to be able to do much lowering, so most illegal
// types (e.g. i64 on 32-bit targets) and operations are not supported. It is
// also not intended to be able to do much optimization, except in a few cases
// where doing optimizations reduces overall compile time. For example, folding
// constants into immediate fields is often done, because it's cheap and it
// reduces the number of instructions later phases have to examine.
//
// "Fast" instruction selection is able to fail gracefully and transfer
// control to the SelectionDAG selector for operations that it doesn't
// support. In many cases, this allows us to avoid duplicating a lot of
// the complicated lowering logic that SelectionDAG currently has.
//
// The intended use for "fast" instruction selection is "-O0" mode
// compilation, where the quality of the generated code is irrelevant when
// weighed against the speed at which the code can be generated. Also,
// at -O0, the LLVM optimizers are not running, and this makes the
// compile time of codegen a much higher portion of the overall compile
// time. Despite its limitations, "fast" instruction selection is able to
// handle enough code on its own to provide noticeable overall speedups
// in -O0 compiles.
//
// Basic operations are supported in a target-independent way, by reading
// the same instruction descriptions that the SelectionDAG selector reads,
// and identifying simple arithmetic operations that can be directly selected
// from simple operators. More complicated operations currently require
// target-specific code.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/FastISel.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "isel"

// FIXME: Remove this after the feature has proven reliable.
static cl::opt<bool> SinkLocalValues("fast-isel-sink-local-values",
                                     cl::init(true), cl::Hidden,
                                     cl::desc("Sink local values in FastISel"));

STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
                                         "target-independent selector");
STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
                                    "target-specific selector");
STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");

/// Set the current block to which generated machine instructions will be
/// appended.
void FastISel::startNewBlock() {
  assert(LocalValueMap.empty() &&
         "local values should be cleared after finishing a BB");

  // Instructions are appended to FuncInfo.MBB. If the basic block already
  // contains labels or copies, use the last instruction as the last local
  // value.
  EmitStartPt = nullptr;
  if (!FuncInfo.MBB->empty())
    EmitStartPt = &FuncInfo.MBB->back();
  LastLocalValue = EmitStartPt;
}

void FastISel::finishBasicBlock() { flushLocalValueMap(); }

bool FastISel::lowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    // Fallback to SDISel argument lowering code to deal with sret pointer
    // parameter.
    return false;

  if (!fastLowerArguments())
    return false;

  // Enter arguments into ValueMap for uses in non-entry BBs.
  for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
                                    E = FuncInfo.Fn->arg_end();
       I != E; ++I) {
    DenseMap<const Value *, Register>::iterator VI = LocalValueMap.find(&*I);
    assert(VI != LocalValueMap.end() && "Missed an argument?");
    FuncInfo.ValueMap[&*I] = VI->second;
  }
  return true;
}

/// Return the defined register if this instruction defines exactly one
/// virtual register and uses no other virtual registers. Otherwise return 0.
static Register findLocalRegDef(MachineInstr &MI) {
  Register RegDef;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef()) {
      if (RegDef)
        return Register();
      RegDef = MO.getReg();
    } else if (MO.getReg().isVirtual()) {
      // This is another use of a vreg. Don't delete it.
      return Register();
    }
  }
  return RegDef;
}

static bool isRegUsedByPhiNodes(Register DefReg,
                                FunctionLoweringInfo &FuncInfo) {
  for (auto &P : FuncInfo.PHINodesToUpdate)
    if (P.second == DefReg)
      return true;
  return false;
}

void FastISel::flushLocalValueMap() {
  // If FastISel bails out, it could leave local value instructions behind
  // that aren't used for anything. Detect and erase those.
  if (LastLocalValue != EmitStartPt) {
    MachineBasicBlock::reverse_iterator RE =
        EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
                    : FuncInfo.MBB->rend();
    MachineBasicBlock::reverse_iterator RI(LastLocalValue);
    for (; RI != RE;) {
      MachineInstr &LocalMI = *RI;
      // Increment before erasing what it points to.
      ++RI;
      Register DefReg = findLocalRegDef(LocalMI);
      if (!DefReg)
        continue;
      if (FuncInfo.RegsWithFixups.count(DefReg))
        continue;
      bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
      if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
        if (EmitStartPt == &LocalMI)
          EmitStartPt = EmitStartPt->getPrevNode();
        LLVM_DEBUG(dbgs() << "removing dead local value materialization"
                          << LocalMI);
        LocalMI.eraseFromParent();
      }
    }
  }

  LocalValueMap.clear();
  LastLocalValue = EmitStartPt;
  recomputeInsertPt();
  SavedInsertPt = FuncInfo.InsertPt;
  LastFlushPoint = FuncInfo.InsertPt;
}

bool FastISel::hasTrivialKill(const Value *V) {
  // Don't consider constants or arguments to have trivial kills.
  const Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // No-op casts are trivially coalesced by fast-isel.
  if (const auto *Cast = dyn_cast<CastInst>(I))
    if (Cast->isNoopCast(DL) && !hasTrivialKill(Cast->getOperand(0)))
      return false;

  // Even though the value might have only one use in the LLVM IR, it is
  // possible that FastISel might fold the use into another instruction and now
  // there is more than one use at the Machine Instruction level.
  Register Reg = lookUpRegForValue(V);
  if (Reg && !MRI.use_empty(Reg))
    return false;

  // GEPs with all zero indices are trivially coalesced by fast-isel.
  if (const auto *GEP = dyn_cast<GetElementPtrInst>(I))
    if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
      return false;

  // Only instructions with a single use in the same basic block are considered
  // to have trivial kills.
  return I->hasOneUse() &&
         !(I->getOpcode() == Instruction::BitCast ||
           I->getOpcode() == Instruction::PtrToInt ||
           I->getOpcode() == Instruction::IntToPtr) &&
         cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
}

Register FastISel::getRegForValue(const Value *V) {
  EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true);
  // Don't handle non-simple values in FastISel.
  if (!RealVT.isSimple())
    return Register();

  // Ignore illegal types. We must do this before looking up the value
  // in ValueMap because Arguments are given virtual registers regardless
  // of whether FastISel can handle them.
  MVT VT = RealVT.getSimpleVT();
  if (!TLI.isTypeLegal(VT)) {
    // Handle integer promotions, though, because they're common and easy.
    if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
      VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
    else
      return Register();
  }

  // Look up the value to see if we already have a register for it.
  Register Reg = lookUpRegForValue(V);
  if (Reg)
    return Reg;

  // In bottom-up mode, just create the virtual register which will be used
  // to hold the value. It will be materialized later.
  if (isa<Instruction>(V) &&
      (!isa<AllocaInst>(V) ||
       !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
    return FuncInfo.InitializeRegForValue(V);

  SavePoint SaveInsertPt = enterLocalValueArea();

  // Materialize the value in a register. Emit any instructions in the
  // local value area.
  Reg = materializeRegForValue(V, VT);

  leaveLocalValueArea(SaveInsertPt);

  return Reg;
}

Register FastISel::materializeConstant(const Value *V, MVT VT) {
  Register Reg;
  if (const auto *CI = dyn_cast<ConstantInt>(V)) {
    if (CI->getValue().getActiveBits() <= 64)
      Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
  } else if (isa<AllocaInst>(V))
    Reg = fastMaterializeAlloca(cast<AllocaInst>(V));
  else if (isa<ConstantPointerNull>(V))
    // Translate this as an integer zero so that it can be
    // local-CSE'd with actual integer zeros.
    Reg =
        getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getType())));
  else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
    if (CF->isNullValue())
      Reg = fastMaterializeFloatZero(CF);
    else
      // Try to emit the constant directly.
      Reg = fastEmit_f(VT, VT, ISD::ConstantFP, CF);

    if (!Reg) {
      // Try to emit the constant by using an integer constant with a cast.
      const APFloat &Flt = CF->getValueAPF();
      EVT IntVT = TLI.getPointerTy(DL);
      uint32_t IntBitWidth = IntVT.getSizeInBits();
      APSInt SIntVal(IntBitWidth, /*isUnsigned=*/false);
      bool isExact;
      (void)Flt.convertToInteger(SIntVal, APFloat::rmTowardZero, &isExact);
      if (isExact) {
        Register IntegerReg =
            getRegForValue(ConstantInt::get(V->getContext(), SIntVal));
        if (IntegerReg)
          Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
                           /*Op0IsKill=*/false);
      }
    }
  } else if (const auto *Op = dyn_cast<Operator>(V)) {
    if (!selectOperator(Op, Op->getOpcode()))
      if (!isa<Instruction>(Op) ||
          !fastSelectInstruction(cast<Instruction>(Op)))
        return 0;
    Reg = lookUpRegForValue(Op);
  } else if (isa<UndefValue>(V)) {
    Reg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
  }
  return Reg;
}

/// Helper for getRegForValue. This function is called when the value isn't
/// already available in a register and must be materialized with new
/// instructions.
Register FastISel::materializeRegForValue(const Value *V, MVT VT) {
  Register Reg;
  // Give the target-specific code a try first.
  if (isa<Constant>(V))
    Reg = fastMaterializeConstant(cast<Constant>(V));

  // If target-specific code couldn't or didn't want to handle the value, then
  // give target-independent code a try.
  if (!Reg)
    Reg = materializeConstant(V, VT);

  // Don't cache constant materializations in the general ValueMap.
  // To do so would require tracking what uses they dominate.
  if (Reg) {
    LocalValueMap[V] = Reg;
    LastLocalValue = MRI.getVRegDef(Reg);
  }
  return Reg;
}

Register FastISel::lookUpRegForValue(const Value *V) {
  // Look up the value to see if we already have a register for it. We
  // cache values defined by Instructions across blocks, and other values
  // only locally. This is because Instructions already have the SSA
  // def-dominates-use requirement enforced.
  DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(V);
  if (I != FuncInfo.ValueMap.end())
    return I->second;
  return LocalValueMap[V];
}

void FastISel::updateValueMap(const Value *I, Register Reg, unsigned NumRegs) {
  if (!isa<Instruction>(I)) {
    LocalValueMap[I] = Reg;
    return;
  }

  Register &AssignedReg = FuncInfo.ValueMap[I];
  if (!AssignedReg)
    // Use the new register.
    AssignedReg = Reg;
  else if (Reg != AssignedReg) {
    // Arrange for uses of AssignedReg to be replaced by uses of Reg.
    for (unsigned i = 0; i < NumRegs; i++) {
      FuncInfo.RegFixups[AssignedReg + i] = Reg + i;
      FuncInfo.RegsWithFixups.insert(Reg + i);
    }

    AssignedReg = Reg;
  }
}

std::pair<Register, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
  Register IdxN = getRegForValue(Idx);
  if (!IdxN)
    // Unhandled operand. Halt "fast" selection and bail.
    return std::pair<Register, bool>(Register(), false);

  bool IdxNIsKill = hasTrivialKill(Idx);

  // If the index is smaller or larger than intptr_t, truncate or extend it.
  MVT PtrVT = TLI.getPointerTy(DL);
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN,
                      IdxNIsKill);
    IdxNIsKill = true;
  } else if (IdxVT.bitsGT(PtrVT)) {
    IdxN =
        fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill);
    IdxNIsKill = true;
  }
  return std::pair<Register, bool>(IdxN, IdxNIsKill);
}

void FastISel::recomputeInsertPt() {
  if (getLastLocalValue()) {
    FuncInfo.InsertPt = getLastLocalValue();
    FuncInfo.MBB = FuncInfo.InsertPt->getParent();
    ++FuncInfo.InsertPt;
  } else
    FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();

  // Now skip past any EH_LABELs, which must remain at the beginning.
  while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
         FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
    ++FuncInfo.InsertPt;
}

void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
                              MachineBasicBlock::iterator E) {
  assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 &&
         "Invalid iterator!");
  while (I != E) {
    if (LastFlushPoint == I)
      LastFlushPoint = E;
    if (SavedInsertPt == I)
      SavedInsertPt = E;
    if (EmitStartPt == I)
      EmitStartPt = E.isValid() ? &*E : nullptr;
    if (LastLocalValue == I)
      LastLocalValue = E.isValid() ? &*E : nullptr;

    MachineInstr *Dead = &*I;
    ++I;
    Dead->eraseFromParent();
    ++NumFastIselDead;
  }
  recomputeInsertPt();
}

FastISel::SavePoint FastISel::enterLocalValueArea() {
  SavePoint OldInsertPt = FuncInfo.InsertPt;
  recomputeInsertPt();
  return OldInsertPt;
}

void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
  if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
    LastLocalValue = &*std::prev(FuncInfo.InsertPt);

  // Restore the previous insert position.
  FuncInfo.InsertPt = OldInsertPt;
}

bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
  EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
  if (VT == MVT::Other || !VT.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  if (!TLI.isTypeLegal(VT)) {
    // MVT::i1 is special. Allow AND, OR, or XOR because they
    // don't require additional zeroing, which makes them easy.
    if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
                          ISDOpcode == ISD::XOR))
      VT = TLI.getTypeToTransformTo(I->getContext(), VT);
    else
      return false;
  }

  // Check if the first operand is a constant, and handle it as "ri". At -O0,
  // we don't have anything that canonicalizes operand order.
  if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
    if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
      Register Op1 = getRegForValue(I->getOperand(1));
      if (!Op1)
        return false;
      bool Op1IsKill = hasTrivialKill(I->getOperand(1));

      Register ResultReg =
          fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill,
                       CI->getZExtValue(), VT.getSimpleVT());
      if (!ResultReg)
        return false;

      // We successfully emitted code for the given LLVM Instruction.
      updateValueMap(I, ResultReg);
      return true;
    }

  Register Op0 = getRegForValue(I->getOperand(0));
  if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));

  // Check if the second operand is a constant and handle it appropriately.
  if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
    uint64_t Imm = CI->getSExtValue();

    // Transform "sdiv exact X, 8" -> "sra X, 3".
    if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
        cast<BinaryOperator>(I)->isExact() && isPowerOf2_64(Imm)) {
      Imm = Log2_64(Imm);
      ISDOpcode = ISD::SRA;
    }

    // Transform "urem x, pow2" -> "and x, pow2-1".
    if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
        isPowerOf2_64(Imm)) {
      --Imm;
      ISDOpcode = ISD::AND;
    }

    Register ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
                                      Op0IsKill, Imm, VT.getSimpleVT());
    if (!ResultReg)
      return false;

    // We successfully emitted code for the given LLVM Instruction.
    updateValueMap(I, ResultReg);
    return true;
  }

  Register Op1 = getRegForValue(I->getOperand(1));
  if (!Op1) // Unhandled operand. Halt "fast" selection and bail.
    return false;
  bool Op1IsKill = hasTrivialKill(I->getOperand(1));

  // Now we have both operands in registers. Emit the instruction.
  Register ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
                                   ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill);
  if (!ResultReg)
    // Target-specific code wasn't able to find a machine opcode for
    // the given ISD opcode and type. Halt "fast" selection and bail.
    return false;

  // We successfully emitted code for the given LLVM Instruction.
  updateValueMap(I, ResultReg);
  return true;
}

bool FastISel::selectGetElementPtr(const User *I) {
  Register N = getRegForValue(I->getOperand(0));
  if (!N) // Unhandled operand. Halt "fast" selection and bail.
    return false;

  // FIXME: The code below does not handle vector GEPs. Halt "fast" selection
  // and bail.
  if (isa<VectorType>(I->getType()))
    return false;

  bool NIsKill = hasTrivialKill(I->getOperand(0));

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  // FIXME: What's a good SWAG number for MaxOffs?
  uint64_t MaxOffs = 2048;
  MVT VT = TLI.getPointerTy(DL);
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (StructType *StTy = GTI.getStructTypeOrNull()) {
      uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue();
      if (Field) {
        // N = N + Offset
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
        if (TotalOffs >= MaxOffs) {
          N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
          if (!N) // Unhandled operand. Halt "fast" selection and bail.
            return false;
          NIsKill = true;
          TotalOffs = 0;
        }
      }
    } else {
      Type *Ty = GTI.getIndexedType();

      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
        TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
        if (TotalOffs >= MaxOffs) {
          N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
          if (!N) // Unhandled operand. Halt "fast" selection and bail.
            return false;
          NIsKill = true;
          TotalOffs = 0;
        }
        continue;
      }
      if (TotalOffs) {
        N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
        if (!N) // Unhandled operand. Halt "fast" selection and bail.
          return false;
        NIsKill = true;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
      std::pair<Register, bool> Pair = getRegForGEPIndex(Idx);
      Register IdxN = Pair.first;
      bool IdxNIsKill = Pair.second;
      if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
        return false;

      if (ElementSize != 1) {
        IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
        if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
          return false;
        IdxNIsKill = true;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
      if (!N) // Unhandled operand. Halt "fast" selection and bail.
        return false;
    }
  }
  if (TotalOffs) {
    N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
    if (!N) // Unhandled operand. Halt "fast" selection and bail.
      return false;
  }

  // We successfully emitted code for the given LLVM Instruction.
  updateValueMap(I, N);
  return true;
}

bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
                                   const CallInst *CI, unsigned StartIdx) {
  for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
    Value *Val = CI->getArgOperand(i);
    // Check for constants and encode them with a StackMaps::ConstantOp prefix.
    if (const auto *C = dyn_cast<ConstantInt>(Val)) {
      Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
      Ops.push_back(MachineOperand::CreateImm(C->getSExtValue()));
    } else if (isa<ConstantPointerNull>(Val)) {
      Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
      Ops.push_back(MachineOperand::CreateImm(0));
    } else if (auto *AI = dyn_cast<AllocaInst>(Val)) {
      // Values coming from a stack location also require a special encoding,
      // but that is added later on by the target specific frame index
      // elimination implementation.
      auto SI = FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end())
        Ops.push_back(MachineOperand::CreateFI(SI->second));
      else
        return false;
    } else {
      Register Reg = getRegForValue(Val);
      if (!Reg)
        return false;
      Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
    }
  }
  return true;
}

bool FastISel::selectStackmap(const CallInst *I) {
  // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
  //                                  [live variables...])
  assert(I->getCalledFunction()->getReturnType()->isVoidTy() &&
         "Stackmap cannot return a value.");

  // The stackmap intrinsic only records the live variables (the arguments
  // passed to it) and emits NOPs (if requested). Unlike the patchpoint
  // intrinsic, this won't be lowered to a function call. This means we don't
  // have to worry about calling conventions and target-specific lowering code.
  // Instead we perform the call lowering right here.
  //
  // CALLSEQ_START(0, 0...)
  // STACKMAP(id, nbytes, ...)
  // CALLSEQ_END(0, 0)
  //
  SmallVector<MachineOperand, 32> Ops;

  // Add the <id> and <numBytes> constants.
  assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
         "Expected a constant integer.");
  const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
  Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));

  assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
         "Expected a constant integer.");
  const auto *NumBytes =
      cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
  Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));

  // Push live variables for the stack map (skipping the first two arguments
  // <id> and <numBytes>).
  if (!addStackMapLiveVars(Ops, I, 2))
    return false;

  // We are not adding any register mask info here, because the stackmap
  // doesn't clobber anything.

  // Add scratch registers as implicit def and early clobber.
  CallingConv::ID CC = I->getCallingConv();
  const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
  for (unsigned i = 0; ScratchRegs[i]; ++i)
    Ops.push_back(MachineOperand::CreateReg(
        ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
        /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  auto Builder =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown));
  const MCInstrDesc &MCID = Builder.getInstr()->getDesc();
  for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I)
    Builder.addImm(0);

  // Issue STACKMAP.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(TargetOpcode::STACKMAP));
  for (auto const &MO : Ops)
    MIB.add(MO);

  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
      .addImm(0)
      .addImm(0);

  // Inform the Frame Information that we have a stackmap in this function.
  FuncInfo.MF->getFrameInfo().setHasStackMap();

  return true;
}

/// Lower an argument list according to the target calling convention.
///
/// This is a helper for lowering intrinsics that follow a target calling
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
                                 unsigned NumArgs, const Value *Callee,
                                 bool ForceRetVoidTy, CallLoweringInfo &CLI) {
  ArgListTy Args;
  Args.reserve(NumArgs);

  // Populate the argument list.
  for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) {
    Value *V = CI->getOperand(ArgI);

    assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");

    ArgListEntry Entry;
    Entry.Val = V;
    Entry.Ty = V->getType();
    Entry.setAttributes(CI, ArgI);
    Args.push_back(Entry);
  }

  Type *RetTy = ForceRetVoidTy ? Type::getVoidTy(CI->getType()->getContext())
                               : CI->getType();
  CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs);

  return lowerCallTo(CLI);
}

FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee(
    const DataLayout &DL, MCContext &Ctx, CallingConv::ID CC, Type *ResultTy,
    StringRef Target, ArgListTy &&ArgsList, unsigned FixedArgs) {
  SmallString<32> MangledName;
  Mangler::getNameWithPrefix(MangledName, Target, DL);
  MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
  return setCallee(CC, ResultTy, Sym, std::move(ArgsList), FixedArgs);
}

bool FastISel::selectPatchpoint(const CallInst *I) {
  // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
  //                                                 i32 <numBytes>,
  //                                                 i8* <target>,
  //                                                 i32 <numArgs>,
  //                                                 [Args...],
  //                                                 [live variables...])
  CallingConv::ID CC = I->getCallingConv();
  bool IsAnyRegCC = CC == CallingConv::AnyReg;
  bool HasDef = !I->getType()->isVoidTy();
  Value *Callee =
      I->getOperand(PatchPointOpers::TargetPos)->stripPointerCasts();

  // Get the real number of arguments participating in the call <numArgs>
  assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) &&
         "Expected a constant integer.");
  const auto *NumArgsVal =
      cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos));
  unsigned NumArgs = NumArgsVal->getZExtValue();

  // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
  // This includes all meta-operands up to but not including CC.
  unsigned NumMetaOpers = PatchPointOpers::CCPos;
  assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs &&
         "Not enough arguments provided to the patchpoint intrinsic");

  // For AnyRegCC the arguments are lowered later on manually.
  unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
  CallLoweringInfo CLI;
  CLI.setIsPatchPoint();
  if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI))
    return false;

  assert(CLI.Call && "No call instruction specified.");

  SmallVector<MachineOperand, 32> Ops;

  // Add an explicit result reg if we use the anyreg calling convention.
  if (IsAnyRegCC && HasDef) {
    assert(CLI.NumResultRegs == 0 && "Unexpected result register.");
    CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64));
    CLI.NumResultRegs = 1;
    Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*isDef=*/true));
  }

  // Add the <id> and <numBytes> constants.
  assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
         "Expected a constant integer.");
  const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
  Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));

  assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
         "Expected a constant integer.");
  const auto *NumBytes =
      cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
  Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));

  // Add the call target.
  if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) {
    uint64_t CalleeConstAddr =
        cast<ConstantInt>(C->getOperand(0))->getZExtValue();
    Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr));
  } else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) {
    if (C->getOpcode() == Instruction::IntToPtr) {
      uint64_t CalleeConstAddr =
          cast<ConstantInt>(C->getOperand(0))->getZExtValue();
      Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr));
    } else
      llvm_unreachable("Unsupported ConstantExpr.");
  } else if (const auto *GV = dyn_cast<GlobalValue>(Callee)) {
    Ops.push_back(MachineOperand::CreateGA(GV, 0));
  } else if (isa<ConstantPointerNull>(Callee))
    Ops.push_back(MachineOperand::CreateImm(0));
  else
    llvm_unreachable("Unsupported callee address.");

  // Adjust <numArgs> to account for any arguments that have been passed on
  // the stack instead.
  unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size();
  Ops.push_back(MachineOperand::CreateImm(NumCallRegArgs));

  // Add the calling convention
  Ops.push_back(MachineOperand::CreateImm((unsigned)CC));

  // Add the arguments we omitted previously. The register allocator should
  // place these in any free register.
  if (IsAnyRegCC) {
    for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) {
      Register Reg = getRegForValue(I->getArgOperand(i));
      if (!Reg)
        return false;
      Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
    }
  }

  // Push the arguments from the call instruction.
  for (auto Reg : CLI.OutRegs)
    Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));

  // Push live variables for the stack map.
  if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs))
    return false;

  // Push the register mask info.
  Ops.push_back(MachineOperand::CreateRegMask(
      TRI.getCallPreservedMask(*FuncInfo.MF, CC)));

  // Add scratch registers as implicit def and early clobber.
  const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
  for (unsigned i = 0; ScratchRegs[i]; ++i)
    Ops.push_back(MachineOperand::CreateReg(
        ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
        /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));

  // Add implicit defs (return values).
  for (auto Reg : CLI.InRegs)
    Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/true,
                                            /*isImp=*/true));

  // Insert the patchpoint instruction before the call generated by the target.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc,
                                    TII.get(TargetOpcode::PATCHPOINT));

  for (auto &MO : Ops)
    MIB.add(MO);

  MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI);

  // Delete the original call instruction.
  CLI.Call->eraseFromParent();

  // Inform the Frame Information that we have a patchpoint in this function.
  FuncInfo.MF->getFrameInfo().setHasPatchPoint();

  if (CLI.NumResultRegs)
    updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs);
  return true;
}

bool FastISel::selectXRayCustomEvent(const CallInst *I) {
  const auto &Triple = TM.getTargetTriple();
  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
    return true; // don't do anything to this instruction.
  SmallVector<MachineOperand, 8> Ops;
  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
                                          /*isDef=*/false));
  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
                                          /*isDef=*/false));
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
  for (auto &MO : Ops)
    MIB.add(MO);

  // Insert the Patchable Event Call instruction, which gets lowered properly.
  return true;
}

bool FastISel::selectXRayTypedEvent(const CallInst *I) {
  const auto &Triple = TM.getTargetTriple();
  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
    return true; // don't do anything to this instruction.
  SmallVector<MachineOperand, 8> Ops;
  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
                                          /*isDef=*/false));
  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
                                          /*isDef=*/false));
  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
                                          /*isDef=*/false));
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
  for (auto &MO : Ops)
    MIB.add(MO);

  // Insert the Patchable Typed Event Call instruction, which gets lowered
  // properly.
  return true;
}

/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
  SmallVector<Attribute::AttrKind, 2> Attrs;
  if (CLI.RetSExt)
    Attrs.push_back(Attribute::SExt);
  if (CLI.RetZExt)
    Attrs.push_back(Attribute::ZExt);
  if (CLI.IsInReg)
    Attrs.push_back(Attribute::InReg);

  return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
                            Attrs);
}

bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
                           unsigned NumArgs) {
  MCContext &Ctx = MF->getContext();
  SmallString<32> MangledName;
  Mangler::getNameWithPrefix(MangledName, SymName, DL);
  MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
  return lowerCallTo(CI, Sym, NumArgs);
}

bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
                           unsigned NumArgs) {
  FunctionType *FTy = CI->getFunctionType();
  Type *RetTy = CI->getType();

  ArgListTy Args;
  Args.reserve(NumArgs);

  // Populate the argument list.
  // Attributes for args start at offset 1, after the return attribute.
  for (unsigned ArgI = 0; ArgI != NumArgs; ++ArgI) {
    Value *V = CI->getOperand(ArgI);

    assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");

    ArgListEntry Entry;
    Entry.Val = V;
    Entry.Ty = V->getType();
    Entry.setAttributes(CI, ArgI);
    Args.push_back(Entry);
  }
  TLI.markLibCallAttributes(MF, CI->getCallingConv(), Args);

  CallLoweringInfo CLI;
  CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), *CI, NumArgs);

  return lowerCallTo(CLI);
}

bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
  // Handle the incoming return values from the call.
  CLI.clearIns();
  SmallVector<EVT, 4> RetTys;
  ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys);

  SmallVector<ISD::OutputArg, 4> Outs;
  GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL);

  bool CanLowerReturn = TLI.CanLowerReturn(
      CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext());

  // FIXME: sret demotion isn't supported yet - bail out.
  if (!CanLowerReturn)
    return false;

  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
    EVT VT = RetTys[I];
    MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT);
    unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT);
    for (unsigned i = 0; i != NumRegs; ++i) {
      ISD::InputArg MyFlags;
      MyFlags.VT = RegisterVT;
      MyFlags.ArgVT = VT;
      MyFlags.Used = CLI.IsReturnValueUsed;
      if (CLI.RetSExt)
        MyFlags.Flags.setSExt();
      if (CLI.RetZExt)
        MyFlags.Flags.setZExt();
      if (CLI.IsInReg)
        MyFlags.Flags.setInReg();
      CLI.Ins.push_back(MyFlags);
    }
  }

  // Handle all of the outgoing arguments.
  CLI.clearOuts();
  for (auto &Arg : CLI.getArgs()) {
    Type *FinalType = Arg.Ty;
    if (Arg.IsByVal)
      FinalType = cast<PointerType>(Arg.Ty)->getElementType();
    bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
        FinalType, CLI.CallConv, CLI.IsVarArg);

    ISD::ArgFlagsTy Flags;
    if (Arg.IsZExt)
      Flags.setZExt();
    if (Arg.IsSExt)
      Flags.setSExt();
    if (Arg.IsInReg)
      Flags.setInReg();
    if (Arg.IsSRet)
      Flags.setSRet();
    if (Arg.IsSwiftSelf)
      Flags.setSwiftSelf();
    if (Arg.IsSwiftError)
      Flags.setSwiftError();
    if (Arg.IsCFGuardTarget)
      Flags.setCFGuardTarget();
    if (Arg.IsByVal)
      Flags.setByVal();
    if (Arg.IsInAlloca) {
      Flags.setInAlloca();
      // Set the byval flag for CCAssignFn callbacks that don't know about
      // inalloca. This way we can know how many bytes we should've allocated
      // and how many bytes a callee cleanup function will pop. If we port
      // inalloca to more targets, we'll have to add custom inalloca handling
      // in the various CC lowering callbacks.
      Flags.setByVal();
    }
    if (Arg.IsPreallocated) {
      Flags.setPreallocated();
      // Set the byval flag for CCAssignFn callbacks that don't know about
      // preallocated. This way we can know how many bytes we should've
      // allocated and how many bytes a callee cleanup function will pop. If we
      // port preallocated to more targets, we'll have to add custom
      // preallocated handling in the various CC lowering callbacks.
      Flags.setByVal();
    }
    if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) {
      PointerType *Ty = cast<PointerType>(Arg.Ty);
      Type *ElementTy = Ty->getElementType();
      unsigned FrameSize =
          DL.getTypeAllocSize(Arg.ByValType ? Arg.ByValType : ElementTy);

      // For ByVal, alignment should come from FE. BE will guess if this info
      // is not there, but there are cases it cannot get right.
      MaybeAlign FrameAlign = Arg.Alignment;
      if (!FrameAlign)
        FrameAlign = Align(TLI.getByValTypeAlignment(ElementTy, DL));
      Flags.setByValSize(FrameSize);
      Flags.setByValAlign(*FrameAlign);
    }
    if (Arg.IsNest)
      Flags.setNest();
    if (NeedsRegBlock)
      Flags.setInConsecutiveRegs();
    Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));

    CLI.OutVals.push_back(Arg.Val);
    CLI.OutFlags.push_back(Flags);
  }

  if (!fastLowerCall(CLI))
    return false;

  // Set all unused physreg defs as dead.
  assert(CLI.Call && "No call instruction specified.");
  CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI);

  if (CLI.NumResultRegs && CLI.CB)
    updateValueMap(CLI.CB, CLI.ResultReg, CLI.NumResultRegs);

  // Set labels for heapallocsite call.
  if (CLI.CB)
    if (MDNode *MD = CLI.CB->getMetadata("heapallocsite"))
      CLI.Call->setHeapAllocMarker(*MF, MD);

  return true;
}

bool FastISel::lowerCall(const CallInst *CI) {
  FunctionType *FuncTy = CI->getFunctionType();
  Type *RetTy = CI->getType();

  ArgListTy Args;
  ArgListEntry Entry;
  Args.reserve(CI->arg_size());

  for (auto i = CI->arg_begin(), e = CI->arg_end(); i != e; ++i) {
    Value *V = *i;

    // Skip empty types
    if (V->getType()->isEmptyTy())
      continue;

    Entry.Val = V;
    Entry.Ty = V->getType();

    // Skip the first return-type Attribute to get to params.
    Entry.setAttributes(CI, i - CI->arg_begin());
    Args.push_back(Entry);
  }

  // Check if target-independent constraints permit a tail call here.
  // Target-dependent constraints are checked within fastLowerCall.
  bool IsTailCall = CI->isTailCall();
  if (IsTailCall && !isInTailCallPosition(*CI, TM))
    IsTailCall = false;
  if (IsTailCall && MF->getFunction()
                        .getFnAttribute("disable-tail-calls")
                        .getValueAsString() == "true")
    IsTailCall = false;

  CallLoweringInfo CLI;
  CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI)
      .setTailCall(IsTailCall);

  return lowerCallTo(CLI);
}

bool FastISel::selectCall(const User *I) {
  const CallInst *Call = cast<CallInst>(I);

  // Handle simple inline asms.
  if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledOperand())) {
    // If the inline asm has side effects, then make sure that no local value
    // lives across by flushing the local value map.
    if (IA->hasSideEffects())
      flushLocalValueMap();

    // Don't attempt to handle constraints.
    if (!IA->getConstraintString().empty())
      return false;

    unsigned ExtraInfo = 0;
    if (IA->hasSideEffects())
      ExtraInfo |= InlineAsm::Extra_HasSideEffects;
    if (IA->isAlignStack())
      ExtraInfo |= InlineAsm::Extra_IsAlignStack;
    if (Call->isConvergent())
      ExtraInfo |= InlineAsm::Extra_IsConvergent;
    ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;

    MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                      TII.get(TargetOpcode::INLINEASM));
    MIB.addExternalSymbol(IA->getAsmString().c_str());
    MIB.addImm(ExtraInfo);

    const MDNode *SrcLoc = Call->getMetadata("srcloc");
    if (SrcLoc)
      MIB.addMetadata(SrcLoc);

    return true;
  }

  // Handle intrinsic function calls.
  if (const auto *II = dyn_cast<IntrinsicInst>(Call))
    return selectIntrinsicCall(II);

  // Usually, it does not make sense to initialize a value,
  // make an unrelated function call and use the value, because
  // it tends to be spilled on the stack. So, we move the pointer
  // to the last local value to the beginning of the block, so that
  // all the values which have already been materialized
  // appear after the call. It also makes sense to skip intrinsics
  // since they tend to be inlined.
  flushLocalValueMap();

  return lowerCall(Call);
}

bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
  switch (II->getIntrinsicID()) {
  default:
    break;
  // At -O0 we don't care about the lifetime intrinsics.
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  // The donothing intrinsic does, well, nothing.
  case Intrinsic::donothing:
  // Neither does the sideeffect intrinsic.
  case Intrinsic::sideeffect:
  // Neither does the assume intrinsic; it's also OK not to codegen its
  // operand.
  case Intrinsic::assume:
    return true;
  case Intrinsic::dbg_declare: {
    const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
    assert(DI->getVariable() && "Missing variable");
    if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
                        << " (!hasDebugInfo)\n");
      return true;
    }

    const Value *Address = DI->getAddress();
    if (!Address || isa<UndefValue>(Address)) {
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
                        << " (bad/undef address)\n");
      return true;
    }

    // Byval arguments with frame indices were already handled after argument
    // lowering and before isel.
    const auto *Arg =
        dyn_cast<Argument>(Address->stripInBoundsConstantOffsets());
    if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
      return true;

    Optional<MachineOperand> Op;
    if (Register Reg = lookUpRegForValue(Address))
      Op = MachineOperand::CreateReg(Reg, false);

    // If we have a VLA that has a "use" in a metadata node that's then used
    // here but it has no other uses, then we have a problem. E.g.,
    //
    //   int foo (const int *x) {
    //     char a[*x];
    //     return 0;
    //   }
    //
    // If we assign 'a' a vreg and fast isel later on has to use the selection
    // DAG isel, it will want to copy the value to the vreg. However, there are
    // no uses, which goes counter to what selection DAG isel expects.
    if (!Op && !Address->use_empty() && isa<Instruction>(Address) &&
        (!isa<AllocaInst>(Address) ||
         !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
      Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address),
                                     false);

    if (Op) {
      assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
             "Expected inlined-at fields to agree");
      // A dbg.declare describes the address of a source variable, so lower it
      // into an indirect DBG_VALUE.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
              *Op, DI->getVariable(), DI->getExpression());
    } else {
      // We can't yet handle anything else here because it would require
      // generating code, thus altering codegen because of debug info.
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
                        << " (no materialized reg for address)\n");
    }
    return true;
  }
  case Intrinsic::dbg_value: {
    // This form of DBG_VALUE is target-independent.
    const DbgValueInst *DI = cast<DbgValueInst>(II);
    const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
    const Value *V = DI->getValue();
    assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
           "Expected inlined-at fields to agree");
    if (!V || isa<UndefValue>(V)) {
      // Currently the optimizer can produce this; insert an undef to
      // help debugging.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
              DI->getVariable(), DI->getExpression());
    } else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
      if (CI->getBitWidth() > 64)
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
            .addCImm(CI)
            .addImm(0U)
            .addMetadata(DI->getVariable())
            .addMetadata(DI->getExpression());
      else
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
            .addImm(CI->getZExtValue())
            .addImm(0U)
            .addMetadata(DI->getVariable())
            .addMetadata(DI->getExpression());
    } else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
          .addFPImm(CF)
          .addImm(0U)
          .addMetadata(DI->getVariable())
          .addMetadata(DI->getExpression());
    } else if (Register Reg = lookUpRegForValue(V)) {
      // FIXME: This does not handle register-indirect values at offset 0.
      bool IsIndirect = false;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
              DI->getVariable(), DI->getExpression());
    } else {
      // We don't know how to handle other cases, so we drop.
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
    }
    return true;
  }
  case Intrinsic::dbg_label: {
    const DbgLabelInst *DI = cast<DbgLabelInst>(II);
    assert(DI->getLabel() && "Missing label");
    if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
      return true;
    }

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
    return true;
  }
  case Intrinsic::objectsize:
    llvm_unreachable("llvm.objectsize.* should have been lowered already");

  case Intrinsic::is_constant:
    llvm_unreachable("llvm.is.constant.* should have been lowered already");

  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
  case Intrinsic::expect: {
    Register ResultReg = getRegForValue(II->getArgOperand(0));
    if (!ResultReg)
      return false;
    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::experimental_stackmap:
    return selectStackmap(II);
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    return selectPatchpoint(II);

  case Intrinsic::xray_customevent:
    return selectXRayCustomEvent(II);
  case Intrinsic::xray_typedevent:
    return selectXRayTypedEvent(II);

  case Intrinsic::memcpy:
  case Intrinsic::memcpy_element_unordered_atomic:
  case Intrinsic::memcpy_inline:
  case Intrinsic::memmove:
  case Intrinsic::memmove_element_unordered_atomic:
  case Intrinsic::memset:
  case Intrinsic::memset_element_unordered_atomic:
    // Flush the local value map just like we do for regular calls,
    // to avoid excessive spills and reloads.
    // These intrinsics mostly turn into library calls at O0; and
    // even memcpy_inline should be treated like one for this purpose.
    flushLocalValueMap();
    break;
  }

  return fastLowerIntrinsicCall(II);
}

bool FastISel::selectCast(const User *I, unsigned Opcode) {
  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(DL, I->getType());

  if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other ||
      !DstVT.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  // Check if the destination type is legal.
  if (!TLI.isTypeLegal(DstVT))
    return false;

  // Check if the source operand is legal.
  if (!TLI.isTypeLegal(SrcVT))
    return false;

  Register InputReg = getRegForValue(I->getOperand(0));
  if (!InputReg)
    // Unhandled operand. Halt "fast" selection and bail.
    return false;

  bool InputRegIsKill = hasTrivialKill(I->getOperand(0));

  Register ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
                                  Opcode, InputReg, InputRegIsKill);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool FastISel::selectBitCast(const User *I) {
  // If the bitcast doesn't change the type, just use the operand value.
  if (I->getType() == I->getOperand(0)->getType()) {
    Register Reg = getRegForValue(I->getOperand(0));
    if (!Reg)
      return false;
    updateValueMap(I, Reg);
    return true;
  }

  // Bitcasts of other values become reg-reg copies or BITCAST operators.
  EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType());
  EVT DstEVT = TLI.getValueType(DL, I->getType());
  if (SrcEVT == MVT::Other || DstEVT == MVT::Other ||
      !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT))
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DstVT = DstEVT.getSimpleVT();
  Register Op0 = getRegForValue(I->getOperand(0));
  if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
    return false;
  bool Op0IsKill = hasTrivialKill(I->getOperand(0));

  // First, try to perform the bitcast by inserting a reg-reg copy.
  Register ResultReg;
  if (SrcVT == DstVT) {
    const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT);
    const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
    // Don't attempt a cross-class copy. It will likely fail.
    if (SrcClass == DstClass) {
      ResultReg = createResultReg(DstClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0);
    }
  }

  // If the reg-reg copy failed, select a BITCAST opcode.
  if (!ResultReg)
    ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool FastISel::selectFreeze(const User *I) {
  Register Reg = getRegForValue(I->getOperand(0));
  if (!Reg)
    // Unhandled operand.
    return false;

  EVT ETy = TLI.getValueType(DL, I->getOperand(0)->getType());
  if (ETy == MVT::Other || !TLI.isTypeLegal(ETy))
    // Unhandled type, bail out.
    return false;

  MVT Ty = ETy.getSimpleVT();
  const TargetRegisterClass *TyRegClass = TLI.getRegClassFor(Ty);
  Register ResultReg = createResultReg(TyRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);

  updateValueMap(I, ResultReg);
  return true;
}

// Remove local value instructions starting from the instruction after
// SavedLastLocalValue to the current function insert point.
void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) {
  MachineInstr *CurLastLocalValue = getLastLocalValue();
  if (CurLastLocalValue != SavedLastLocalValue) {
    // Find the first local value instruction to be deleted.
    // This is the instruction after SavedLastLocalValue if it is non-NULL.
    // Otherwise it's the first instruction in the block.
    MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue);
    if (SavedLastLocalValue)
      ++FirstDeadInst;
    else
      FirstDeadInst = FuncInfo.MBB->getFirstNonPHI();
    setLastLocalValue(SavedLastLocalValue);
    removeDeadCode(FirstDeadInst, FuncInfo.InsertPt);
  }
}

bool FastISel::selectInstruction(const Instruction *I) {
  // Flush the local value map before starting each instruction.
  // This improves locality and debugging, and can reduce spills.
  // Reuse of values across IR instructions is relatively uncommon.
  flushLocalValueMap();

  MachineInstr *SavedLastLocalValue = getLastLocalValue();
  // Just before the terminator instruction, insert instructions to
  // feed PHI nodes in successor blocks.
1546 if (I->isTerminator()) { 1547 if (!handlePHINodesInSuccessorBlocks(I->getParent())) { 1548 // PHI node handling may have generated local value instructions, 1549 // even though it failed to handle all PHI nodes. 1550 // We remove these instructions because SelectionDAGISel will generate 1551 // them again. 1552 removeDeadLocalValueCode(SavedLastLocalValue); 1553 return false; 1554 } 1555 } 1556 1557 // FastISel does not handle any operand bundles except OB_funclet. 1558 if (auto *Call = dyn_cast<CallBase>(I)) 1559 for (unsigned i = 0, e = Call->getNumOperandBundles(); i != e; ++i) 1560 if (Call->getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet) 1561 return false; 1562 1563 DbgLoc = I->getDebugLoc(); 1564 1565 SavedInsertPt = FuncInfo.InsertPt; 1566 1567 if (const auto *Call = dyn_cast<CallInst>(I)) { 1568 const Function *F = Call->getCalledFunction(); 1569 LibFunc Func; 1570 1571 // As a special case, don't handle calls to builtin library functions that 1572 // may be translated directly to target instructions. 1573 if (F && !F->hasLocalLinkage() && F->hasName() && 1574 LibInfo->getLibFunc(F->getName(), Func) && 1575 LibInfo->hasOptimizedCodeGen(Func)) 1576 return false; 1577 1578 // Don't handle Intrinsic::trap if a trap function is specified. 1579 if (F && F->getIntrinsicID() == Intrinsic::trap && 1580 Call->hasFnAttr("trap-func-name")) 1581 return false; 1582 } 1583 1584 // First, try doing target-independent selection. 1585 if (!SkipTargetIndependentISel) { 1586 if (selectOperator(I, I->getOpcode())) { 1587 ++NumFastIselSuccessIndependent; 1588 DbgLoc = DebugLoc(); 1589 return true; 1590 } 1591 // Remove dead code. 1592 recomputeInsertPt(); 1593 if (SavedInsertPt != FuncInfo.InsertPt) 1594 removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); 1595 SavedInsertPt = FuncInfo.InsertPt; 1596 } 1597 // Next, try calling the target to attempt to handle the instruction. 1598 if (fastSelectInstruction(I)) { 1599 ++NumFastIselSuccessTarget; 1600 DbgLoc = DebugLoc(); 1601 return true; 1602 } 1603 // Remove dead code. 1604 recomputeInsertPt(); 1605 if (SavedInsertPt != FuncInfo.InsertPt) 1606 removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); 1607 1608 DbgLoc = DebugLoc(); 1609 // Undo phi node updates, because they will be added again by SelectionDAG. 1610 if (I->isTerminator()) { 1611 // PHI node handling may have generated local value instructions. 1612 // We remove them because SelectionDAGISel will generate them again. 1613 removeDeadLocalValueCode(SavedLastLocalValue); 1614 FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); 1615 } 1616 return false; 1617 } 1618 1619 /// Emit an unconditional branch to the given block, unless it is the immediate 1620 /// (fall-through) successor, and update the CFG. 1621 void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, 1622 const DebugLoc &DbgLoc) { 1623 if (FuncInfo.MBB->getBasicBlock()->sizeWithoutDebug() > 1 && 1624 FuncInfo.MBB->isLayoutSuccessor(MSucc)) { 1625 // For more accurate line information if this is the only non-debug 1626 // instruction in the block then emit it, otherwise we have the 1627 // unconditional fall-through case, which needs no instructions. 1628 } else { 1629 // The unconditional branch case. 
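// For example, "br label %next" emits nothing at all when %next is the
// layout successor and the block already contains other non-debug
// instructions (the fall-through case above); otherwise the target is asked
// to insert an explicit unconditional branch to MSucc here.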
1630 TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr, 1631 SmallVector<MachineOperand, 0>(), DbgLoc); 1632 } 1633 if (FuncInfo.BPI) { 1634 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 1635 FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock()); 1636 FuncInfo.MBB->addSuccessor(MSucc, BranchProbability); 1637 } else 1638 FuncInfo.MBB->addSuccessorWithoutProb(MSucc); 1639 } 1640 1641 void FastISel::finishCondBranch(const BasicBlock *BranchBB, 1642 MachineBasicBlock *TrueMBB, 1643 MachineBasicBlock *FalseMBB) { 1644 // Add TrueMBB as successor unless it is equal to the FalseMBB: This can 1645 // happen in degenerate IR and MachineIR forbids to have a block twice in the 1646 // successor/predecessor lists. 1647 if (TrueMBB != FalseMBB) { 1648 if (FuncInfo.BPI) { 1649 auto BranchProbability = 1650 FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock()); 1651 FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability); 1652 } else 1653 FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB); 1654 } 1655 1656 fastEmitBranch(FalseMBB, DbgLoc); 1657 } 1658 1659 /// Emit an FNeg operation. 1660 bool FastISel::selectFNeg(const User *I, const Value *In) { 1661 Register OpReg = getRegForValue(In); 1662 if (!OpReg) 1663 return false; 1664 bool OpRegIsKill = hasTrivialKill(In); 1665 1666 // If the target has ISD::FNEG, use it. 1667 EVT VT = TLI.getValueType(DL, I->getType()); 1668 Register ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG, 1669 OpReg, OpRegIsKill); 1670 if (ResultReg) { 1671 updateValueMap(I, ResultReg); 1672 return true; 1673 } 1674 1675 // Bitcast the value to integer, twiddle the sign bit with xor, 1676 // and then bitcast it back to floating-point. 1677 if (VT.getSizeInBits() > 64) 1678 return false; 1679 EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits()); 1680 if (!TLI.isTypeLegal(IntVT)) 1681 return false; 1682 1683 Register IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), 1684 ISD::BITCAST, OpReg, OpRegIsKill); 1685 if (!IntReg) 1686 return false; 1687 1688 Register IntResultReg = fastEmit_ri_( 1689 IntVT.getSimpleVT(), ISD::XOR, IntReg, /*Op0IsKill=*/true, 1690 UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT()); 1691 if (!IntResultReg) 1692 return false; 1693 1694 ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST, 1695 IntResultReg, /*Op0IsKill=*/true); 1696 if (!ResultReg) 1697 return false; 1698 1699 updateValueMap(I, ResultReg); 1700 return true; 1701 } 1702 1703 bool FastISel::selectExtractValue(const User *U) { 1704 const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U); 1705 if (!EVI) 1706 return false; 1707 1708 // Make sure we only try to handle extracts with a legal result. But also 1709 // allow i1 because it's easy. 1710 EVT RealVT = TLI.getValueType(DL, EVI->getType(), /*AllowUnknown=*/true); 1711 if (!RealVT.isSimple()) 1712 return false; 1713 MVT VT = RealVT.getSimpleVT(); 1714 if (!TLI.isTypeLegal(VT) && VT != MVT::i1) 1715 return false; 1716 1717 const Value *Op0 = EVI->getOperand(0); 1718 Type *AggTy = Op0->getType(); 1719 1720 // Get the base result register. 
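// For example, if %agg has type {i32, i64} and was assigned consecutive
// virtual registers starting at %vr0 (names are illustrative), then
// "extractvalue {i32, i64} %agg, 1" resolves to %vr0 + 1 on a target where
// the i32 field occupies a single register: the base register found here
// plus the number of registers covering the skipped leading fields.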
1721 unsigned ResultReg;
1722 DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(Op0);
1723 if (I != FuncInfo.ValueMap.end())
1724 ResultReg = I->second;
1725 else if (isa<Instruction>(Op0))
1726 ResultReg = FuncInfo.InitializeRegForValue(Op0);
1727 else
1728 return false; // fast-isel can't handle aggregate constants at the moment
1729
1730 // Get the actual result register, which is an offset from the base register.
1731 unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
1732
1733 SmallVector<EVT, 4> AggValueVTs;
1734 ComputeValueVTs(TLI, DL, AggTy, AggValueVTs);
1735
1736 for (unsigned i = 0; i < VTIndex; i++)
1737 ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
1738
1739 updateValueMap(EVI, ResultReg);
1740 return true;
1741 }
1742
1743 bool FastISel::selectOperator(const User *I, unsigned Opcode) {
1744 switch (Opcode) {
1745 case Instruction::Add:
1746 return selectBinaryOp(I, ISD::ADD);
1747 case Instruction::FAdd:
1748 return selectBinaryOp(I, ISD::FADD);
1749 case Instruction::Sub:
1750 return selectBinaryOp(I, ISD::SUB);
1751 case Instruction::FSub:
1752 return selectBinaryOp(I, ISD::FSUB);
1753 case Instruction::Mul:
1754 return selectBinaryOp(I, ISD::MUL);
1755 case Instruction::FMul:
1756 return selectBinaryOp(I, ISD::FMUL);
1757 case Instruction::SDiv:
1758 return selectBinaryOp(I, ISD::SDIV);
1759 case Instruction::UDiv:
1760 return selectBinaryOp(I, ISD::UDIV);
1761 case Instruction::FDiv:
1762 return selectBinaryOp(I, ISD::FDIV);
1763 case Instruction::SRem:
1764 return selectBinaryOp(I, ISD::SREM);
1765 case Instruction::URem:
1766 return selectBinaryOp(I, ISD::UREM);
1767 case Instruction::FRem:
1768 return selectBinaryOp(I, ISD::FREM);
1769 case Instruction::Shl:
1770 return selectBinaryOp(I, ISD::SHL);
1771 case Instruction::LShr:
1772 return selectBinaryOp(I, ISD::SRL);
1773 case Instruction::AShr:
1774 return selectBinaryOp(I, ISD::SRA);
1775 case Instruction::And:
1776 return selectBinaryOp(I, ISD::AND);
1777 case Instruction::Or:
1778 return selectBinaryOp(I, ISD::OR);
1779 case Instruction::Xor:
1780 return selectBinaryOp(I, ISD::XOR);
1781
1782 case Instruction::FNeg:
1783 return selectFNeg(I, I->getOperand(0));
1784
1785 case Instruction::GetElementPtr:
1786 return selectGetElementPtr(I);
1787
1788 case Instruction::Br: {
1789 const BranchInst *BI = cast<BranchInst>(I);
1790
1791 if (BI->isUnconditional()) {
1792 const BasicBlock *LLVMSucc = BI->getSuccessor(0);
1793 MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
1794 fastEmitBranch(MSucc, BI->getDebugLoc());
1795 return true;
1796 }
1797
1798 // Conditional branches are not handled yet.
1799 // Halt "fast" selection and bail.
1800 return false;
1801 }
1802
1803 case Instruction::Unreachable:
1804 if (TM.Options.TrapUnreachable)
1805 return fastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0;
1806 else
1807 return true;
1808
1809 case Instruction::Alloca:
1810 // FunctionLowering has the static-sized case covered.
1811 if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
1812 return true;
1813
1814 // Dynamic-sized alloca is not handled yet.
1815 return false;
1816
1817 case Instruction::Call:
1818 // On AIX, call lowering uses the DAG-ISEL path currently so that the
1819 // callee of the direct function call instruction will be mapped to the
1820 // symbol for the function's entry point, which is distinct from the
1821 // function descriptor symbol. The latter is the symbol whose XCOFF symbol
1822 // name is the C-linkage name of the source level function.
1823 if (TM.getTargetTriple().isOSAIX())
1824 return false;
1825 return selectCall(I);
1826
1827 case Instruction::BitCast:
1828 return selectBitCast(I);
1829
1830 case Instruction::FPToSI:
1831 return selectCast(I, ISD::FP_TO_SINT);
1832 case Instruction::ZExt:
1833 return selectCast(I, ISD::ZERO_EXTEND);
1834 case Instruction::SExt:
1835 return selectCast(I, ISD::SIGN_EXTEND);
1836 case Instruction::Trunc:
1837 return selectCast(I, ISD::TRUNCATE);
1838 case Instruction::SIToFP:
1839 return selectCast(I, ISD::SINT_TO_FP);
1840
1841 case Instruction::IntToPtr: // Deliberate fall-through.
1842 case Instruction::PtrToInt: {
1843 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
1844 EVT DstVT = TLI.getValueType(DL, I->getType());
1845 if (DstVT.bitsGT(SrcVT))
1846 return selectCast(I, ISD::ZERO_EXTEND);
1847 if (DstVT.bitsLT(SrcVT))
1848 return selectCast(I, ISD::TRUNCATE);
1849 Register Reg = getRegForValue(I->getOperand(0));
1850 if (!Reg)
1851 return false;
1852 updateValueMap(I, Reg);
1853 return true;
1854 }
1855
1856 case Instruction::ExtractValue:
1857 return selectExtractValue(I);
1858
1859 case Instruction::Freeze:
1860 return selectFreeze(I);
1861
1862 case Instruction::PHI:
1863 llvm_unreachable("FastISel shouldn't visit PHI nodes!");
1864
1865 default:
1866 // Unhandled instruction. Halt "fast" selection and bail.
1867 return false;
1868 }
1869 }
1870
1871 FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
1872 const TargetLibraryInfo *LibInfo,
1873 bool SkipTargetIndependentISel)
1874 : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
1875 MFI(FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
1876 TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()),
1877 TII(*MF->getSubtarget().getInstrInfo()),
1878 TLI(*MF->getSubtarget().getTargetLowering()),
1879 TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
1880 SkipTargetIndependentISel(SkipTargetIndependentISel),
1881 LastLocalValue(nullptr), EmitStartPt(nullptr) {}
1882
1883 FastISel::~FastISel() = default;
1884
1885 bool FastISel::fastLowerArguments() { return false; }
1886
1887 bool FastISel::fastLowerCall(CallLoweringInfo & /*CLI*/) { return false; }
1888
1889 bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) {
1890 return false;
1891 }
1892
1893 unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; }
1894
1895 unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/,
1896 bool /*Op0IsKill*/) {
1897 return 0;
1898 }
1899
1900 unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/,
1901 bool /*Op0IsKill*/, unsigned /*Op1*/,
1902 bool /*Op1IsKill*/) {
1903 return 0;
1904 }
1905
1906 unsigned FastISel::fastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
1907 return 0;
1908 }
1909
1910 unsigned FastISel::fastEmit_f(MVT, MVT, unsigned,
1911 const ConstantFP * /*FPImm*/) {
1912 return 0;
1913 }
1914
1915 unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
1916 bool /*Op0IsKill*/, uint64_t /*Imm*/) {
1917 return 0;
1918 }
1919
1920 /// This method is a wrapper of fastEmit_ri. It first tries to emit an
1921 /// instruction with an immediate operand using fastEmit_ri.
1922 /// If that fails, it materializes the immediate into a register and tries
1923 /// fastEmit_rr instead.
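///
/// For example, "mul i32 %x, 8" is first strength-reduced to a left shift by
/// 3; if the target has no shift-by-immediate pattern, the immediate is
/// materialized into a register (via fastEmit_i, or getRegForValue as a
/// fallback) and the operation is retried in register-register form with
/// fastEmit_rr.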
1924 Register FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, 1925 bool Op0IsKill, uint64_t Imm, MVT ImmType) { 1926 // If this is a multiply by a power of two, emit this as a shift left. 1927 if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) { 1928 Opcode = ISD::SHL; 1929 Imm = Log2_64(Imm); 1930 } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) { 1931 // div x, 8 -> srl x, 3 1932 Opcode = ISD::SRL; 1933 Imm = Log2_64(Imm); 1934 } 1935 1936 // Horrible hack (to be removed), check to make sure shift amounts are 1937 // in-range. 1938 if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) && 1939 Imm >= VT.getSizeInBits()) 1940 return 0; 1941 1942 // First check if immediate type is legal. If not, we can't use the ri form. 1943 Register ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); 1944 if (ResultReg) 1945 return ResultReg; 1946 Register MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm); 1947 bool IsImmKill = true; 1948 if (!MaterialReg) { 1949 // This is a bit ugly/slow, but failing here means falling out of 1950 // fast-isel, which would be very slow. 1951 IntegerType *ITy = 1952 IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits()); 1953 MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); 1954 if (!MaterialReg) 1955 return 0; 1956 // FIXME: If the materialized register here has no uses yet then this 1957 // will be the first use and we should be able to mark it as killed. 1958 // However, the local value area for materialising constant expressions 1959 // grows down, not up, which means that any constant expressions we generate 1960 // later which also use 'Imm' could be after this instruction and therefore 1961 // after this kill. 1962 IsImmKill = false; 1963 } 1964 return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill); 1965 } 1966 1967 Register FastISel::createResultReg(const TargetRegisterClass *RC) { 1968 return MRI.createVirtualRegister(RC); 1969 } 1970 1971 Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op, 1972 unsigned OpNum) { 1973 if (Op.isVirtual()) { 1974 const TargetRegisterClass *RegClass = 1975 TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); 1976 if (!MRI.constrainRegClass(Op, RegClass)) { 1977 // If it's not legal to COPY between the register classes, something 1978 // has gone very wrong before we got here. 
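// An illustrative case (register class names are hypothetical): if operand
// OpNum of II requires a narrower class, say GPRnopc, while Op currently
// lives in the broader GPR class, constrainRegClass() above simply narrows
// the vreg in place; only when the two classes share no common subclass do we
// fall back to the cross-class COPY emitted below.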
1979 Register NewOp = createResultReg(RegClass); 1980 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1981 TII.get(TargetOpcode::COPY), NewOp).addReg(Op); 1982 return NewOp; 1983 } 1984 } 1985 return Op; 1986 } 1987 1988 Register FastISel::fastEmitInst_(unsigned MachineInstOpcode, 1989 const TargetRegisterClass *RC) { 1990 Register ResultReg = createResultReg(RC); 1991 const MCInstrDesc &II = TII.get(MachineInstOpcode); 1992 1993 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg); 1994 return ResultReg; 1995 } 1996 1997 Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode, 1998 const TargetRegisterClass *RC, unsigned Op0, 1999 bool Op0IsKill) { 2000 const MCInstrDesc &II = TII.get(MachineInstOpcode); 2001 2002 Register ResultReg = createResultReg(RC); 2003 Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); 2004 2005 if (II.getNumDefs() >= 1) 2006 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 2007 .addReg(Op0, getKillRegState(Op0IsKill)); 2008 else { 2009 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2010 .addReg(Op0, getKillRegState(Op0IsKill)); 2011 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2012 TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); 2013 } 2014 2015 return ResultReg; 2016 } 2017 2018 Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, 2019 const TargetRegisterClass *RC, unsigned Op0, 2020 bool Op0IsKill, unsigned Op1, 2021 bool Op1IsKill) { 2022 const MCInstrDesc &II = TII.get(MachineInstOpcode); 2023 2024 Register ResultReg = createResultReg(RC); 2025 Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); 2026 Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); 2027 2028 if (II.getNumDefs() >= 1) 2029 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 2030 .addReg(Op0, getKillRegState(Op0IsKill)) 2031 .addReg(Op1, getKillRegState(Op1IsKill)); 2032 else { 2033 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2034 .addReg(Op0, getKillRegState(Op0IsKill)) 2035 .addReg(Op1, getKillRegState(Op1IsKill)); 2036 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2037 TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); 2038 } 2039 return ResultReg; 2040 } 2041 2042 Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, 2043 const TargetRegisterClass *RC, unsigned Op0, 2044 bool Op0IsKill, unsigned Op1, 2045 bool Op1IsKill, unsigned Op2, 2046 bool Op2IsKill) { 2047 const MCInstrDesc &II = TII.get(MachineInstOpcode); 2048 2049 Register ResultReg = createResultReg(RC); 2050 Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); 2051 Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); 2052 Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); 2053 2054 if (II.getNumDefs() >= 1) 2055 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 2056 .addReg(Op0, getKillRegState(Op0IsKill)) 2057 .addReg(Op1, getKillRegState(Op1IsKill)) 2058 .addReg(Op2, getKillRegState(Op2IsKill)); 2059 else { 2060 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2061 .addReg(Op0, getKillRegState(Op0IsKill)) 2062 .addReg(Op1, getKillRegState(Op1IsKill)) 2063 .addReg(Op2, getKillRegState(Op2IsKill)); 2064 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2065 TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); 2066 } 2067 return ResultReg; 2068 } 2069 2070 Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, 2071 const TargetRegisterClass *RC, unsigned Op0, 2072 bool Op0IsKill, uint64_t Imm) { 2073 const 
MCInstrDesc &II = TII.get(MachineInstOpcode); 2074 2075 Register ResultReg = createResultReg(RC); 2076 Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); 2077 2078 if (II.getNumDefs() >= 1) 2079 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 2080 .addReg(Op0, getKillRegState(Op0IsKill)) 2081 .addImm(Imm); 2082 else { 2083 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2084 .addReg(Op0, getKillRegState(Op0IsKill)) 2085 .addImm(Imm); 2086 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2087 TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); 2088 } 2089 return ResultReg; 2090 } 2091 2092 Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, 2093 const TargetRegisterClass *RC, unsigned Op0, 2094 bool Op0IsKill, uint64_t Imm1, 2095 uint64_t Imm2) { 2096 const MCInstrDesc &II = TII.get(MachineInstOpcode); 2097 2098 Register ResultReg = createResultReg(RC); 2099 Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); 2100 2101 if (II.getNumDefs() >= 1) 2102 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 2103 .addReg(Op0, getKillRegState(Op0IsKill)) 2104 .addImm(Imm1) 2105 .addImm(Imm2); 2106 else { 2107 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2108 .addReg(Op0, getKillRegState(Op0IsKill)) 2109 .addImm(Imm1) 2110 .addImm(Imm2); 2111 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2112 TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); 2113 } 2114 return ResultReg; 2115 } 2116 2117 Register FastISel::fastEmitInst_f(unsigned MachineInstOpcode, 2118 const TargetRegisterClass *RC, 2119 const ConstantFP *FPImm) { 2120 const MCInstrDesc &II = TII.get(MachineInstOpcode); 2121 2122 Register ResultReg = createResultReg(RC); 2123 2124 if (II.getNumDefs() >= 1) 2125 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 2126 .addFPImm(FPImm); 2127 else { 2128 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2129 .addFPImm(FPImm); 2130 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2131 TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); 2132 } 2133 return ResultReg; 2134 } 2135 2136 Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, 2137 const TargetRegisterClass *RC, unsigned Op0, 2138 bool Op0IsKill, unsigned Op1, 2139 bool Op1IsKill, uint64_t Imm) { 2140 const MCInstrDesc &II = TII.get(MachineInstOpcode); 2141 2142 Register ResultReg = createResultReg(RC); 2143 Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); 2144 Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); 2145 2146 if (II.getNumDefs() >= 1) 2147 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 2148 .addReg(Op0, getKillRegState(Op0IsKill)) 2149 .addReg(Op1, getKillRegState(Op1IsKill)) 2150 .addImm(Imm); 2151 else { 2152 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2153 .addReg(Op0, getKillRegState(Op0IsKill)) 2154 .addReg(Op1, getKillRegState(Op1IsKill)) 2155 .addImm(Imm); 2156 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2157 TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); 2158 } 2159 return ResultReg; 2160 } 2161 2162 Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode, 2163 const TargetRegisterClass *RC, uint64_t Imm) { 2164 Register ResultReg = createResultReg(RC); 2165 const MCInstrDesc &II = TII.get(MachineInstOpcode); 2166 2167 if (II.getNumDefs() >= 1) 2168 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 2169 .addImm(Imm); 2170 else { 2171 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 
II).addImm(Imm); 2172 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2173 TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); 2174 } 2175 return ResultReg; 2176 } 2177 2178 Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, 2179 bool Op0IsKill, uint32_t Idx) { 2180 Register ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); 2181 assert(Register::isVirtualRegister(Op0) && 2182 "Cannot yet extract from physregs"); 2183 const TargetRegisterClass *RC = MRI.getRegClass(Op0); 2184 MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); 2185 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), 2186 ResultReg).addReg(Op0, getKillRegState(Op0IsKill), Idx); 2187 return ResultReg; 2188 } 2189 2190 /// Emit MachineInstrs to compute the value of Op with all but the least 2191 /// significant bit set to zero. 2192 Register FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { 2193 return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); 2194 } 2195 2196 /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. 2197 /// Emit code to ensure constants are copied into registers when needed. 2198 /// Remember the virtual registers that need to be added to the Machine PHI 2199 /// nodes as input. We cannot just directly add them, because expansion 2200 /// might result in multiple MBB's for one BB. As such, the start of the 2201 /// BB might correspond to a different MBB than the end. 2202 bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { 2203 const Instruction *TI = LLVMBB->getTerminator(); 2204 2205 SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; 2206 FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); 2207 2208 // Check successor nodes' PHI nodes that expect a constant to be available 2209 // from this block. 2210 for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { 2211 const BasicBlock *SuccBB = TI->getSuccessor(succ); 2212 if (!isa<PHINode>(SuccBB->begin())) 2213 continue; 2214 MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; 2215 2216 // If this terminator has multiple identical successors (common for 2217 // switches), only handle each succ once. 2218 if (!SuccsHandled.insert(SuccMBB).second) 2219 continue; 2220 2221 MachineBasicBlock::iterator MBBI = SuccMBB->begin(); 2222 2223 // At this point we know that there is a 1-1 correspondence between LLVM PHI 2224 // nodes and Machine PHI nodes, but the incoming operands have not been 2225 // emitted yet. 2226 for (const PHINode &PN : SuccBB->phis()) { 2227 // Ignore dead phi's. 2228 if (PN.use_empty()) 2229 continue; 2230 2231 // Only handle legal types. Two interesting things to note here. First, 2232 // by bailing out early, we may leave behind some dead instructions, 2233 // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its 2234 // own moves. Second, this check is necessary because FastISel doesn't 2235 // use CreateRegs to create registers, so it always creates 2236 // exactly one register for each non-void instruction. 2237 EVT VT = TLI.getValueType(DL, PN.getType(), /*AllowUnknown=*/true); 2238 if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { 2239 // Handle integer promotions, though, because they're common and easy. 
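// For example, an i8 PHI is usually not a "legal" type, but the check below
// still lets i1/i8/i16 through, since such values are simply carried in
// registers of a wider legal integer type; anything else (say an i128 PHI on
// a typical 64-bit target) causes us to bail out to SelectionDAG.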
2240 if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) { 2241 FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); 2242 return false; 2243 } 2244 } 2245 2246 const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB); 2247 2248 // Set the DebugLoc for the copy. Prefer the location of the operand 2249 // if there is one; use the location of the PHI otherwise. 2250 DbgLoc = PN.getDebugLoc(); 2251 if (const auto *Inst = dyn_cast<Instruction>(PHIOp)) 2252 DbgLoc = Inst->getDebugLoc(); 2253 2254 Register Reg = getRegForValue(PHIOp); 2255 if (!Reg) { 2256 FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); 2257 return false; 2258 } 2259 FuncInfo.PHINodesToUpdate.push_back(std::make_pair(&*MBBI++, Reg)); 2260 DbgLoc = DebugLoc(); 2261 } 2262 } 2263 2264 return true; 2265 } 2266 2267 bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { 2268 assert(LI->hasOneUse() && 2269 "tryToFoldLoad expected a LoadInst with a single use"); 2270 // We know that the load has a single use, but don't know what it is. If it 2271 // isn't one of the folded instructions, then we can't succeed here. Handle 2272 // this by scanning the single-use users of the load until we get to FoldInst. 2273 unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. 2274 2275 const Instruction *TheUser = LI->user_back(); 2276 while (TheUser != FoldInst && // Scan up until we find FoldInst. 2277 // Stay in the right block. 2278 TheUser->getParent() == FoldInst->getParent() && 2279 --MaxUsers) { // Don't scan too far. 2280 // If there are multiple or no uses of this instruction, then bail out. 2281 if (!TheUser->hasOneUse()) 2282 return false; 2283 2284 TheUser = TheUser->user_back(); 2285 } 2286 2287 // If we didn't find the fold instruction, then we failed to collapse the 2288 // sequence. 2289 if (TheUser != FoldInst) 2290 return false; 2291 2292 // Don't try to fold volatile loads. Target has to deal with alignment 2293 // constraints. 2294 if (LI->isVolatile()) 2295 return false; 2296 2297 // Figure out which vreg this is going into. If there is no assigned vreg yet 2298 // then there actually was no reference to it. Perhaps the load is referenced 2299 // by a dead instruction. 2300 Register LoadReg = getRegForValue(LI); 2301 if (!LoadReg) 2302 return false; 2303 2304 // We can't fold if this vreg has no uses or more than one use. Multiple uses 2305 // may mean that the instruction got lowered to multiple MIs, or the use of 2306 // the loaded value ended up being multiple operands of the result. 2307 if (!MRI.hasOneUse(LoadReg)) 2308 return false; 2309 2310 MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg); 2311 MachineInstr *User = RI->getParent(); 2312 2313 // Set the insertion point properly. Folding the load can cause generation of 2314 // other random instructions (like sign extends) for addressing modes; make 2315 // sure they get inserted in a logical place before the new instruction. 2316 FuncInfo.InsertPt = User; 2317 FuncInfo.MBB = User->getParent(); 2318 2319 // Ask the target to try folding the load. 2320 return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI); 2321 } 2322 2323 bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { 2324 // Must be an add. 2325 if (!isa<AddOperator>(Add)) 2326 return false; 2327 // Type size needs to match. 2328 if (DL.getTypeSizeInBits(GEP->getType()) != 2329 DL.getTypeSizeInBits(Add->getType())) 2330 return false; 2331 // Must be in the same basic block. 
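// For example, on a 64-bit target, given
//   %sum = add i64 %idx, 16
//   %p = getelementptr i8, i8* %base, i64 %sum
// the add is a candidate for folding into the GEP's address computation: it
// is an add with a constant operand, its width matches the pointer width, and
// (checked below) it is defined in the same machine basic block as the GEP.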
2332 if (isa<Instruction>(Add) && 2333 FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB) 2334 return false; 2335 // Must have a constant operand. 2336 return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1)); 2337 } 2338 2339 MachineMemOperand * 2340 FastISel::createMachineMemOperandFor(const Instruction *I) const { 2341 const Value *Ptr; 2342 Type *ValTy; 2343 MaybeAlign Alignment; 2344 MachineMemOperand::Flags Flags; 2345 bool IsVolatile; 2346 2347 if (const auto *LI = dyn_cast<LoadInst>(I)) { 2348 Alignment = LI->getAlign(); 2349 IsVolatile = LI->isVolatile(); 2350 Flags = MachineMemOperand::MOLoad; 2351 Ptr = LI->getPointerOperand(); 2352 ValTy = LI->getType(); 2353 } else if (const auto *SI = dyn_cast<StoreInst>(I)) { 2354 Alignment = SI->getAlign(); 2355 IsVolatile = SI->isVolatile(); 2356 Flags = MachineMemOperand::MOStore; 2357 Ptr = SI->getPointerOperand(); 2358 ValTy = SI->getValueOperand()->getType(); 2359 } else 2360 return nullptr; 2361 2362 bool IsNonTemporal = I->hasMetadata(LLVMContext::MD_nontemporal); 2363 bool IsInvariant = I->hasMetadata(LLVMContext::MD_invariant_load); 2364 bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable); 2365 const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); 2366 2367 AAMDNodes AAInfo; 2368 I->getAAMetadata(AAInfo); 2369 2370 if (!Alignment) // Ensure that codegen never sees alignment 0. 2371 Alignment = DL.getABITypeAlign(ValTy); 2372 2373 unsigned Size = DL.getTypeStoreSize(ValTy); 2374 2375 if (IsVolatile) 2376 Flags |= MachineMemOperand::MOVolatile; 2377 if (IsNonTemporal) 2378 Flags |= MachineMemOperand::MONonTemporal; 2379 if (IsDereferenceable) 2380 Flags |= MachineMemOperand::MODereferenceable; 2381 if (IsInvariant) 2382 Flags |= MachineMemOperand::MOInvariant; 2383 2384 return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size, 2385 *Alignment, AAInfo, Ranges); 2386 } 2387 2388 CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const { 2389 // If both operands are the same, then try to optimize or fold the cmp. 
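// For example, "fcmp oeq double %x, %x" is true exactly when %x is not NaN,
// so it degrades to the cheaper "ord" predicate, while "icmp ult i32 %x, %x"
// can never be true and becomes an always-false predicate; the table below
// encodes these identities.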
2390 CmpInst::Predicate Predicate = CI->getPredicate(); 2391 if (CI->getOperand(0) != CI->getOperand(1)) 2392 return Predicate; 2393 2394 switch (Predicate) { 2395 default: llvm_unreachable("Invalid predicate!"); 2396 case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break; 2397 case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break; 2398 case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break; 2399 case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break; 2400 case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break; 2401 case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break; 2402 case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break; 2403 case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break; 2404 case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break; 2405 case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break; 2406 case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break; 2407 case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; 2408 case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break; 2409 case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; 2410 case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break; 2411 case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break; 2412 2413 case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break; 2414 case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break; 2415 case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break; 2416 case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; 2417 case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break; 2418 case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; 2419 case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break; 2420 case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break; 2421 case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break; 2422 case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break; 2423 } 2424 2425 return Predicate; 2426 } 2427