1 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the interfaces that ARM uses to lower LLVM code into a 11 // selection DAG. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ARMISelLowering.h" 16 #include "ARMCallingConv.h" 17 #include "ARMConstantPoolValue.h" 18 #include "ARMMachineFunctionInfo.h" 19 #include "ARMPerfectShuffle.h" 20 #include "ARMSubtarget.h" 21 #include "ARMTargetMachine.h" 22 #include "ARMTargetObjectFile.h" 23 #include "MCTargetDesc/ARMAddressingModes.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/ADT/StringExtras.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/CodeGen/CallingConvLower.h" 28 #include "llvm/CodeGen/IntrinsicLowering.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineFrameInfo.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineJumpTableInfo.h" 34 #include "llvm/CodeGen/MachineModuleInfo.h" 35 #include "llvm/CodeGen/MachineRegisterInfo.h" 36 #include "llvm/CodeGen/SelectionDAG.h" 37 #include "llvm/IR/CallingConv.h" 38 #include "llvm/IR/Constants.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/IR/GlobalValue.h" 41 #include "llvm/IR/IRBuilder.h" 42 #include "llvm/IR/Instruction.h" 43 #include "llvm/IR/Instructions.h" 44 #include "llvm/IR/IntrinsicInst.h" 45 #include "llvm/IR/Intrinsics.h" 46 #include "llvm/IR/Type.h" 47 #include "llvm/MC/MCSectionMachO.h" 48 #include "llvm/Support/CommandLine.h" 49 #include "llvm/Support/Debug.h" 50 #include "llvm/Support/ErrorHandling.h" 51 #include "llvm/Support/MathExtras.h" 52 #include 
"llvm/Support/raw_ostream.h" 53 #include "llvm/Target/TargetOptions.h" 54 #include <utility> 55 using namespace llvm; 56 57 #define DEBUG_TYPE "arm-isel" 58 59 STATISTIC(NumTailCalls, "Number of tail calls"); 60 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); 61 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); 62 63 static cl::opt<bool> 64 ARMInterworking("arm-interworking", cl::Hidden, 65 cl::desc("Enable / disable ARM interworking (for debugging only)"), 66 cl::init(true)); 67 68 namespace { 69 class ARMCCState : public CCState { 70 public: 71 ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, 72 SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, 73 ParmContext PC) 74 : CCState(CC, isVarArg, MF, locs, C) { 75 assert(((PC == Call) || (PC == Prologue)) && 76 "ARMCCState users must specify whether their context is call" 77 "or prologue generation."); 78 CallOrPrologue = PC; 79 } 80 }; 81 } 82 83 // The APCS parameter registers. 
84 static const MCPhysReg GPRArgRegs[] = { 85 ARM::R0, ARM::R1, ARM::R2, ARM::R3 86 }; 87 88 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, 89 MVT PromotedBitwiseVT) { 90 if (VT != PromotedLdStVT) { 91 setOperationAction(ISD::LOAD, VT, Promote); 92 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); 93 94 setOperationAction(ISD::STORE, VT, Promote); 95 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); 96 } 97 98 MVT ElemTy = VT.getVectorElementType(); 99 if (ElemTy != MVT::i64 && ElemTy != MVT::f64) 100 setOperationAction(ISD::SETCC, VT, Custom); 101 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 102 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 103 if (ElemTy == MVT::i32) { 104 setOperationAction(ISD::SINT_TO_FP, VT, Custom); 105 setOperationAction(ISD::UINT_TO_FP, VT, Custom); 106 setOperationAction(ISD::FP_TO_SINT, VT, Custom); 107 setOperationAction(ISD::FP_TO_UINT, VT, Custom); 108 } else { 109 setOperationAction(ISD::SINT_TO_FP, VT, Expand); 110 setOperationAction(ISD::UINT_TO_FP, VT, Expand); 111 setOperationAction(ISD::FP_TO_SINT, VT, Expand); 112 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 113 } 114 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 115 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 116 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); 117 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); 118 setOperationAction(ISD::SELECT, VT, Expand); 119 setOperationAction(ISD::SELECT_CC, VT, Expand); 120 setOperationAction(ISD::VSELECT, VT, Expand); 121 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 122 if (VT.isInteger()) { 123 setOperationAction(ISD::SHL, VT, Custom); 124 setOperationAction(ISD::SRA, VT, Custom); 125 setOperationAction(ISD::SRL, VT, Custom); 126 } 127 128 // Promote all bit-wise operations. 
129 if (VT.isInteger() && VT != PromotedBitwiseVT) { 130 setOperationAction(ISD::AND, VT, Promote); 131 AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); 132 setOperationAction(ISD::OR, VT, Promote); 133 AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); 134 setOperationAction(ISD::XOR, VT, Promote); 135 AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); 136 } 137 138 // Neon does not support vector divide/remainder operations. 139 setOperationAction(ISD::SDIV, VT, Expand); 140 setOperationAction(ISD::UDIV, VT, Expand); 141 setOperationAction(ISD::FDIV, VT, Expand); 142 setOperationAction(ISD::SREM, VT, Expand); 143 setOperationAction(ISD::UREM, VT, Expand); 144 setOperationAction(ISD::FREM, VT, Expand); 145 } 146 147 void ARMTargetLowering::addDRTypeForNEON(MVT VT) { 148 addRegisterClass(VT, &ARM::DPRRegClass); 149 addTypeForNEON(VT, MVT::f64, MVT::v2i32); 150 } 151 152 void ARMTargetLowering::addQRTypeForNEON(MVT VT) { 153 addRegisterClass(VT, &ARM::DPairRegClass); 154 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); 155 } 156 157 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, 158 const ARMSubtarget &STI) 159 : TargetLowering(TM), Subtarget(&STI) { 160 RegInfo = Subtarget->getRegisterInfo(); 161 Itins = Subtarget->getInstrItineraryData(); 162 163 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 164 165 if (Subtarget->isTargetMachO()) { 166 // Uses VFP for Thumb libfuncs if available. 167 if (Subtarget->isThumb() && Subtarget->hasVFP2() && 168 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) { 169 // Single-precision floating-point arithmetic. 170 setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); 171 setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); 172 setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp"); 173 setLibcallName(RTLIB::DIV_F32, "__divsf3vfp"); 174 175 // Double-precision floating-point arithmetic. 
176 setLibcallName(RTLIB::ADD_F64, "__adddf3vfp"); 177 setLibcallName(RTLIB::SUB_F64, "__subdf3vfp"); 178 setLibcallName(RTLIB::MUL_F64, "__muldf3vfp"); 179 setLibcallName(RTLIB::DIV_F64, "__divdf3vfp"); 180 181 // Single-precision comparisons. 182 setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp"); 183 setLibcallName(RTLIB::UNE_F32, "__nesf2vfp"); 184 setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp"); 185 setLibcallName(RTLIB::OLE_F32, "__lesf2vfp"); 186 setLibcallName(RTLIB::OGE_F32, "__gesf2vfp"); 187 setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp"); 188 setLibcallName(RTLIB::UO_F32, "__unordsf2vfp"); 189 setLibcallName(RTLIB::O_F32, "__unordsf2vfp"); 190 191 setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); 192 setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE); 193 setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 194 setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 195 setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 196 setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 197 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 198 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 199 200 // Double-precision comparisons. 201 setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp"); 202 setLibcallName(RTLIB::UNE_F64, "__nedf2vfp"); 203 setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp"); 204 setLibcallName(RTLIB::OLE_F64, "__ledf2vfp"); 205 setLibcallName(RTLIB::OGE_F64, "__gedf2vfp"); 206 setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp"); 207 setLibcallName(RTLIB::UO_F64, "__unorddf2vfp"); 208 setLibcallName(RTLIB::O_F64, "__unorddf2vfp"); 209 210 setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); 211 setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE); 212 setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); 213 setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); 214 setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); 215 setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); 216 setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); 217 setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); 218 219 // Floating-point to integer conversions. 
220 // i64 conversions are done via library routines even when generating VFP 221 // instructions, so use the same ones. 222 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp"); 223 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp"); 224 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp"); 225 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp"); 226 227 // Conversions between floating types. 228 setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp"); 229 setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp"); 230 231 // Integer to floating-point conversions. 232 // i64 conversions are done via library routines even when generating VFP 233 // instructions, so use the same ones. 234 // FIXME: There appears to be some naming inconsistency in ARM libgcc: 235 // e.g., __floatunsidf vs. __floatunssidfvfp. 236 setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp"); 237 setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp"); 238 setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp"); 239 setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp"); 240 } 241 } 242 243 // These libcalls are not available in 32-bit. 
244 setLibcallName(RTLIB::SHL_I128, nullptr); 245 setLibcallName(RTLIB::SRL_I128, nullptr); 246 setLibcallName(RTLIB::SRA_I128, nullptr); 247 248 if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() && 249 !Subtarget->isTargetWindows()) { 250 static const struct { 251 const RTLIB::Libcall Op; 252 const char * const Name; 253 const CallingConv::ID CC; 254 const ISD::CondCode Cond; 255 } LibraryCalls[] = { 256 // Double-precision floating-point arithmetic helper functions 257 // RTABI chapter 4.1.2, Table 2 258 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 259 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 260 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 261 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 262 263 // Double-precision floating-point comparison helper functions 264 // RTABI chapter 4.1.2, Table 3 265 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, 266 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, 267 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, 268 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, 269 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, 270 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, 271 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, 272 { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, 273 274 // Single-precision floating-point arithmetic helper functions 275 // RTABI chapter 4.1.2, Table 4 276 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 277 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 278 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 279 { RTLIB::SUB_F32, "__aeabi_fsub", 
CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 280 281 // Single-precision floating-point comparison helper functions 282 // RTABI chapter 4.1.2, Table 5 283 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, 284 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, 285 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, 286 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, 287 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, 288 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, 289 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, 290 { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, 291 292 // Floating-point to integer conversions. 293 // RTABI chapter 4.1.2, Table 6 294 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 295 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 296 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 297 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 298 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 299 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 300 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 301 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 302 303 // Conversions between floating types. 
304 // RTABI chapter 4.1.2, Table 7 305 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 306 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 307 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 308 309 // Integer to floating-point conversions. 310 // RTABI chapter 4.1.2, Table 8 311 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 312 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 313 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 314 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 315 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 316 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 317 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 318 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 319 320 // Long long helper functions 321 // RTABI chapter 4.2, Table 9 322 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 323 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 324 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 325 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 326 327 // Integer division functions 328 // RTABI chapter 4.3.1 329 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 330 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 331 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 332 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 333 { RTLIB::UDIV_I8, "__aeabi_uidiv", 
CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 334 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 335 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 336 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 337 338 // Memory operations 339 // RTABI chapter 4.3.4 340 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 341 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 342 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, 343 }; 344 345 for (const auto &LC : LibraryCalls) { 346 setLibcallName(LC.Op, LC.Name); 347 setLibcallCallingConv(LC.Op, LC.CC); 348 if (LC.Cond != ISD::SETCC_INVALID) 349 setCmpLibcallCC(LC.Op, LC.Cond); 350 } 351 } 352 353 if (Subtarget->isTargetWindows()) { 354 static const struct { 355 const RTLIB::Libcall Op; 356 const char * const Name; 357 const CallingConv::ID CC; 358 } LibraryCalls[] = { 359 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, 360 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, 361 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, 362 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, 363 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, 364 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, 365 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, 366 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, 367 }; 368 369 for (const auto &LC : LibraryCalls) { 370 setLibcallName(LC.Op, LC.Name); 371 setLibcallCallingConv(LC.Op, LC.CC); 372 } 373 } 374 375 // Use divmod compiler-rt calls for iOS 5.0 and later. 
376 if (Subtarget->getTargetTriple().isiOS() && 377 !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) { 378 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); 379 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); 380 } 381 382 // The half <-> float conversion functions are always soft-float, but are 383 // needed for some targets which use a hard-float calling convention by 384 // default. 385 if (Subtarget->isAAPCS_ABI()) { 386 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); 387 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); 388 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); 389 } else { 390 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); 391 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); 392 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); 393 } 394 395 if (Subtarget->isThumb1Only()) 396 addRegisterClass(MVT::i32, &ARM::tGPRRegClass); 397 else 398 addRegisterClass(MVT::i32, &ARM::GPRRegClass); 399 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && 400 !Subtarget->isThumb1Only()) { 401 addRegisterClass(MVT::f32, &ARM::SPRRegClass); 402 addRegisterClass(MVT::f64, &ARM::DPRRegClass); 403 } 404 405 for (MVT VT : MVT::vector_valuetypes()) { 406 for (MVT InnerVT : MVT::vector_valuetypes()) { 407 setTruncStoreAction(VT, InnerVT, Expand); 408 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); 409 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); 410 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 411 } 412 413 setOperationAction(ISD::MULHS, VT, Expand); 414 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 415 setOperationAction(ISD::MULHU, VT, Expand); 416 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 417 418 setOperationAction(ISD::BSWAP, VT, Expand); 419 } 420 421 setOperationAction(ISD::ConstantFP, MVT::f32, Custom); 422 setOperationAction(ISD::ConstantFP, MVT::f64, Custom); 423 424 
setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); 425 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); 426 427 if (Subtarget->hasNEON()) { 428 addDRTypeForNEON(MVT::v2f32); 429 addDRTypeForNEON(MVT::v8i8); 430 addDRTypeForNEON(MVT::v4i16); 431 addDRTypeForNEON(MVT::v2i32); 432 addDRTypeForNEON(MVT::v1i64); 433 434 addQRTypeForNEON(MVT::v4f32); 435 addQRTypeForNEON(MVT::v2f64); 436 addQRTypeForNEON(MVT::v16i8); 437 addQRTypeForNEON(MVT::v8i16); 438 addQRTypeForNEON(MVT::v4i32); 439 addQRTypeForNEON(MVT::v2i64); 440 441 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but 442 // neither Neon nor VFP support any arithmetic operations on it. 443 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively 444 // supported for v4f32. 445 setOperationAction(ISD::FADD, MVT::v2f64, Expand); 446 setOperationAction(ISD::FSUB, MVT::v2f64, Expand); 447 setOperationAction(ISD::FMUL, MVT::v2f64, Expand); 448 // FIXME: Code duplication: FDIV and FREM are expanded always, see 449 // ARMTargetLowering::addTypeForNEON method for details. 450 setOperationAction(ISD::FDIV, MVT::v2f64, Expand); 451 setOperationAction(ISD::FREM, MVT::v2f64, Expand); 452 // FIXME: Create unittest. 453 // In another words, find a way when "copysign" appears in DAG with vector 454 // operands. 455 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); 456 // FIXME: Code duplication: SETCC has custom operation action, see 457 // ARMTargetLowering::addTypeForNEON method for details. 458 setOperationAction(ISD::SETCC, MVT::v2f64, Expand); 459 // FIXME: Create unittest for FNEG and for FABS. 
460 setOperationAction(ISD::FNEG, MVT::v2f64, Expand); 461 setOperationAction(ISD::FABS, MVT::v2f64, Expand); 462 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); 463 setOperationAction(ISD::FSIN, MVT::v2f64, Expand); 464 setOperationAction(ISD::FCOS, MVT::v2f64, Expand); 465 setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); 466 setOperationAction(ISD::FPOW, MVT::v2f64, Expand); 467 setOperationAction(ISD::FLOG, MVT::v2f64, Expand); 468 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); 469 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); 470 setOperationAction(ISD::FEXP, MVT::v2f64, Expand); 471 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); 472 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. 473 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); 474 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); 475 setOperationAction(ISD::FRINT, MVT::v2f64, Expand); 476 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); 477 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); 478 setOperationAction(ISD::FMA, MVT::v2f64, Expand); 479 480 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); 481 setOperationAction(ISD::FSIN, MVT::v4f32, Expand); 482 setOperationAction(ISD::FCOS, MVT::v4f32, Expand); 483 setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); 484 setOperationAction(ISD::FPOW, MVT::v4f32, Expand); 485 setOperationAction(ISD::FLOG, MVT::v4f32, Expand); 486 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); 487 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); 488 setOperationAction(ISD::FEXP, MVT::v4f32, Expand); 489 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); 490 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand); 491 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand); 492 setOperationAction(ISD::FRINT, MVT::v4f32, Expand); 493 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); 494 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); 495 496 // Mark v2f32 intrinsics. 
497 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); 498 setOperationAction(ISD::FSIN, MVT::v2f32, Expand); 499 setOperationAction(ISD::FCOS, MVT::v2f32, Expand); 500 setOperationAction(ISD::FPOWI, MVT::v2f32, Expand); 501 setOperationAction(ISD::FPOW, MVT::v2f32, Expand); 502 setOperationAction(ISD::FLOG, MVT::v2f32, Expand); 503 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); 504 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand); 505 setOperationAction(ISD::FEXP, MVT::v2f32, Expand); 506 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); 507 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand); 508 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand); 509 setOperationAction(ISD::FRINT, MVT::v2f32, Expand); 510 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand); 511 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand); 512 513 // Neon does not support some operations on v1i64 and v2i64 types. 514 setOperationAction(ISD::MUL, MVT::v1i64, Expand); 515 // Custom handling for some quad-vector types to detect VMULL. 516 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 517 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 518 setOperationAction(ISD::MUL, MVT::v2i64, Custom); 519 // Custom handling for some vector types to avoid expensive expansions 520 setOperationAction(ISD::SDIV, MVT::v4i16, Custom); 521 setOperationAction(ISD::SDIV, MVT::v8i8, Custom); 522 setOperationAction(ISD::UDIV, MVT::v4i16, Custom); 523 setOperationAction(ISD::UDIV, MVT::v8i8, Custom); 524 setOperationAction(ISD::SETCC, MVT::v1i64, Expand); 525 setOperationAction(ISD::SETCC, MVT::v2i64, Expand); 526 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with 527 // a destination type that is wider than the source, and nor does 528 // it have a FP_TO_[SU]INT instruction with a narrower destination than 529 // source. 
530 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); 531 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); 532 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); 533 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); 534 535 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); 536 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); 537 538 // NEON does not have single instruction CTPOP for vectors with element 539 // types wider than 8-bits. However, custom lowering can leverage the 540 // v8i8/v16i8 vcnt instruction. 541 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom); 542 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); 543 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); 544 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); 545 546 // NEON does not have single instruction CTTZ for vectors. 547 setOperationAction(ISD::CTTZ, MVT::v8i8, Custom); 548 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom); 549 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom); 550 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); 551 552 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); 553 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); 554 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); 555 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom); 556 557 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom); 558 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom); 559 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom); 560 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom); 561 562 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); 563 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); 564 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); 565 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); 566 567 // NEON only has FMA instructions as of VFP4. 
568 if (!Subtarget->hasVFP4()) { 569 setOperationAction(ISD::FMA, MVT::v2f32, Expand); 570 setOperationAction(ISD::FMA, MVT::v4f32, Expand); 571 } 572 573 setTargetDAGCombine(ISD::INTRINSIC_VOID); 574 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); 575 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 576 setTargetDAGCombine(ISD::SHL); 577 setTargetDAGCombine(ISD::SRL); 578 setTargetDAGCombine(ISD::SRA); 579 setTargetDAGCombine(ISD::SIGN_EXTEND); 580 setTargetDAGCombine(ISD::ZERO_EXTEND); 581 setTargetDAGCombine(ISD::ANY_EXTEND); 582 setTargetDAGCombine(ISD::SELECT_CC); 583 setTargetDAGCombine(ISD::BUILD_VECTOR); 584 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 585 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); 586 setTargetDAGCombine(ISD::STORE); 587 setTargetDAGCombine(ISD::FP_TO_SINT); 588 setTargetDAGCombine(ISD::FP_TO_UINT); 589 setTargetDAGCombine(ISD::FDIV); 590 setTargetDAGCombine(ISD::LOAD); 591 592 // It is legal to extload from v4i8 to v4i16 or v4i32. 593 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, 594 MVT::v2i32}) { 595 for (MVT VT : MVT::integer_vector_valuetypes()) { 596 setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal); 597 setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal); 598 setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); 599 } 600 } 601 } 602 603 // ARM and Thumb2 support UMLAL/SMLAL. 604 if (!Subtarget->isThumb1Only()) 605 setTargetDAGCombine(ISD::ADDC); 606 607 if (Subtarget->isFPOnlySP()) { 608 // When targetting a floating-point unit with only single-precision 609 // operations, f64 is legal for the few double-precision instructions which 610 // are present However, no double-precision operations other than moves, 611 // loads and stores are provided by the hardware. 
612 setOperationAction(ISD::FADD, MVT::f64, Expand); 613 setOperationAction(ISD::FSUB, MVT::f64, Expand); 614 setOperationAction(ISD::FMUL, MVT::f64, Expand); 615 setOperationAction(ISD::FMA, MVT::f64, Expand); 616 setOperationAction(ISD::FDIV, MVT::f64, Expand); 617 setOperationAction(ISD::FREM, MVT::f64, Expand); 618 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 619 setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); 620 setOperationAction(ISD::FNEG, MVT::f64, Expand); 621 setOperationAction(ISD::FABS, MVT::f64, Expand); 622 setOperationAction(ISD::FSQRT, MVT::f64, Expand); 623 setOperationAction(ISD::FSIN, MVT::f64, Expand); 624 setOperationAction(ISD::FCOS, MVT::f64, Expand); 625 setOperationAction(ISD::FPOWI, MVT::f64, Expand); 626 setOperationAction(ISD::FPOW, MVT::f64, Expand); 627 setOperationAction(ISD::FLOG, MVT::f64, Expand); 628 setOperationAction(ISD::FLOG2, MVT::f64, Expand); 629 setOperationAction(ISD::FLOG10, MVT::f64, Expand); 630 setOperationAction(ISD::FEXP, MVT::f64, Expand); 631 setOperationAction(ISD::FEXP2, MVT::f64, Expand); 632 setOperationAction(ISD::FCEIL, MVT::f64, Expand); 633 setOperationAction(ISD::FTRUNC, MVT::f64, Expand); 634 setOperationAction(ISD::FRINT, MVT::f64, Expand); 635 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); 636 setOperationAction(ISD::FFLOOR, MVT::f64, Expand); 637 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 638 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 639 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 640 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 641 setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); 642 setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); 643 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); 644 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); 645 } 646 647 computeRegisterProperties(Subtarget->getRegisterInfo()); 648 649 // ARM does not have floating-point extending loads. 
650 for (MVT VT : MVT::fp_valuetypes()) { 651 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); 652 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); 653 } 654 655 // ... or truncating stores 656 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 657 setTruncStoreAction(MVT::f32, MVT::f16, Expand); 658 setTruncStoreAction(MVT::f64, MVT::f16, Expand); 659 660 // ARM does not have i1 sign extending load. 661 for (MVT VT : MVT::integer_valuetypes()) 662 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 663 664 // ARM supports all 4 flavors of integer indexed load / store. 665 if (!Subtarget->isThumb1Only()) { 666 for (unsigned im = (unsigned)ISD::PRE_INC; 667 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { 668 setIndexedLoadAction(im, MVT::i1, Legal); 669 setIndexedLoadAction(im, MVT::i8, Legal); 670 setIndexedLoadAction(im, MVT::i16, Legal); 671 setIndexedLoadAction(im, MVT::i32, Legal); 672 setIndexedStoreAction(im, MVT::i1, Legal); 673 setIndexedStoreAction(im, MVT::i8, Legal); 674 setIndexedStoreAction(im, MVT::i16, Legal); 675 setIndexedStoreAction(im, MVT::i32, Legal); 676 } 677 } 678 679 setOperationAction(ISD::SADDO, MVT::i32, Custom); 680 setOperationAction(ISD::UADDO, MVT::i32, Custom); 681 setOperationAction(ISD::SSUBO, MVT::i32, Custom); 682 setOperationAction(ISD::USUBO, MVT::i32, Custom); 683 684 // i64 operation support. 
685 setOperationAction(ISD::MUL, MVT::i64, Expand); 686 setOperationAction(ISD::MULHU, MVT::i32, Expand); 687 if (Subtarget->isThumb1Only()) { 688 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 689 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 690 } 691 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() 692 || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP())) 693 setOperationAction(ISD::MULHS, MVT::i32, Expand); 694 695 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 696 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 697 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 698 setOperationAction(ISD::SRL, MVT::i64, Custom); 699 setOperationAction(ISD::SRA, MVT::i64, Custom); 700 701 if (!Subtarget->isThumb1Only()) { 702 // FIXME: We should do this for Thumb1 as well. 703 setOperationAction(ISD::ADDC, MVT::i32, Custom); 704 setOperationAction(ISD::ADDE, MVT::i32, Custom); 705 setOperationAction(ISD::SUBC, MVT::i32, Custom); 706 setOperationAction(ISD::SUBE, MVT::i32, Custom); 707 } 708 709 // ARM does not have ROTL. 710 setOperationAction(ISD::ROTL, MVT::i32, Expand); 711 setOperationAction(ISD::CTTZ, MVT::i32, Custom); 712 setOperationAction(ISD::CTPOP, MVT::i32, Expand); 713 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) 714 setOperationAction(ISD::CTLZ, MVT::i32, Expand); 715 716 // These just redirect to CTTZ and CTLZ on ARM. 717 setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); 718 setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); 719 720 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); 721 722 // Only ARMv6 has BSWAP. 723 if (!Subtarget->hasV6Ops()) 724 setOperationAction(ISD::BSWAP, MVT::i32, Expand); 725 726 if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) && 727 !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) { 728 // These are expanded into libcalls if the cpu doesn't have HW divider. 
729 setOperationAction(ISD::SDIV, MVT::i32, Expand); 730 setOperationAction(ISD::UDIV, MVT::i32, Expand); 731 } 732 733 // FIXME: Also set divmod for SREM on EABI 734 setOperationAction(ISD::SREM, MVT::i32, Expand); 735 setOperationAction(ISD::UREM, MVT::i32, Expand); 736 // Register based DivRem for AEABI (RTABI 4.2) 737 if (Subtarget->isTargetAEABI()) { 738 setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod"); 739 setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod"); 740 setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod"); 741 setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod"); 742 setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod"); 743 setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod"); 744 setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod"); 745 setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod"); 746 747 setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS); 748 setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS); 749 setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS); 750 setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS); 751 setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS); 752 setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS); 753 setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS); 754 setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS); 755 756 setOperationAction(ISD::SDIVREM, MVT::i32, Custom); 757 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 758 } else { 759 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 760 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 761 } 762 763 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 764 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 765 setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); 766 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 767 setOperationAction(ISD::BlockAddress, MVT::i32, Custom); 768 769 
setOperationAction(ISD::TRAP, MVT::Other, Legal); 770 771 // Use the default implementation. 772 setOperationAction(ISD::VASTART, MVT::Other, Custom); 773 setOperationAction(ISD::VAARG, MVT::Other, Expand); 774 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 775 setOperationAction(ISD::VAEND, MVT::Other, Expand); 776 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 777 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 778 779 if (!Subtarget->isTargetMachO()) { 780 // Non-MachO platforms may return values in these registers via the 781 // personality function. 782 setExceptionPointerRegister(ARM::R0); 783 setExceptionSelectorRegister(ARM::R1); 784 } 785 786 if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) 787 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); 788 else 789 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); 790 791 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use 792 // the default expansion. If we are targeting a single threaded system, 793 // then set them all for expand so we can lower them later into their 794 // non-atomic form. 795 if (TM.Options.ThreadModel == ThreadModel::Single) 796 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); 797 else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { 798 // ATOMIC_FENCE needs custom lowering; the others should have been expanded 799 // to ldrex/strex loops already. 800 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 801 802 // On v8, we have particularly efficient implementations of atomic fences 803 // if they can be combined with nearby atomic loads and stores. 804 if (!Subtarget->hasV8Ops()) { 805 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. 806 setInsertFencesForAtomic(true); 807 } 808 } else { 809 // If there's anything we can use as a barrier, go through custom lowering 810 // for ATOMIC_FENCE. 
811 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, 812 Subtarget->hasAnyDataBarrier() ? Custom : Expand); 813 814 // Set them all for expansion, which will force libcalls. 815 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); 816 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); 817 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); 818 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); 819 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); 820 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); 821 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); 822 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); 823 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); 824 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); 825 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); 826 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); 827 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the 828 // Unordered/Monotonic case. 829 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); 830 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); 831 } 832 833 setOperationAction(ISD::PREFETCH, MVT::Other, Custom); 834 835 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. 836 if (!Subtarget->hasV6Ops()) { 837 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 838 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 839 } 840 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 841 842 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && 843 !Subtarget->isThumb1Only()) { 844 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR 845 // iff target supports vfp2. 846 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 847 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); 848 } 849 850 // We want to custom lower some of our intrinsics. 
851 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 852 if (Subtarget->isTargetDarwin()) { 853 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 854 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 855 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); 856 } 857 858 setOperationAction(ISD::SETCC, MVT::i32, Expand); 859 setOperationAction(ISD::SETCC, MVT::f32, Expand); 860 setOperationAction(ISD::SETCC, MVT::f64, Expand); 861 setOperationAction(ISD::SELECT, MVT::i32, Custom); 862 setOperationAction(ISD::SELECT, MVT::f32, Custom); 863 setOperationAction(ISD::SELECT, MVT::f64, Custom); 864 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 865 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 866 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 867 868 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 869 setOperationAction(ISD::BR_CC, MVT::i32, Custom); 870 setOperationAction(ISD::BR_CC, MVT::f32, Custom); 871 setOperationAction(ISD::BR_CC, MVT::f64, Custom); 872 setOperationAction(ISD::BR_JT, MVT::Other, Custom); 873 874 // We don't support sin/cos/fmod/copysign/pow 875 setOperationAction(ISD::FSIN, MVT::f64, Expand); 876 setOperationAction(ISD::FSIN, MVT::f32, Expand); 877 setOperationAction(ISD::FCOS, MVT::f32, Expand); 878 setOperationAction(ISD::FCOS, MVT::f64, Expand); 879 setOperationAction(ISD::FSINCOS, MVT::f64, Expand); 880 setOperationAction(ISD::FSINCOS, MVT::f32, Expand); 881 setOperationAction(ISD::FREM, MVT::f64, Expand); 882 setOperationAction(ISD::FREM, MVT::f32, Expand); 883 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && 884 !Subtarget->isThumb1Only()) { 885 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); 886 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); 887 } 888 setOperationAction(ISD::FPOW, MVT::f64, Expand); 889 setOperationAction(ISD::FPOW, MVT::f32, Expand); 890 891 if (!Subtarget->hasVFP4()) { 892 setOperationAction(ISD::FMA, MVT::f64, Expand); 893 
setOperationAction(ISD::FMA, MVT::f32, Expand); 894 } 895 896 // Various VFP goodness 897 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) { 898 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. 899 if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { 900 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); 901 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); 902 } 903 904 // fp16 is a special v7 extension that adds f16 <-> f32 conversions. 905 if (!Subtarget->hasFP16()) { 906 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); 907 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); 908 } 909 } 910 911 // Combine sin / cos into one node or libcall if possible. 912 if (Subtarget->hasSinCos()) { 913 setLibcallName(RTLIB::SINCOS_F32, "sincosf"); 914 setLibcallName(RTLIB::SINCOS_F64, "sincos"); 915 if (Subtarget->getTargetTriple().isiOS()) { 916 // For iOS, we don't want to the normal expansion of a libcall to 917 // sincos. We want to issue a libcall to __sincos_stret. 918 setOperationAction(ISD::FSINCOS, MVT::f64, Custom); 919 setOperationAction(ISD::FSINCOS, MVT::f32, Custom); 920 } 921 } 922 923 // FP-ARMv8 implements a lot of rounding-like FP operations. 
924 if (Subtarget->hasFPARMv8()) { 925 setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 926 setOperationAction(ISD::FCEIL, MVT::f32, Legal); 927 setOperationAction(ISD::FROUND, MVT::f32, Legal); 928 setOperationAction(ISD::FTRUNC, MVT::f32, Legal); 929 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); 930 setOperationAction(ISD::FRINT, MVT::f32, Legal); 931 if (!Subtarget->isFPOnlySP()) { 932 setOperationAction(ISD::FFLOOR, MVT::f64, Legal); 933 setOperationAction(ISD::FCEIL, MVT::f64, Legal); 934 setOperationAction(ISD::FROUND, MVT::f64, Legal); 935 setOperationAction(ISD::FTRUNC, MVT::f64, Legal); 936 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); 937 setOperationAction(ISD::FRINT, MVT::f64, Legal); 938 } 939 } 940 // We have target-specific dag combine patterns for the following nodes: 941 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine 942 setTargetDAGCombine(ISD::ADD); 943 setTargetDAGCombine(ISD::SUB); 944 setTargetDAGCombine(ISD::MUL); 945 setTargetDAGCombine(ISD::AND); 946 setTargetDAGCombine(ISD::OR); 947 setTargetDAGCombine(ISD::XOR); 948 949 if (Subtarget->hasV6Ops()) 950 setTargetDAGCombine(ISD::SRL); 951 952 setStackPointerRegisterToSaveRestore(ARM::SP); 953 954 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() || 955 !Subtarget->hasVFP2()) 956 setSchedulingPreference(Sched::RegPressure); 957 else 958 setSchedulingPreference(Sched::Hybrid); 959 960 //// temporary - rewrite interface to use type 961 MaxStoresPerMemset = 8; 962 MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; 963 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores 964 MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; 965 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores 966 MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2; 967 968 // On ARM arguments smaller than 4 bytes are extended, so all arguments 969 // are at least 4 bytes aligned. 
970 setMinStackArgumentAlignment(4); 971 972 // Prefer likely predicted branches to selects on out-of-order cores. 973 PredictableSelectIsExpensive = Subtarget->isLikeA9(); 974 975 setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); 976 } 977 978 bool ARMTargetLowering::useSoftFloat() const { 979 return Subtarget->useSoftFloat(); 980 } 981 982 // FIXME: It might make sense to define the representative register class as the 983 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is 984 // a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently, 985 // SPR's representative would be DPR_VFP2. This should work well if register 986 // pressure tracking were modified such that a register use would increment the 987 // pressure of the register class's representative and all of it's super 988 // classes' representatives transitively. We have not implemented this because 989 // of the difficulty prior to coalescing of modeling operand register classes 990 // due to the common occurrence of cross class copies and subregister insertions 991 // and extractions. 992 std::pair<const TargetRegisterClass *, uint8_t> 993 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, 994 MVT VT) const { 995 const TargetRegisterClass *RRC = nullptr; 996 uint8_t Cost = 1; 997 switch (VT.SimpleTy) { 998 default: 999 return TargetLowering::findRepresentativeClass(TRI, VT); 1000 // Use DPR as representative register class for all floating point 1001 // and vector types. Since there are 32 SPR registers and 32 DPR registers so 1002 // the cost is 1 for both f32 and f64. 1003 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: 1004 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: 1005 RRC = &ARM::DPRRegClass; 1006 // When NEON is used for SP, only half of the register file is available 1007 // because operations that define both SP and DP results will be constrained 1008 // to the VFP2 class (D0-D15). 
We currently model this constraint prior to 1009 // coalescing by double-counting the SP regs. See the FIXME above. 1010 if (Subtarget->useNEONForSinglePrecisionFP()) 1011 Cost = 2; 1012 break; 1013 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: 1014 case MVT::v4f32: case MVT::v2f64: 1015 RRC = &ARM::DPRRegClass; 1016 Cost = 2; 1017 break; 1018 case MVT::v4i64: 1019 RRC = &ARM::DPRRegClass; 1020 Cost = 4; 1021 break; 1022 case MVT::v8i64: 1023 RRC = &ARM::DPRRegClass; 1024 Cost = 8; 1025 break; 1026 } 1027 return std::make_pair(RRC, Cost); 1028 } 1029 1030 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { 1031 switch ((ARMISD::NodeType)Opcode) { 1032 case ARMISD::FIRST_NUMBER: break; 1033 case ARMISD::Wrapper: return "ARMISD::Wrapper"; 1034 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; 1035 case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; 1036 case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL"; 1037 case ARMISD::CALL: return "ARMISD::CALL"; 1038 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; 1039 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; 1040 case ARMISD::tCALL: return "ARMISD::tCALL"; 1041 case ARMISD::BRCOND: return "ARMISD::BRCOND"; 1042 case ARMISD::BR_JT: return "ARMISD::BR_JT"; 1043 case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; 1044 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; 1045 case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG"; 1046 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; 1047 case ARMISD::CMP: return "ARMISD::CMP"; 1048 case ARMISD::CMN: return "ARMISD::CMN"; 1049 case ARMISD::CMPZ: return "ARMISD::CMPZ"; 1050 case ARMISD::CMPFP: return "ARMISD::CMPFP"; 1051 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; 1052 case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; 1053 case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; 1054 1055 case ARMISD::CMOV: return "ARMISD::CMOV"; 1056 1057 case ARMISD::RBIT: return "ARMISD::RBIT"; 1058 1059 case ARMISD::SRL_FLAG: 
return "ARMISD::SRL_FLAG"; 1060 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; 1061 case ARMISD::RRX: return "ARMISD::RRX"; 1062 1063 case ARMISD::ADDC: return "ARMISD::ADDC"; 1064 case ARMISD::ADDE: return "ARMISD::ADDE"; 1065 case ARMISD::SUBC: return "ARMISD::SUBC"; 1066 case ARMISD::SUBE: return "ARMISD::SUBE"; 1067 1068 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; 1069 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; 1070 1071 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; 1072 case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; 1073 1074 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; 1075 1076 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; 1077 1078 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; 1079 1080 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; 1081 1082 case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; 1083 1084 case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK"; 1085 1086 case ARMISD::VCEQ: return "ARMISD::VCEQ"; 1087 case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; 1088 case ARMISD::VCGE: return "ARMISD::VCGE"; 1089 case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; 1090 case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; 1091 case ARMISD::VCGEU: return "ARMISD::VCGEU"; 1092 case ARMISD::VCGT: return "ARMISD::VCGT"; 1093 case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; 1094 case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; 1095 case ARMISD::VCGTU: return "ARMISD::VCGTU"; 1096 case ARMISD::VTST: return "ARMISD::VTST"; 1097 1098 case ARMISD::VSHL: return "ARMISD::VSHL"; 1099 case ARMISD::VSHRs: return "ARMISD::VSHRs"; 1100 case ARMISD::VSHRu: return "ARMISD::VSHRu"; 1101 case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; 1102 case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; 1103 case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; 1104 case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; 1105 case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; 1106 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; 1107 case ARMISD::VQSHRNs: return 
"ARMISD::VQSHRNs"; 1108 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 1109 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 1110 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 1111 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 1112 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 1113 case ARMISD::VSLI: return "ARMISD::VSLI"; 1114 case ARMISD::VSRI: return "ARMISD::VSRI"; 1115 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 1116 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 1117 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; 1118 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; 1119 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; 1120 case ARMISD::VDUP: return "ARMISD::VDUP"; 1121 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 1122 case ARMISD::VEXT: return "ARMISD::VEXT"; 1123 case ARMISD::VREV64: return "ARMISD::VREV64"; 1124 case ARMISD::VREV32: return "ARMISD::VREV32"; 1125 case ARMISD::VREV16: return "ARMISD::VREV16"; 1126 case ARMISD::VZIP: return "ARMISD::VZIP"; 1127 case ARMISD::VUZP: return "ARMISD::VUZP"; 1128 case ARMISD::VTRN: return "ARMISD::VTRN"; 1129 case ARMISD::VTBL1: return "ARMISD::VTBL1"; 1130 case ARMISD::VTBL2: return "ARMISD::VTBL2"; 1131 case ARMISD::VMULLs: return "ARMISD::VMULLs"; 1132 case ARMISD::VMULLu: return "ARMISD::VMULLu"; 1133 case ARMISD::UMLAL: return "ARMISD::UMLAL"; 1134 case ARMISD::SMLAL: return "ARMISD::SMLAL"; 1135 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; 1136 case ARMISD::FMAX: return "ARMISD::FMAX"; 1137 case ARMISD::FMIN: return "ARMISD::FMIN"; 1138 case ARMISD::VMAXNM: return "ARMISD::VMAX"; 1139 case ARMISD::VMINNM: return "ARMISD::VMIN"; 1140 case ARMISD::BFI: return "ARMISD::BFI"; 1141 case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; 1142 case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; 1143 case ARMISD::VBSL: return "ARMISD::VBSL"; 1144 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; 1145 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; 1146 case ARMISD::VLD4DUP: return 
"ARMISD::VLD4DUP"; 1147 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; 1148 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; 1149 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; 1150 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; 1151 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; 1152 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; 1153 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; 1154 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; 1155 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; 1156 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; 1157 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; 1158 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; 1159 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; 1160 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; 1161 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; 1162 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; 1163 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; 1164 } 1165 return nullptr; 1166 } 1167 1168 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, 1169 EVT VT) const { 1170 if (!VT.isVector()) 1171 return getPointerTy(DL); 1172 return VT.changeVectorElementTypeToInteger(); 1173 } 1174 1175 /// getRegClassFor - Return the register class that should be used for the 1176 /// specified value type. 1177 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const { 1178 // Map v4i64 to QQ registers but do not make the type legal. Similarly map 1179 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to 1180 // load / store 4 to 8 consecutive D registers. 
1181 if (Subtarget->hasNEON()) { 1182 if (VT == MVT::v4i64) 1183 return &ARM::QQPRRegClass; 1184 if (VT == MVT::v8i64) 1185 return &ARM::QQQQPRRegClass; 1186 } 1187 return TargetLowering::getRegClassFor(VT); 1188 } 1189 1190 // memcpy, and other memory intrinsics, typically tries to use LDM/STM if the 1191 // source/dest is aligned and the copy size is large enough. We therefore want 1192 // to align such objects passed to memory intrinsics. 1193 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, 1194 unsigned &PrefAlign) const { 1195 if (!isa<MemIntrinsic>(CI)) 1196 return false; 1197 MinSize = 8; 1198 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1 1199 // cycle faster than 4-byte aligned LDM. 1200 PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4); 1201 return true; 1202 } 1203 1204 // Create a fast isel object. 1205 FastISel * 1206 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, 1207 const TargetLibraryInfo *libInfo) const { 1208 return ARM::createFastISel(funcInfo, libInfo); 1209 } 1210 1211 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { 1212 unsigned NumVals = N->getNumValues(); 1213 if (!NumVals) 1214 return Sched::RegPressure; 1215 1216 for (unsigned i = 0; i != NumVals; ++i) { 1217 EVT VT = N->getValueType(i); 1218 if (VT == MVT::Glue || VT == MVT::Other) 1219 continue; 1220 if (VT.isFloatingPoint() || VT.isVector()) 1221 return Sched::ILP; 1222 } 1223 1224 if (!N->isMachineOpcode()) 1225 return Sched::RegPressure; 1226 1227 // Load are scheduled for latency even if there instruction itinerary 1228 // is not available. 
1229 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); 1230 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 1231 1232 if (MCID.getNumDefs() == 0) 1233 return Sched::RegPressure; 1234 if (!Itins->isEmpty() && 1235 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) 1236 return Sched::ILP; 1237 1238 return Sched::RegPressure; 1239 } 1240 1241 //===----------------------------------------------------------------------===// 1242 // Lowering Code 1243 //===----------------------------------------------------------------------===// 1244 1245 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC 1246 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { 1247 switch (CC) { 1248 default: llvm_unreachable("Unknown condition code!"); 1249 case ISD::SETNE: return ARMCC::NE; 1250 case ISD::SETEQ: return ARMCC::EQ; 1251 case ISD::SETGT: return ARMCC::GT; 1252 case ISD::SETGE: return ARMCC::GE; 1253 case ISD::SETLT: return ARMCC::LT; 1254 case ISD::SETLE: return ARMCC::LE; 1255 case ISD::SETUGT: return ARMCC::HI; 1256 case ISD::SETUGE: return ARMCC::HS; 1257 case ISD::SETULT: return ARMCC::LO; 1258 case ISD::SETULE: return ARMCC::LS; 1259 } 1260 } 1261 1262 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 
1263 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, 1264 ARMCC::CondCodes &CondCode2) { 1265 CondCode2 = ARMCC::AL; 1266 switch (CC) { 1267 default: llvm_unreachable("Unknown FP condition!"); 1268 case ISD::SETEQ: 1269 case ISD::SETOEQ: CondCode = ARMCC::EQ; break; 1270 case ISD::SETGT: 1271 case ISD::SETOGT: CondCode = ARMCC::GT; break; 1272 case ISD::SETGE: 1273 case ISD::SETOGE: CondCode = ARMCC::GE; break; 1274 case ISD::SETOLT: CondCode = ARMCC::MI; break; 1275 case ISD::SETOLE: CondCode = ARMCC::LS; break; 1276 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; 1277 case ISD::SETO: CondCode = ARMCC::VC; break; 1278 case ISD::SETUO: CondCode = ARMCC::VS; break; 1279 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; 1280 case ISD::SETUGT: CondCode = ARMCC::HI; break; 1281 case ISD::SETUGE: CondCode = ARMCC::PL; break; 1282 case ISD::SETLT: 1283 case ISD::SETULT: CondCode = ARMCC::LT; break; 1284 case ISD::SETLE: 1285 case ISD::SETULE: CondCode = ARMCC::LE; break; 1286 case ISD::SETNE: 1287 case ISD::SETUNE: CondCode = ARMCC::NE; break; 1288 } 1289 } 1290 1291 //===----------------------------------------------------------------------===// 1292 // Calling Convention Implementation 1293 //===----------------------------------------------------------------------===// 1294 1295 #include "ARMGenCallingConv.inc" 1296 1297 /// getEffectiveCallingConv - Get the effective calling convention, taking into 1298 /// account presence of floating point hardware and calling convention 1299 /// limitations, such as support for variadic functions. 
1300 CallingConv::ID 1301 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, 1302 bool isVarArg) const { 1303 switch (CC) { 1304 default: 1305 llvm_unreachable("Unsupported calling convention"); 1306 case CallingConv::ARM_AAPCS: 1307 case CallingConv::ARM_APCS: 1308 case CallingConv::GHC: 1309 return CC; 1310 case CallingConv::ARM_AAPCS_VFP: 1311 return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; 1312 case CallingConv::C: 1313 if (!Subtarget->isAAPCS_ABI()) 1314 return CallingConv::ARM_APCS; 1315 else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && 1316 getTargetMachine().Options.FloatABIType == FloatABI::Hard && 1317 !isVarArg) 1318 return CallingConv::ARM_AAPCS_VFP; 1319 else 1320 return CallingConv::ARM_AAPCS; 1321 case CallingConv::Fast: 1322 if (!Subtarget->isAAPCS_ABI()) { 1323 if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) 1324 return CallingConv::Fast; 1325 return CallingConv::ARM_APCS; 1326 } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) 1327 return CallingConv::ARM_AAPCS_VFP; 1328 else 1329 return CallingConv::ARM_AAPCS; 1330 } 1331 } 1332 1333 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given 1334 /// CallingConvention. 1335 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, 1336 bool Return, 1337 bool isVarArg) const { 1338 switch (getEffectiveCallingConv(CC, isVarArg)) { 1339 default: 1340 llvm_unreachable("Unsupported calling convention"); 1341 case CallingConv::ARM_APCS: 1342 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); 1343 case CallingConv::ARM_AAPCS: 1344 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); 1345 case CallingConv::ARM_AAPCS_VFP: 1346 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1347 case CallingConv::Fast: 1348 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); 1349 case CallingConv::GHC: 1350 return (Return ? 
RetCC_ARM_APCS : CC_ARM_APCS_GHC); 1351 } 1352 } 1353 1354 /// LowerCallResult - Lower the result values of a call into the 1355 /// appropriate copies out of appropriate physical registers. 1356 SDValue 1357 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 1358 CallingConv::ID CallConv, bool isVarArg, 1359 const SmallVectorImpl<ISD::InputArg> &Ins, 1360 SDLoc dl, SelectionDAG &DAG, 1361 SmallVectorImpl<SDValue> &InVals, 1362 bool isThisReturn, SDValue ThisVal) const { 1363 1364 // Assign locations to each value returned by this call. 1365 SmallVector<CCValAssign, 16> RVLocs; 1366 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 1367 *DAG.getContext(), Call); 1368 CCInfo.AnalyzeCallResult(Ins, 1369 CCAssignFnForNode(CallConv, /* Return*/ true, 1370 isVarArg)); 1371 1372 // Copy all of the result registers out of their specified physreg. 1373 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1374 CCValAssign VA = RVLocs[i]; 1375 1376 // Pass 'this' value directly from the argument to return value, to avoid 1377 // reg unit interference 1378 if (i == 0 && isThisReturn) { 1379 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && 1380 "unexpected return calling convention register assignment"); 1381 InVals.push_back(ThisVal); 1382 continue; 1383 } 1384 1385 SDValue Val; 1386 if (VA.needsCustom()) { 1387 // Handle f64 or half of a v2f64. 
1388 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1389 InFlag); 1390 Chain = Lo.getValue(1); 1391 InFlag = Lo.getValue(2); 1392 VA = RVLocs[++i]; // skip ahead to next loc 1393 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1394 InFlag); 1395 Chain = Hi.getValue(1); 1396 InFlag = Hi.getValue(2); 1397 if (!Subtarget->isLittle()) 1398 std::swap (Lo, Hi); 1399 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1400 1401 if (VA.getLocVT() == MVT::v2f64) { 1402 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 1403 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1404 DAG.getConstant(0, dl, MVT::i32)); 1405 1406 VA = RVLocs[++i]; // skip ahead to next loc 1407 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1408 Chain = Lo.getValue(1); 1409 InFlag = Lo.getValue(2); 1410 VA = RVLocs[++i]; // skip ahead to next loc 1411 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1412 Chain = Hi.getValue(1); 1413 InFlag = Hi.getValue(2); 1414 if (!Subtarget->isLittle()) 1415 std::swap (Lo, Hi); 1416 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1417 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1418 DAG.getConstant(1, dl, MVT::i32)); 1419 } 1420 } else { 1421 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), 1422 InFlag); 1423 Chain = Val.getValue(1); 1424 InFlag = Val.getValue(2); 1425 } 1426 1427 switch (VA.getLocInfo()) { 1428 default: llvm_unreachable("Unknown loc info!"); 1429 case CCValAssign::Full: break; 1430 case CCValAssign::BCvt: 1431 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); 1432 break; 1433 } 1434 1435 InVals.push_back(Val); 1436 } 1437 1438 return Chain; 1439 } 1440 1441 /// LowerMemOpCallTo - Store the argument to the stack. 
1442 SDValue 1443 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, 1444 SDValue StackPtr, SDValue Arg, 1445 SDLoc dl, SelectionDAG &DAG, 1446 const CCValAssign &VA, 1447 ISD::ArgFlagsTy Flags) const { 1448 unsigned LocMemOffset = VA.getLocMemOffset(); 1449 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 1450 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), 1451 StackPtr, PtrOff); 1452 return DAG.getStore(Chain, dl, Arg, PtrOff, 1453 MachinePointerInfo::getStack(LocMemOffset), 1454 false, false, 0); 1455 } 1456 1457 void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, 1458 SDValue Chain, SDValue &Arg, 1459 RegsToPassVector &RegsToPass, 1460 CCValAssign &VA, CCValAssign &NextVA, 1461 SDValue &StackPtr, 1462 SmallVectorImpl<SDValue> &MemOpChains, 1463 ISD::ArgFlagsTy Flags) const { 1464 1465 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1466 DAG.getVTList(MVT::i32, MVT::i32), Arg); 1467 unsigned id = Subtarget->isLittle() ? 0 : 1; 1468 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); 1469 1470 if (NextVA.isRegLoc()) 1471 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); 1472 else { 1473 assert(NextVA.isMemLoc()); 1474 if (!StackPtr.getNode()) 1475 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, 1476 getPointerTy(DAG.getDataLayout())); 1477 1478 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), 1479 dl, DAG, NextVA, 1480 Flags)); 1481 } 1482 } 1483 1484 /// LowerCall - Lowering a call into a callseq_start <- 1485 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter 1486 /// nodes. 
1487 SDValue 1488 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 1489 SmallVectorImpl<SDValue> &InVals) const { 1490 SelectionDAG &DAG = CLI.DAG; 1491 SDLoc &dl = CLI.DL; 1492 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 1493 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 1494 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 1495 SDValue Chain = CLI.Chain; 1496 SDValue Callee = CLI.Callee; 1497 bool &isTailCall = CLI.IsTailCall; 1498 CallingConv::ID CallConv = CLI.CallConv; 1499 bool doesNotRet = CLI.DoesNotReturn; 1500 bool isVarArg = CLI.IsVarArg; 1501 1502 MachineFunction &MF = DAG.getMachineFunction(); 1503 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); 1504 bool isThisReturn = false; 1505 bool isSibCall = false; 1506 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); 1507 1508 // Disable tail calls if they're not supported. 1509 if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true") 1510 isTailCall = false; 1511 1512 if (isTailCall) { 1513 // Check if it's really possible to do a tail call. 1514 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, 1515 isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), 1516 Outs, OutVals, Ins, DAG); 1517 if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) 1518 report_fatal_error("failed to perform tail call elimination on a call " 1519 "site marked musttail"); 1520 // We don't support GuaranteedTailCallOpt for ARM, only automatically 1521 // detected sibcalls. 1522 if (isTailCall) { 1523 ++NumTailCalls; 1524 isSibCall = true; 1525 } 1526 } 1527 1528 // Analyze operands of the call, assigning locations to each operand. 
1529 SmallVector<CCValAssign, 16> ArgLocs; 1530 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 1531 *DAG.getContext(), Call); 1532 CCInfo.AnalyzeCallOperands(Outs, 1533 CCAssignFnForNode(CallConv, /* Return*/ false, 1534 isVarArg)); 1535 1536 // Get a count of how many bytes are to be pushed on the stack. 1537 unsigned NumBytes = CCInfo.getNextStackOffset(); 1538 1539 // For tail calls, memory operands are available in our caller's stack. 1540 if (isSibCall) 1541 NumBytes = 0; 1542 1543 // Adjust the stack pointer for the new arguments... 1544 // These operations are automatically eliminated by the prolog/epilog pass 1545 if (!isSibCall) 1546 Chain = DAG.getCALLSEQ_START(Chain, 1547 DAG.getIntPtrConstant(NumBytes, dl, true), dl); 1548 1549 SDValue StackPtr = 1550 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); 1551 1552 RegsToPassVector RegsToPass; 1553 SmallVector<SDValue, 8> MemOpChains; 1554 1555 // Walk the register/memloc assignments, inserting copies/loads. In the case 1556 // of tail call optimization, arguments are handled later. 1557 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1558 i != e; 1559 ++i, ++realArgIdx) { 1560 CCValAssign &VA = ArgLocs[i]; 1561 SDValue Arg = OutVals[realArgIdx]; 1562 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1563 bool isByVal = Flags.isByVal(); 1564 1565 // Promote the value if needed. 
1566 switch (VA.getLocInfo()) { 1567 default: llvm_unreachable("Unknown loc info!"); 1568 case CCValAssign::Full: break; 1569 case CCValAssign::SExt: 1570 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 1571 break; 1572 case CCValAssign::ZExt: 1573 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 1574 break; 1575 case CCValAssign::AExt: 1576 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 1577 break; 1578 case CCValAssign::BCvt: 1579 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1580 break; 1581 } 1582 1583 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces 1584 if (VA.needsCustom()) { 1585 if (VA.getLocVT() == MVT::v2f64) { 1586 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1587 DAG.getConstant(0, dl, MVT::i32)); 1588 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1589 DAG.getConstant(1, dl, MVT::i32)); 1590 1591 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, 1592 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1593 1594 VA = ArgLocs[++i]; // skip ahead to next loc 1595 if (VA.isRegLoc()) { 1596 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, 1597 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1598 } else { 1599 assert(VA.isMemLoc()); 1600 1601 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, 1602 dl, DAG, VA, Flags)); 1603 } 1604 } else { 1605 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], 1606 StackPtr, MemOpChains, Flags); 1607 } 1608 } else if (VA.isRegLoc()) { 1609 if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) { 1610 assert(VA.getLocVT() == MVT::i32 && 1611 "unexpected calling convention register assignment"); 1612 assert(!Ins.empty() && Ins[0].VT == MVT::i32 && 1613 "unexpected use of 'returned'"); 1614 isThisReturn = true; 1615 } 1616 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1617 } else if (isByVal) { 1618 assert(VA.isMemLoc()); 1619 unsigned offset = 0; 1620 
1621 // True if this byval aggregate will be split between registers 1622 // and memory. 1623 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); 1624 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed(); 1625 1626 if (CurByValIdx < ByValArgsCount) { 1627 1628 unsigned RegBegin, RegEnd; 1629 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); 1630 1631 EVT PtrVT = 1632 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 1633 unsigned int i, j; 1634 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { 1635 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32); 1636 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 1637 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 1638 MachinePointerInfo(), 1639 false, false, false, 1640 DAG.InferPtrAlignment(AddArg)); 1641 MemOpChains.push_back(Load.getValue(1)); 1642 RegsToPass.push_back(std::make_pair(j, Load)); 1643 } 1644 1645 // If parameter size outsides register area, "offset" value 1646 // helps us to calculate stack slot for remained part properly. 
1647 offset = RegEnd - RegBegin; 1648 1649 CCInfo.nextInRegsParam(); 1650 } 1651 1652 if (Flags.getByValSize() > 4*offset) { 1653 auto PtrVT = getPointerTy(DAG.getDataLayout()); 1654 unsigned LocMemOffset = VA.getLocMemOffset(); 1655 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); 1656 SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff); 1657 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl); 1658 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset); 1659 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl, 1660 MVT::i32); 1661 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl, 1662 MVT::i32); 1663 1664 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 1665 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; 1666 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, 1667 Ops)); 1668 } 1669 } else if (!isSibCall) { 1670 assert(VA.isMemLoc()); 1671 1672 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 1673 dl, DAG, VA, Flags)); 1674 } 1675 } 1676 1677 if (!MemOpChains.empty()) 1678 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 1679 1680 // Build a sequence of copy-to-reg nodes chained together with token chain 1681 // and flag operands which copy the outgoing args into the appropriate regs. 1682 SDValue InFlag; 1683 // Tail call byval lowering might overwrite argument registers so in case of 1684 // tail call optimization the copies to registers are lowered later. 1685 if (!isTailCall) 1686 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1687 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1688 RegsToPass[i].second, InFlag); 1689 InFlag = Chain.getValue(1); 1690 } 1691 1692 // For tail calls lower the arguments to the 'real' stack slot. 
1693 if (isTailCall) { 1694 // Force all the incoming stack arguments to be loaded from the stack 1695 // before any new outgoing arguments are stored to the stack, because the 1696 // outgoing stack slots may alias the incoming argument stack slots, and 1697 // the alias isn't otherwise explicit. This is slightly more conservative 1698 // than necessary, because it means that each store effectively depends 1699 // on every argument instead of just those arguments it would clobber. 1700 1701 // Do not flag preceding copytoreg stuff together with the following stuff. 1702 InFlag = SDValue(); 1703 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1704 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1705 RegsToPass[i].second, InFlag); 1706 InFlag = Chain.getValue(1); 1707 } 1708 InFlag = SDValue(); 1709 } 1710 1711 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1712 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1713 // node so that legalize doesn't hack it. 1714 bool isDirect = false; 1715 bool isARMFunc = false; 1716 bool isLocalARMFunc = false; 1717 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1718 auto PtrVt = getPointerTy(DAG.getDataLayout()); 1719 1720 if (Subtarget->genLongCalls()) { 1721 assert((Subtarget->isTargetWindows() || 1722 getTargetMachine().getRelocationModel() == Reloc::Static) && 1723 "long-calls with non-static relocation model!"); 1724 // Handle a global address or an external symbol. If it's not one of 1725 // those, the target's already in a register, so we don't need to do 1726 // anything extra. 
1727 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1728 const GlobalValue *GV = G->getGlobal(); 1729 // Create a constant pool entry for the callee address 1730 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1731 ARMConstantPoolValue *CPV = 1732 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); 1733 1734 // Get the address of the callee into a register 1735 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); 1736 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1737 Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, 1738 MachinePointerInfo::getConstantPool(), false, false, 1739 false, 0); 1740 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { 1741 const char *Sym = S->getSymbol(); 1742 1743 // Create a constant pool entry for the callee address 1744 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1745 ARMConstantPoolValue *CPV = 1746 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1747 ARMPCLabelIndex, 0); 1748 // Get the address of the callee into a register 1749 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); 1750 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1751 Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, 1752 MachinePointerInfo::getConstantPool(), false, false, 1753 false, 0); 1754 } 1755 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1756 const GlobalValue *GV = G->getGlobal(); 1757 isDirect = true; 1758 bool isDef = GV->isStrongDefinitionForLinker(); 1759 bool isStub = (!isDef && Subtarget->isTargetMachO()) && 1760 getTargetMachine().getRelocationModel() != Reloc::Static; 1761 isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); 1762 // ARM call to a local ARM function is predicable. 1763 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); 1764 // tBX takes a register source operand. 
1765 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1766 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); 1767 Callee = DAG.getNode( 1768 ARMISD::WrapperPIC, dl, PtrVt, 1769 DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); 1770 Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, 1771 MachinePointerInfo::getGOT(), false, false, true, 0); 1772 } else if (Subtarget->isTargetCOFF()) { 1773 assert(Subtarget->isTargetWindows() && 1774 "Windows is the only supported COFF target"); 1775 unsigned TargetFlags = GV->hasDLLImportStorageClass() 1776 ? ARMII::MO_DLLIMPORT 1777 : ARMII::MO_NO_FLAG; 1778 Callee = 1779 DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags); 1780 if (GV->hasDLLImportStorageClass()) 1781 Callee = 1782 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), 1783 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), 1784 MachinePointerInfo::getGOT(), false, false, false, 0); 1785 } else { 1786 // On ELF targets for PIC code, direct calls should go through the PLT 1787 unsigned OpFlags = 0; 1788 if (Subtarget->isTargetELF() && 1789 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1790 OpFlags = ARMII::MO_PLT; 1791 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags); 1792 } 1793 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1794 isDirect = true; 1795 bool isStub = Subtarget->isTargetMachO() && 1796 getTargetMachine().getRelocationModel() != Reloc::Static; 1797 isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); 1798 // tBX takes a register source operand. 
1799 const char *Sym = S->getSymbol(); 1800 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1801 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1802 ARMConstantPoolValue *CPV = 1803 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1804 ARMPCLabelIndex, 4); 1805 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); 1806 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1807 Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, 1808 MachinePointerInfo::getConstantPool(), false, false, 1809 false, 0); 1810 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); 1811 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); 1812 } else { 1813 unsigned OpFlags = 0; 1814 // On ELF targets for PIC code, direct calls should go through the PLT 1815 if (Subtarget->isTargetELF() && 1816 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1817 OpFlags = ARMII::MO_PLT; 1818 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags); 1819 } 1820 } 1821 1822 // FIXME: handle tail calls differently. 1823 unsigned CallOpc; 1824 bool HasMinSizeAttr = MF.getFunction()->hasFnAttribute(Attribute::MinSize); 1825 if (Subtarget->isThumb()) { 1826 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) 1827 CallOpc = ARMISD::CALL_NOLINK; 1828 else 1829 CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; 1830 } else { 1831 if (!isDirect && !Subtarget->hasV5TOps()) 1832 CallOpc = ARMISD::CALL_NOLINK; 1833 else if (doesNotRet && isDirect && Subtarget->hasRAS() && 1834 // Emit regular call when code size is the priority 1835 !HasMinSizeAttr) 1836 // "mov lr, pc; b _foo" to avoid confusing the RSP 1837 CallOpc = ARMISD::CALL_NOLINK; 1838 else 1839 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; 1840 } 1841 1842 std::vector<SDValue> Ops; 1843 Ops.push_back(Chain); 1844 Ops.push_back(Callee); 1845 1846 // Add argument registers to the end of the list so that they are known live 1847 // into the call. 
1848 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1849 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1850 RegsToPass[i].second.getValueType())); 1851 1852 // Add a register mask operand representing the call-preserved registers. 1853 if (!isTailCall) { 1854 const uint32_t *Mask; 1855 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); 1856 if (isThisReturn) { 1857 // For 'this' returns, use the R0-preserving mask if applicable 1858 Mask = ARI->getThisReturnPreservedMask(MF, CallConv); 1859 if (!Mask) { 1860 // Set isThisReturn to false if the calling convention is not one that 1861 // allows 'returned' to be modeled in this way, so LowerCallResult does 1862 // not try to pass 'this' straight through 1863 isThisReturn = false; 1864 Mask = ARI->getCallPreservedMask(MF, CallConv); 1865 } 1866 } else 1867 Mask = ARI->getCallPreservedMask(MF, CallConv); 1868 1869 assert(Mask && "Missing call preserved mask for calling convention"); 1870 Ops.push_back(DAG.getRegisterMask(Mask)); 1871 } 1872 1873 if (InFlag.getNode()) 1874 Ops.push_back(InFlag); 1875 1876 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1877 if (isTailCall) { 1878 MF.getFrameInfo()->setHasTailCall(); 1879 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); 1880 } 1881 1882 // Returns a chain and a flag for retval copy to use. 1883 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); 1884 InFlag = Chain.getValue(1); 1885 1886 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), 1887 DAG.getIntPtrConstant(0, dl, true), InFlag, dl); 1888 if (!Ins.empty()) 1889 InFlag = Chain.getValue(1); 1890 1891 // Handle result values, copying them out of physregs into vregs that we 1892 // return. 1893 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, 1894 InVals, isThisReturn, 1895 isThisReturn ? OutVals[0] : SDValue()); 1896 } 1897 1898 /// HandleByVal - Every parameter *after* a byval parameter is passed 1899 /// on the stack. 
Remember the next parameter register to allocate, 1900 /// and then confiscate the rest of the parameter registers to insure 1901 /// this. 1902 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, 1903 unsigned Align) const { 1904 assert((State->getCallOrPrologue() == Prologue || 1905 State->getCallOrPrologue() == Call) && 1906 "unhandled ParmContext"); 1907 1908 // Byval (as with any stack) slots are always at least 4 byte aligned. 1909 Align = std::max(Align, 4U); 1910 1911 unsigned Reg = State->AllocateReg(GPRArgRegs); 1912 if (!Reg) 1913 return; 1914 1915 unsigned AlignInRegs = Align / 4; 1916 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs; 1917 for (unsigned i = 0; i < Waste; ++i) 1918 Reg = State->AllocateReg(GPRArgRegs); 1919 1920 if (!Reg) 1921 return; 1922 1923 unsigned Excess = 4 * (ARM::R4 - Reg); 1924 1925 // Special case when NSAA != SP and parameter size greater than size of 1926 // all remained GPR regs. In that case we can't split parameter, we must 1927 // send it to stack. We also must set NCRN to R4, so waste all 1928 // remained registers. 1929 const unsigned NSAAOffset = State->getNextStackOffset(); 1930 if (NSAAOffset != 0 && Size > Excess) { 1931 while (State->AllocateReg(GPRArgRegs)) 1932 ; 1933 return; 1934 } 1935 1936 // First register for byval parameter is the first register that wasn't 1937 // allocated before this method call, so it would be "reg". 1938 // If parameter is small enough to be saved in range [reg, r4), then 1939 // the end (first after last) register would be reg + param-size-in-regs, 1940 // else parameter would be splitted between registers and stack, 1941 // end register would be r4 in this case. 1942 unsigned ByValRegBegin = Reg; 1943 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4); 1944 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); 1945 // Note, first register is allocated in the beginning of function already, 1946 // allocate remained amount of registers we need. 
1947 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i) 1948 State->AllocateReg(GPRArgRegs); 1949 // A byval parameter that is split between registers and memory needs its 1950 // size truncated here. 1951 // In the case where the entire structure fits in registers, we set the 1952 // size in memory to zero. 1953 Size = std::max<int>(Size - Excess, 0); 1954 } 1955 1956 /// MatchingStackOffset - Return true if the given stack call argument is 1957 /// already available in the same position (relatively) of the caller's 1958 /// incoming argument stack. 1959 static 1960 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 1961 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 1962 const TargetInstrInfo *TII) { 1963 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 1964 int FI = INT_MAX; 1965 if (Arg.getOpcode() == ISD::CopyFromReg) { 1966 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 1967 if (!TargetRegisterInfo::isVirtualRegister(VR)) 1968 return false; 1969 MachineInstr *Def = MRI->getVRegDef(VR); 1970 if (!Def) 1971 return false; 1972 if (!Flags.isByVal()) { 1973 if (!TII->isLoadFromStackSlot(Def, FI)) 1974 return false; 1975 } else { 1976 return false; 1977 } 1978 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 1979 if (Flags.isByVal()) 1980 // ByVal argument is passed in as a pointer but it's now being 1981 // dereferenced. e.g. 
1982 // define @foo(%struct.X* %A) { 1983 // tail call @bar(%struct.X* byval %A) 1984 // } 1985 return false; 1986 SDValue Ptr = Ld->getBasePtr(); 1987 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 1988 if (!FINode) 1989 return false; 1990 FI = FINode->getIndex(); 1991 } else 1992 return false; 1993 1994 assert(FI != INT_MAX); 1995 if (!MFI->isFixedObjectIndex(FI)) 1996 return false; 1997 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 1998 } 1999 2000 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 2001 /// for tail call optimization. Targets which want to do tail call 2002 /// optimization should implement this function. 2003 bool 2004 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 2005 CallingConv::ID CalleeCC, 2006 bool isVarArg, 2007 bool isCalleeStructRet, 2008 bool isCallerStructRet, 2009 const SmallVectorImpl<ISD::OutputArg> &Outs, 2010 const SmallVectorImpl<SDValue> &OutVals, 2011 const SmallVectorImpl<ISD::InputArg> &Ins, 2012 SelectionDAG& DAG) const { 2013 const Function *CallerF = DAG.getMachineFunction().getFunction(); 2014 CallingConv::ID CallerCC = CallerF->getCallingConv(); 2015 bool CCMatch = CallerCC == CalleeCC; 2016 2017 // Look for obvious safe cases to perform tail call optimization that do not 2018 // require ABI changes. This is what gcc calls sibcall. 2019 2020 // Do not sibcall optimize vararg calls unless the call site is not passing 2021 // any arguments. 2022 if (isVarArg && !Outs.empty()) 2023 return false; 2024 2025 // Exception-handling functions need a special set of instructions to indicate 2026 // a return to the hardware. Tail-calling another function would probably 2027 // break this. 2028 if (CallerF->hasFnAttribute("interrupt")) 2029 return false; 2030 2031 // Also avoid sibcall optimization if either caller or callee uses struct 2032 // return semantics. 
2033 if (isCalleeStructRet || isCallerStructRet) 2034 return false; 2035 2036 // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo:: 2037 // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as 2038 // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation 2039 // support in the assembler and linker to be used. This would need to be 2040 // fixed to fully support tail calls in Thumb1. 2041 // 2042 // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take 2043 // LR. This means if we need to reload LR, it takes an extra instructions, 2044 // which outweighs the value of the tail call; but here we don't know yet 2045 // whether LR is going to be used. Probably the right approach is to 2046 // generate the tail call here and turn it back into CALL/RET in 2047 // emitEpilogue if LR is used. 2048 2049 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 2050 // but we need to make sure there are enough registers; the only valid 2051 // registers are the 4 used for parameters. We don't currently do this 2052 // case. 2053 if (Subtarget->isThumb1Only()) 2054 return false; 2055 2056 // Externally-defined functions with weak linkage should not be 2057 // tail-called on ARM when the OS does not support dynamic 2058 // pre-emption of symbols, as the AAELF spec requires normal calls 2059 // to undefined weak functions to be replaced with a NOP or jump to the 2060 // next instruction. The behaviour of branch instructions in this 2061 // situation (as used for tail calls) is implementation-defined, so we 2062 // cannot rely on the linker replacing the tail call with a return. 
2063 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 2064 const GlobalValue *GV = G->getGlobal(); 2065 const Triple &TT = getTargetMachine().getTargetTriple(); 2066 if (GV->hasExternalWeakLinkage() && 2067 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) 2068 return false; 2069 } 2070 2071 // If the calling conventions do not match, then we'd better make sure the 2072 // results are returned in the same way as what the caller expects. 2073 if (!CCMatch) { 2074 SmallVector<CCValAssign, 16> RVLocs1; 2075 ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1, 2076 *DAG.getContext(), Call); 2077 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 2078 2079 SmallVector<CCValAssign, 16> RVLocs2; 2080 ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2, 2081 *DAG.getContext(), Call); 2082 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 2083 2084 if (RVLocs1.size() != RVLocs2.size()) 2085 return false; 2086 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 2087 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 2088 return false; 2089 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 2090 return false; 2091 if (RVLocs1[i].isRegLoc()) { 2092 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 2093 return false; 2094 } else { 2095 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 2096 return false; 2097 } 2098 } 2099 } 2100 2101 // If Caller's vararg or byval argument has been split between registers and 2102 // stack, do not perform tail call, since part of the argument is in caller's 2103 // local frame. 2104 const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction(). 2105 getInfo<ARMFunctionInfo>(); 2106 if (AFI_Caller->getArgRegsSaveSize()) 2107 return false; 2108 2109 // If the callee takes no arguments then go on to check the results of the 2110 // call. 
2111 if (!Outs.empty()) { 2112 // Check if stack adjustment is needed. For now, do not do this if any 2113 // argument is passed on the stack. 2114 SmallVector<CCValAssign, 16> ArgLocs; 2115 ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs, 2116 *DAG.getContext(), Call); 2117 CCInfo.AnalyzeCallOperands(Outs, 2118 CCAssignFnForNode(CalleeCC, false, isVarArg)); 2119 if (CCInfo.getNextStackOffset()) { 2120 MachineFunction &MF = DAG.getMachineFunction(); 2121 2122 // Check if the arguments are already laid out in the right way as 2123 // the caller's fixed stack objects. 2124 MachineFrameInfo *MFI = MF.getFrameInfo(); 2125 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 2126 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); 2127 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 2128 i != e; 2129 ++i, ++realArgIdx) { 2130 CCValAssign &VA = ArgLocs[i]; 2131 EVT RegVT = VA.getLocVT(); 2132 SDValue Arg = OutVals[realArgIdx]; 2133 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 2134 if (VA.getLocInfo() == CCValAssign::Indirect) 2135 return false; 2136 if (VA.needsCustom()) { 2137 // f64 and vector types are split into multiple registers or 2138 // register/stack-slot combinations. The types will not match 2139 // the registers; give up on memory f64 refs until we figure 2140 // out what to do about this. 
2141 if (!VA.isRegLoc()) 2142 return false; 2143 if (!ArgLocs[++i].isRegLoc()) 2144 return false; 2145 if (RegVT == MVT::v2f64) { 2146 if (!ArgLocs[++i].isRegLoc()) 2147 return false; 2148 if (!ArgLocs[++i].isRegLoc()) 2149 return false; 2150 } 2151 } else if (!VA.isRegLoc()) { 2152 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 2153 MFI, MRI, TII)) 2154 return false; 2155 } 2156 } 2157 } 2158 } 2159 2160 return true; 2161 } 2162 2163 bool 2164 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 2165 MachineFunction &MF, bool isVarArg, 2166 const SmallVectorImpl<ISD::OutputArg> &Outs, 2167 LLVMContext &Context) const { 2168 SmallVector<CCValAssign, 16> RVLocs; 2169 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); 2170 return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true, 2171 isVarArg)); 2172 } 2173 2174 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, 2175 SDLoc DL, SelectionDAG &DAG) { 2176 const MachineFunction &MF = DAG.getMachineFunction(); 2177 const Function *F = MF.getFunction(); 2178 2179 StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString(); 2180 2181 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset 2182 // version of the "preferred return address". These offsets affect the return 2183 // instruction if this is a return from PL1 without hypervisor extensions. 2184 // IRQ/FIQ: +4 "subs pc, lr, #4" 2185 // SWI: 0 "subs pc, lr, #0" 2186 // ABORT: +4 "subs pc, lr, #4" 2187 // UNDEF: +4/+2 "subs pc, lr, #0" 2188 // UNDEF varies depending on where the exception came from ARM or Thumb 2189 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0. 2190 2191 int64_t LROffset; 2192 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" || 2193 IntKind == "ABORT") 2194 LROffset = 4; 2195 else if (IntKind == "SWI" || IntKind == "UNDEF") 2196 LROffset = 0; 2197 else 2198 report_fatal_error("Unsupported interrupt attribute. 
If present, value " 2199 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF"); 2200 2201 RetOps.insert(RetOps.begin() + 1, 2202 DAG.getConstant(LROffset, DL, MVT::i32, false)); 2203 2204 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); 2205 } 2206 2207 SDValue 2208 ARMTargetLowering::LowerReturn(SDValue Chain, 2209 CallingConv::ID CallConv, bool isVarArg, 2210 const SmallVectorImpl<ISD::OutputArg> &Outs, 2211 const SmallVectorImpl<SDValue> &OutVals, 2212 SDLoc dl, SelectionDAG &DAG) const { 2213 2214 // CCValAssign - represent the assignment of the return value to a location. 2215 SmallVector<CCValAssign, 16> RVLocs; 2216 2217 // CCState - Info about the registers and stack slots. 2218 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 2219 *DAG.getContext(), Call); 2220 2221 // Analyze outgoing return values. 2222 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 2223 isVarArg)); 2224 2225 SDValue Flag; 2226 SmallVector<SDValue, 4> RetOps; 2227 RetOps.push_back(Chain); // Operand #0 = Chain (updated below) 2228 bool isLittleEndian = Subtarget->isLittle(); 2229 2230 MachineFunction &MF = DAG.getMachineFunction(); 2231 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2232 AFI->setReturnRegsCount(RVLocs.size()); 2233 2234 // Copy the result values into the output registers. 2235 for (unsigned i = 0, realRVLocIdx = 0; 2236 i != RVLocs.size(); 2237 ++i, ++realRVLocIdx) { 2238 CCValAssign &VA = RVLocs[i]; 2239 assert(VA.isRegLoc() && "Can only return in registers!"); 2240 2241 SDValue Arg = OutVals[realRVLocIdx]; 2242 2243 switch (VA.getLocInfo()) { 2244 default: llvm_unreachable("Unknown loc info!"); 2245 case CCValAssign::Full: break; 2246 case CCValAssign::BCvt: 2247 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 2248 break; 2249 } 2250 2251 if (VA.needsCustom()) { 2252 if (VA.getLocVT() == MVT::v2f64) { 2253 // Extract the first half and return it in two registers. 
2254 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 2255 DAG.getConstant(0, dl, MVT::i32)); 2256 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 2257 DAG.getVTList(MVT::i32, MVT::i32), Half); 2258 2259 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 2260 HalfGPRs.getValue(isLittleEndian ? 0 : 1), 2261 Flag); 2262 Flag = Chain.getValue(1); 2263 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2264 VA = RVLocs[++i]; // skip ahead to next loc 2265 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 2266 HalfGPRs.getValue(isLittleEndian ? 1 : 0), 2267 Flag); 2268 Flag = Chain.getValue(1); 2269 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2270 VA = RVLocs[++i]; // skip ahead to next loc 2271 2272 // Extract the 2nd half and fall through to handle it as an f64 value. 2273 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 2274 DAG.getConstant(1, dl, MVT::i32)); 2275 } 2276 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 2277 // available. 2278 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 2279 DAG.getVTList(MVT::i32, MVT::i32), Arg); 2280 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 2281 fmrrd.getValue(isLittleEndian ? 0 : 1), 2282 Flag); 2283 Flag = Chain.getValue(1); 2284 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2285 VA = RVLocs[++i]; // skip ahead to next loc 2286 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 2287 fmrrd.getValue(isLittleEndian ? 1 : 0), 2288 Flag); 2289 } else 2290 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 2291 2292 // Guarantee that all emitted copies are 2293 // stuck together, avoiding something bad. 2294 Flag = Chain.getValue(1); 2295 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2296 } 2297 2298 // Update chain and glue. 
2299 RetOps[0] = Chain; 2300 if (Flag.getNode()) 2301 RetOps.push_back(Flag); 2302 2303 // CPUs which aren't M-class use a special sequence to return from 2304 // exceptions (roughly, any instruction setting pc and cpsr simultaneously, 2305 // though we use "subs pc, lr, #N"). 2306 // 2307 // M-class CPUs actually use a normal return sequence with a special 2308 // (hardware-provided) value in LR, so the normal code path works. 2309 if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") && 2310 !Subtarget->isMClass()) { 2311 if (Subtarget->isThumb1Only()) 2312 report_fatal_error("interrupt attribute is not supported in Thumb1"); 2313 return LowerInterruptReturn(RetOps, dl, DAG); 2314 } 2315 2316 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps); 2317 } 2318 2319 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 2320 if (N->getNumValues() != 1) 2321 return false; 2322 if (!N->hasNUsesOfValue(1, 0)) 2323 return false; 2324 2325 SDValue TCChain = Chain; 2326 SDNode *Copy = *N->use_begin(); 2327 if (Copy->getOpcode() == ISD::CopyToReg) { 2328 // If the copy has a glue operand, we conservatively assume it isn't safe to 2329 // perform a tail call. 2330 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) 2331 return false; 2332 TCChain = Copy->getOperand(0); 2333 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { 2334 SDNode *VMov = Copy; 2335 // f64 returned in a pair of GPRs. 
2336 SmallPtrSet<SDNode*, 2> Copies; 2337 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 2338 UI != UE; ++UI) { 2339 if (UI->getOpcode() != ISD::CopyToReg) 2340 return false; 2341 Copies.insert(*UI); 2342 } 2343 if (Copies.size() > 2) 2344 return false; 2345 2346 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 2347 UI != UE; ++UI) { 2348 SDValue UseChain = UI->getOperand(0); 2349 if (Copies.count(UseChain.getNode())) 2350 // Second CopyToReg 2351 Copy = *UI; 2352 else { 2353 // We are at the top of this chain. 2354 // If the copy has a glue operand, we conservatively assume it 2355 // isn't safe to perform a tail call. 2356 if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue) 2357 return false; 2358 // First CopyToReg 2359 TCChain = UseChain; 2360 } 2361 } 2362 } else if (Copy->getOpcode() == ISD::BITCAST) { 2363 // f32 returned in a single GPR. 2364 if (!Copy->hasOneUse()) 2365 return false; 2366 Copy = *Copy->use_begin(); 2367 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) 2368 return false; 2369 // If the copy has a glue operand, we conservatively assume it isn't safe to 2370 // perform a tail call. 
2371 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) 2372 return false; 2373 TCChain = Copy->getOperand(0); 2374 } else { 2375 return false; 2376 } 2377 2378 bool HasRet = false; 2379 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); 2380 UI != UE; ++UI) { 2381 if (UI->getOpcode() != ARMISD::RET_FLAG && 2382 UI->getOpcode() != ARMISD::INTRET_FLAG) 2383 return false; 2384 HasRet = true; 2385 } 2386 2387 if (!HasRet) 2388 return false; 2389 2390 Chain = TCChain; 2391 return true; 2392 } 2393 2394 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 2395 if (!Subtarget->supportsTailCall()) 2396 return false; 2397 2398 auto Attr = 2399 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); 2400 if (!CI->isTailCall() || Attr.getValueAsString() == "true") 2401 return false; 2402 2403 return !Subtarget->isThumb1Only(); 2404 } 2405 2406 // Trying to write a 64 bit value so need to split into two 32 bit values first, 2407 // and pass the lower and high parts through. 2408 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) { 2409 SDLoc DL(Op); 2410 SDValue WriteValue = Op->getOperand(2); 2411 2412 // This function is only supposed to be called for i64 type argument. 2413 assert(WriteValue.getValueType() == MVT::i64 2414 && "LowerWRITE_REGISTER called for non-i64 type argument."); 2415 2416 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, 2417 DAG.getConstant(0, DL, MVT::i32)); 2418 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, 2419 DAG.getConstant(1, DL, MVT::i32)); 2420 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi }; 2421 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops); 2422 } 2423 2424 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 2425 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 2426 // one of the above mentioned nodes. 
It has to be wrapped because otherwise 2427 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2428 // be used to form addressing mode. These wrapped nodes will be selected 2429 // into MOVi. 2430 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 2431 EVT PtrVT = Op.getValueType(); 2432 // FIXME there is no actual debug info here 2433 SDLoc dl(Op); 2434 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2435 SDValue Res; 2436 if (CP->isMachineConstantPoolEntry()) 2437 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 2438 CP->getAlignment()); 2439 else 2440 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 2441 CP->getAlignment()); 2442 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 2443 } 2444 2445 unsigned ARMTargetLowering::getJumpTableEncoding() const { 2446 return MachineJumpTableInfo::EK_Inline; 2447 } 2448 2449 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 2450 SelectionDAG &DAG) const { 2451 MachineFunction &MF = DAG.getMachineFunction(); 2452 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2453 unsigned ARMPCLabelIndex = 0; 2454 SDLoc DL(Op); 2455 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2456 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 2457 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2458 SDValue CPAddr; 2459 if (RelocM == Reloc::Static) { 2460 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 2461 } else { 2462 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 2463 ARMPCLabelIndex = AFI->createPICLabelUId(); 2464 ARMConstantPoolValue *CPV = 2465 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, 2466 ARMCP::CPBlockAddress, PCAdj); 2467 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2468 } 2469 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 2470 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 2471 MachinePointerInfo::getConstantPool(), 2472 false, false, false, 0); 2473 if (RelocM == Reloc::Static) 2474 return Result; 2475 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32); 2476 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 2477 } 2478 2479 // Lower ISD::GlobalTLSAddress using the "general dynamic" model 2480 SDValue 2481 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 2482 SelectionDAG &DAG) const { 2483 SDLoc dl(GA); 2484 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2485 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 2486 MachineFunction &MF = DAG.getMachineFunction(); 2487 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2488 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2489 ARMConstantPoolValue *CPV = 2490 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2491 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); 2492 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2493 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 2494 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 2495 MachinePointerInfo::getConstantPool(), 2496 false, false, false, 0); 2497 SDValue Chain = Argument.getValue(1); 2498 2499 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); 2500 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 2501 2502 // call __tls_get_addr. 
2503 ArgListTy Args; 2504 ArgListEntry Entry; 2505 Entry.Node = Argument; 2506 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); 2507 Args.push_back(Entry); 2508 2509 // FIXME: is there useful debug info available here? 2510 TargetLowering::CallLoweringInfo CLI(DAG); 2511 CLI.setDebugLoc(dl).setChain(Chain) 2512 .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), 2513 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args), 2514 0); 2515 2516 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 2517 return CallResult.first; 2518 } 2519 2520 // Lower ISD::GlobalTLSAddress using the "initial exec" or 2521 // "local exec" model. 2522 SDValue 2523 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 2524 SelectionDAG &DAG, 2525 TLSModel::Model model) const { 2526 const GlobalValue *GV = GA->getGlobal(); 2527 SDLoc dl(GA); 2528 SDValue Offset; 2529 SDValue Chain = DAG.getEntryNode(); 2530 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2531 // Get the Thread Pointer 2532 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2533 2534 if (model == TLSModel::InitialExec) { 2535 MachineFunction &MF = DAG.getMachineFunction(); 2536 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2537 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2538 // Initial exec model. 2539 unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; 2540 ARMConstantPoolValue *CPV = 2541 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2542 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, 2543 true); 2544 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2545 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2546 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2547 MachinePointerInfo::getConstantPool(), 2548 false, false, false, 0); 2549 Chain = Offset.getValue(1); 2550 2551 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); 2552 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 2553 2554 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2555 MachinePointerInfo::getConstantPool(), 2556 false, false, false, 0); 2557 } else { 2558 // local exec model 2559 assert(model == TLSModel::LocalExec); 2560 ARMConstantPoolValue *CPV = 2561 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); 2562 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2563 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2564 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2565 MachinePointerInfo::getConstantPool(), 2566 false, false, false, 0); 2567 } 2568 2569 // The address of the thread local variable is the add of the thread 2570 // pointer with the offset of the variable. 
2571 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 2572 } 2573 2574 SDValue 2575 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 2576 // TODO: implement the "local dynamic" model 2577 assert(Subtarget->isTargetELF() && 2578 "TLS not implemented for non-ELF targets"); 2579 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2580 2581 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); 2582 2583 switch (model) { 2584 case TLSModel::GeneralDynamic: 2585 case TLSModel::LocalDynamic: 2586 return LowerToTLSGeneralDynamicModel(GA, DAG); 2587 case TLSModel::InitialExec: 2588 case TLSModel::LocalExec: 2589 return LowerToTLSExecModels(GA, DAG, model); 2590 } 2591 llvm_unreachable("bogus TLS model"); 2592 } 2593 2594 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 2595 SelectionDAG &DAG) const { 2596 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2597 SDLoc dl(Op); 2598 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2599 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2600 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 2601 ARMConstantPoolValue *CPV = 2602 ARMConstantPoolConstant::Create(GV, 2603 UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); 2604 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2605 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2606 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 2607 CPAddr, 2608 MachinePointerInfo::getConstantPool(), 2609 false, false, false, 0); 2610 SDValue Chain = Result.getValue(1); 2611 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 2612 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 2613 if (!UseGOTOFF) 2614 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 2615 MachinePointerInfo::getGOT(), 2616 false, false, false, 0); 2617 return Result; 2618 } 2619 2620 // If we have T2 ops, we can materialize the address directly via movt/movw 2621 // pair. 
This is always cheaper. 2622 if (Subtarget->useMovt(DAG.getMachineFunction())) { 2623 ++NumMovwMovt; 2624 // FIXME: Once remat is capable of dealing with instructions with register 2625 // operands, expand this into two nodes. 2626 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2627 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2628 } else { 2629 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2630 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2631 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2632 MachinePointerInfo::getConstantPool(), 2633 false, false, false, 0); 2634 } 2635 } 2636 2637 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 2638 SelectionDAG &DAG) const { 2639 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2640 SDLoc dl(Op); 2641 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2642 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2643 2644 if (Subtarget->useMovt(DAG.getMachineFunction())) 2645 ++NumMovwMovt; 2646 2647 // FIXME: Once remat is capable of dealing with instructions with register 2648 // operands, expand this into multiple nodes 2649 unsigned Wrapper = 2650 RelocM == Reloc::PIC_ ? 
ARMISD::WrapperPIC : ARMISD::Wrapper; 2651 2652 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY); 2653 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G); 2654 2655 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2656 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 2657 MachinePointerInfo::getGOT(), false, false, false, 0); 2658 return Result; 2659 } 2660 2661 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, 2662 SelectionDAG &DAG) const { 2663 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); 2664 assert(Subtarget->useMovt(DAG.getMachineFunction()) && 2665 "Windows on ARM expects to use movw/movt"); 2666 2667 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2668 const ARMII::TOF TargetFlags = 2669 (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG); 2670 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2671 SDValue Result; 2672 SDLoc DL(Op); 2673 2674 ++NumMovwMovt; 2675 2676 // FIXME: Once remat is capable of dealing with instructions with register 2677 // operands, expand this into two nodes. 2678 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, 2679 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0, 2680 TargetFlags)); 2681 if (GV->hasDLLImportStorageClass()) 2682 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, 2683 MachinePointerInfo::getGOT(), false, false, false, 0); 2684 return Result; 2685 } 2686 2687 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 2688 SelectionDAG &DAG) const { 2689 assert(Subtarget->isTargetELF() && 2690 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 2691 MachineFunction &MF = DAG.getMachineFunction(); 2692 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2693 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2694 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2695 SDLoc dl(Op); 2696 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 2697 ARMConstantPoolValue *CPV = 2698 ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", 2699 ARMPCLabelIndex, PCAdj); 2700 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2701 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2702 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2703 MachinePointerInfo::getConstantPool(), 2704 false, false, false, 0); 2705 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); 2706 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2707 } 2708 2709 SDValue 2710 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 2711 SDLoc dl(Op); 2712 SDValue Val = DAG.getConstant(0, dl, MVT::i32); 2713 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, 2714 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), 2715 Op.getOperand(1), Val); 2716 } 2717 2718 SDValue 2719 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 2720 SDLoc dl(Op); 2721 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 2722 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32)); 2723 } 2724 2725 SDValue 2726 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 2727 const ARMSubtarget *Subtarget) const { 2728 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2729 SDLoc dl(Op); 2730 switch (IntNo) { 2731 default: return SDValue(); // Don't custom lower most intrinsics. 
2732 case Intrinsic::arm_rbit: { 2733 assert(Op.getOperand(1).getValueType() == MVT::i32 && 2734 "RBIT intrinsic must have i32 type!"); 2735 return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1)); 2736 } 2737 case Intrinsic::arm_thread_pointer: { 2738 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2739 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2740 } 2741 case Intrinsic::eh_sjlj_lsda: { 2742 MachineFunction &MF = DAG.getMachineFunction(); 2743 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2744 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2745 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2746 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2747 SDValue CPAddr; 2748 unsigned PCAdj = (RelocM != Reloc::PIC_) 2749 ? 0 : (Subtarget->isThumb() ? 4 : 8); 2750 ARMConstantPoolValue *CPV = 2751 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, 2752 ARMCP::CPLSDA, PCAdj); 2753 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2754 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2755 SDValue Result = 2756 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2757 MachinePointerInfo::getConstantPool(), 2758 false, false, false, 0); 2759 2760 if (RelocM == Reloc::PIC_) { 2761 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); 2762 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2763 } 2764 return Result; 2765 } 2766 case Intrinsic::arm_neon_vmulls: 2767 case Intrinsic::arm_neon_vmullu: { 2768 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) 2769 ? ARMISD::VMULLs : ARMISD::VMULLu; 2770 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), 2771 Op.getOperand(1), Op.getOperand(2)); 2772 } 2773 } 2774 } 2775 2776 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 2777 const ARMSubtarget *Subtarget) { 2778 // FIXME: handle "fence singlethread" more efficiently. 
2779 SDLoc dl(Op); 2780 if (!Subtarget->hasDataBarrier()) { 2781 // Some ARMv6 cpus can support data barriers with an mcr instruction. 2782 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2783 // here. 2784 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2785 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!"); 2786 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2787 DAG.getConstant(0, dl, MVT::i32)); 2788 } 2789 2790 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); 2791 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue()); 2792 ARM_MB::MemBOpt Domain = ARM_MB::ISH; 2793 if (Subtarget->isMClass()) { 2794 // Only a full system barrier exists in the M-class architectures. 2795 Domain = ARM_MB::SY; 2796 } else if (Subtarget->isSwift() && Ord == Release) { 2797 // Swift happens to implement ISHST barriers in a way that's compatible with 2798 // Release semantics but weaker than ISH so we'd be fools not to use 2799 // it. Beware: other processors probably don't! 2800 Domain = ARM_MB::ISHST; 2801 } 2802 2803 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0), 2804 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32), 2805 DAG.getConstant(Domain, dl, MVT::i32)); 2806 } 2807 2808 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, 2809 const ARMSubtarget *Subtarget) { 2810 // ARM pre v5TE and Thumb1 does not have preload instructions. 2811 if (!(Subtarget->isThumb2() || 2812 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) 2813 // Just preserve the chain. 2814 return Op.getOperand(0); 2815 2816 SDLoc dl(Op); 2817 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; 2818 if (!isRead && 2819 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) 2820 // ARMv7 with MP extension has PLDW. 
2821 return Op.getOperand(0); 2822 2823 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); 2824 if (Subtarget->isThumb()) { 2825 // Invert the bits. 2826 isRead = ~isRead & 1; 2827 isData = ~isData & 1; 2828 } 2829 2830 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), 2831 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32), 2832 DAG.getConstant(isData, dl, MVT::i32)); 2833 } 2834 2835 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2836 MachineFunction &MF = DAG.getMachineFunction(); 2837 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2838 2839 // vastart just stores the address of the VarArgsFrameIndex slot into the 2840 // memory location argument. 2841 SDLoc dl(Op); 2842 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 2843 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2844 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2845 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2846 MachinePointerInfo(SV), false, false, 0); 2847 } 2848 2849 SDValue 2850 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2851 SDValue &Root, SelectionDAG &DAG, 2852 SDLoc dl) const { 2853 MachineFunction &MF = DAG.getMachineFunction(); 2854 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2855 2856 const TargetRegisterClass *RC; 2857 if (AFI->isThumb1OnlyFunction()) 2858 RC = &ARM::tGPRRegClass; 2859 else 2860 RC = &ARM::GPRRegClass; 2861 2862 // Transform the arguments stored in physical registers into virtual ones. 2863 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2864 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2865 2866 SDValue ArgValue2; 2867 if (NextVA.isMemLoc()) { 2868 MachineFrameInfo *MFI = MF.getFrameInfo(); 2869 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2870 2871 // Create load node to retrieve arguments from the stack. 
2872 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 2873 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2874 MachinePointerInfo::getFixedStack(FI), 2875 false, false, false, 0); 2876 } else { 2877 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2878 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2879 } 2880 if (!Subtarget->isLittle()) 2881 std::swap (ArgValue, ArgValue2); 2882 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2883 } 2884 2885 // The remaining GPRs hold either the beginning of variable-argument 2886 // data, or the beginning of an aggregate passed by value (usually 2887 // byval). Either way, we allocate stack slots adjacent to the data 2888 // provided by our caller, and store the unallocated registers there. 2889 // If this is a variadic function, the va_list pointer will begin with 2890 // these values; otherwise, this reassembles a (byval) structure that 2891 // was split between registers and memory. 2892 // Return: The frame index registers were stored into. 2893 int 2894 ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, 2895 SDLoc dl, SDValue &Chain, 2896 const Value *OrigArg, 2897 unsigned InRegsParamRecordIdx, 2898 int ArgOffset, 2899 unsigned ArgSize) const { 2900 // Currently, two use-cases possible: 2901 // Case #1. Non-var-args function, and we meet first byval parameter. 2902 // Setup first unallocated register as first byval register; 2903 // eat all remained registers 2904 // (these two actions are performed by HandleByVal method). 2905 // Then, here, we initialize stack frame with 2906 // "store-reg" instructions. 2907 // Case #2. Var-args function, that doesn't contain byval parameters. 2908 // The same: eat all remained unallocated registers, 2909 // initialize stack frame. 
2910 2911 MachineFunction &MF = DAG.getMachineFunction(); 2912 MachineFrameInfo *MFI = MF.getFrameInfo(); 2913 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2914 unsigned RBegin, REnd; 2915 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { 2916 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); 2917 } else { 2918 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs); 2919 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx]; 2920 REnd = ARM::R4; 2921 } 2922 2923 if (REnd != RBegin) 2924 ArgOffset = -4 * (ARM::R4 - RBegin); 2925 2926 auto PtrVT = getPointerTy(DAG.getDataLayout()); 2927 int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false); 2928 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT); 2929 2930 SmallVector<SDValue, 4> MemOps; 2931 const TargetRegisterClass *RC = 2932 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass; 2933 2934 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) { 2935 unsigned VReg = MF.addLiveIn(Reg, RC); 2936 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2937 SDValue Store = 2938 DAG.getStore(Val.getValue(1), dl, Val, FIN, 2939 MachinePointerInfo(OrigArg, 4 * i), false, false, 0); 2940 MemOps.push_back(Store); 2941 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT)); 2942 } 2943 2944 if (!MemOps.empty()) 2945 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 2946 return FrameIndex; 2947 } 2948 2949 // Setup stack frame, the va_list pointer will start from. 
2950 void 2951 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, 2952 SDLoc dl, SDValue &Chain, 2953 unsigned ArgOffset, 2954 unsigned TotalArgRegsSaveSize, 2955 bool ForceMutable) const { 2956 MachineFunction &MF = DAG.getMachineFunction(); 2957 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2958 2959 // Try to store any remaining integer argument regs 2960 // to their spots on the stack so that they may be loaded by deferencing 2961 // the result of va_next. 2962 // If there is no regs to be stored, just point address after last 2963 // argument passed via stack. 2964 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, 2965 CCInfo.getInRegsParamsCount(), 2966 CCInfo.getNextStackOffset(), 4); 2967 AFI->setVarArgsFrameIndex(FrameIndex); 2968 } 2969 2970 SDValue 2971 ARMTargetLowering::LowerFormalArguments(SDValue Chain, 2972 CallingConv::ID CallConv, bool isVarArg, 2973 const SmallVectorImpl<ISD::InputArg> 2974 &Ins, 2975 SDLoc dl, SelectionDAG &DAG, 2976 SmallVectorImpl<SDValue> &InVals) 2977 const { 2978 MachineFunction &MF = DAG.getMachineFunction(); 2979 MachineFrameInfo *MFI = MF.getFrameInfo(); 2980 2981 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2982 2983 // Assign locations to all of the incoming arguments. 2984 SmallVector<CCValAssign, 16> ArgLocs; 2985 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 2986 *DAG.getContext(), Prologue); 2987 CCInfo.AnalyzeFormalArguments(Ins, 2988 CCAssignFnForNode(CallConv, /* Return*/ false, 2989 isVarArg)); 2990 2991 SmallVector<SDValue, 16> ArgValues; 2992 SDValue ArgValue; 2993 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); 2994 unsigned CurArgIdx = 0; 2995 2996 // Initially ArgRegsSaveSize is zero. 2997 // Then we increase this value each time we meet byval parameter. 2998 // We also increase this value in case of varargs function. 
2999 AFI->setArgRegsSaveSize(0); 3000 3001 // Calculate the amount of stack space that we need to allocate to store 3002 // byval and variadic arguments that are passed in registers. 3003 // We need to know this before we allocate the first byval or variadic 3004 // argument, as they will be allocated a stack slot below the CFA (Canonical 3005 // Frame Address, the stack pointer at entry to the function). 3006 unsigned ArgRegBegin = ARM::R4; 3007 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 3008 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount()) 3009 break; 3010 3011 CCValAssign &VA = ArgLocs[i]; 3012 unsigned Index = VA.getValNo(); 3013 ISD::ArgFlagsTy Flags = Ins[Index].Flags; 3014 if (!Flags.isByVal()) 3015 continue; 3016 3017 assert(VA.isMemLoc() && "unexpected byval pointer in reg"); 3018 unsigned RBegin, REnd; 3019 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd); 3020 ArgRegBegin = std::min(ArgRegBegin, RBegin); 3021 3022 CCInfo.nextInRegsParam(); 3023 } 3024 CCInfo.rewindByValRegsInfo(); 3025 3026 int lastInsIndex = -1; 3027 if (isVarArg && MFI->hasVAStart()) { 3028 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs); 3029 if (RegIdx != array_lengthof(GPRArgRegs)) 3030 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]); 3031 } 3032 3033 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin); 3034 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize); 3035 auto PtrVT = getPointerTy(DAG.getDataLayout()); 3036 3037 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 3038 CCValAssign &VA = ArgLocs[i]; 3039 if (Ins[VA.getValNo()].isOrigArg()) { 3040 std::advance(CurOrigArg, 3041 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx); 3042 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex(); 3043 } 3044 // Arguments stored in registers. 
3045 if (VA.isRegLoc()) { 3046 EVT RegVT = VA.getLocVT(); 3047 3048 if (VA.needsCustom()) { 3049 // f64 and vector types are split up into multiple registers or 3050 // combinations of registers and stack slots. 3051 if (VA.getLocVT() == MVT::v2f64) { 3052 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 3053 Chain, DAG, dl); 3054 VA = ArgLocs[++i]; // skip ahead to next loc 3055 SDValue ArgValue2; 3056 if (VA.isMemLoc()) { 3057 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 3058 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3059 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 3060 MachinePointerInfo::getFixedStack(FI), 3061 false, false, false, 0); 3062 } else { 3063 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 3064 Chain, DAG, dl); 3065 } 3066 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 3067 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 3068 ArgValue, ArgValue1, 3069 DAG.getIntPtrConstant(0, dl)); 3070 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 3071 ArgValue, ArgValue2, 3072 DAG.getIntPtrConstant(1, dl)); 3073 } else 3074 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 3075 3076 } else { 3077 const TargetRegisterClass *RC; 3078 3079 if (RegVT == MVT::f32) 3080 RC = &ARM::SPRRegClass; 3081 else if (RegVT == MVT::f64) 3082 RC = &ARM::DPRRegClass; 3083 else if (RegVT == MVT::v2f64) 3084 RC = &ARM::QPRRegClass; 3085 else if (RegVT == MVT::i32) 3086 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass 3087 : &ARM::GPRRegClass; 3088 else 3089 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 3090 3091 // Transform the arguments in physical registers into virtual ones. 3092 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 3093 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 3094 } 3095 3096 // If this is an 8 or 16-bit value, it is really passed promoted 3097 // to 32 bits. 
Insert an assert[sz]ext to capture this, then 3098 // truncate to the right size. 3099 switch (VA.getLocInfo()) { 3100 default: llvm_unreachable("Unknown loc info!"); 3101 case CCValAssign::Full: break; 3102 case CCValAssign::BCvt: 3103 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 3104 break; 3105 case CCValAssign::SExt: 3106 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 3107 DAG.getValueType(VA.getValVT())); 3108 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 3109 break; 3110 case CCValAssign::ZExt: 3111 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 3112 DAG.getValueType(VA.getValVT())); 3113 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 3114 break; 3115 } 3116 3117 InVals.push_back(ArgValue); 3118 3119 } else { // VA.isRegLoc() 3120 3121 // sanity check 3122 assert(VA.isMemLoc()); 3123 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 3124 3125 int index = VA.getValNo(); 3126 3127 // Some Ins[] entries become multiple ArgLoc[] entries. 3128 // Process them only once. 3129 if (index != lastInsIndex) 3130 { 3131 ISD::ArgFlagsTy Flags = Ins[index].Flags; 3132 // FIXME: For now, all byval parameter objects are marked mutable. 3133 // This can be changed with more analysis. 3134 // In case of tail call optimization mark all arguments mutable. 3135 // Since they could be overwritten by lowering of arguments in case of 3136 // a tail call. 
3137 if (Flags.isByVal()) { 3138 assert(Ins[index].isOrigArg() && 3139 "Byval arguments cannot be implicit"); 3140 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); 3141 3142 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg, 3143 CurByValIndex, VA.getLocMemOffset(), 3144 Flags.getByValSize()); 3145 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT)); 3146 CCInfo.nextInRegsParam(); 3147 } else { 3148 unsigned FIOffset = VA.getLocMemOffset(); 3149 int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, 3150 FIOffset, true); 3151 3152 // Create load nodes to retrieve arguments from the stack. 3153 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3154 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 3155 MachinePointerInfo::getFixedStack(FI), 3156 false, false, false, 0)); 3157 } 3158 lastInsIndex = index; 3159 } 3160 } 3161 } 3162 3163 // varargs 3164 if (isVarArg && MFI->hasVAStart()) 3165 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 3166 CCInfo.getNextStackOffset(), 3167 TotalArgRegsSaveSize); 3168 3169 AFI->setArgumentStackSize(CCInfo.getNextStackOffset()); 3170 3171 return Chain; 3172 } 3173 3174 /// isFloatingPointZero - Return true if this is +0.0. 3175 static bool isFloatingPointZero(SDValue Op) { 3176 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 3177 return CFP->getValueAPF().isPosZero(); 3178 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 3179 // Maybe this has already been legalized into the constant pool? 
3180 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 3181 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 3182 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 3183 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 3184 return CFP->getValueAPF().isPosZero(); 3185 } 3186 } else if (Op->getOpcode() == ISD::BITCAST && 3187 Op->getValueType(0) == MVT::f64) { 3188 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) 3189 // created by LowerConstantFP(). 3190 SDValue BitcastOp = Op->getOperand(0); 3191 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) { 3192 SDValue MoveOp = BitcastOp->getOperand(0); 3193 if (MoveOp->getOpcode() == ISD::TargetConstant && 3194 cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) { 3195 return true; 3196 } 3197 } 3198 } 3199 return false; 3200 } 3201 3202 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for 3203 /// the given operands. 3204 SDValue 3205 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 3206 SDValue &ARMcc, SelectionDAG &DAG, 3207 SDLoc dl) const { 3208 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 3209 unsigned C = RHSC->getZExtValue(); 3210 if (!isLegalICmpImmediate(C)) { 3211 // Constant does not fit, try adjusting it by one? 3212 switch (CC) { 3213 default: break; 3214 case ISD::SETLT: 3215 case ISD::SETGE: 3216 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { 3217 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; 3218 RHS = DAG.getConstant(C - 1, dl, MVT::i32); 3219 } 3220 break; 3221 case ISD::SETULT: 3222 case ISD::SETUGE: 3223 if (C != 0 && isLegalICmpImmediate(C-1)) { 3224 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 3225 RHS = DAG.getConstant(C - 1, dl, MVT::i32); 3226 } 3227 break; 3228 case ISD::SETLE: 3229 case ISD::SETGT: 3230 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { 3231 CC = (CC == ISD::SETLE) ? 
ISD::SETLT : ISD::SETGE; 3232 RHS = DAG.getConstant(C + 1, dl, MVT::i32); 3233 } 3234 break; 3235 case ISD::SETULE: 3236 case ISD::SETUGT: 3237 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { 3238 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; 3239 RHS = DAG.getConstant(C + 1, dl, MVT::i32); 3240 } 3241 break; 3242 } 3243 } 3244 } 3245 3246 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 3247 ARMISD::NodeType CompareType; 3248 switch (CondCode) { 3249 default: 3250 CompareType = ARMISD::CMP; 3251 break; 3252 case ARMCC::EQ: 3253 case ARMCC::NE: 3254 // Uses only Z Flag 3255 CompareType = ARMISD::CMPZ; 3256 break; 3257 } 3258 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); 3259 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); 3260 } 3261 3262 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 3263 SDValue 3264 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 3265 SDLoc dl) const { 3266 assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); 3267 SDValue Cmp; 3268 if (!isFloatingPointZero(RHS)) 3269 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); 3270 else 3271 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); 3272 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); 3273 } 3274 3275 /// duplicateCmp - Glue values can have only one use, so this function 3276 /// duplicates a comparison node. 
3277 SDValue 3278 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { 3279 unsigned Opc = Cmp.getOpcode(); 3280 SDLoc DL(Cmp); 3281 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) 3282 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 3283 3284 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); 3285 Cmp = Cmp.getOperand(0); 3286 Opc = Cmp.getOpcode(); 3287 if (Opc == ARMISD::CMPFP) 3288 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 3289 else { 3290 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); 3291 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); 3292 } 3293 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); 3294 } 3295 3296 std::pair<SDValue, SDValue> 3297 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, 3298 SDValue &ARMcc) const { 3299 assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); 3300 3301 SDValue Value, OverflowCmp; 3302 SDValue LHS = Op.getOperand(0); 3303 SDValue RHS = Op.getOperand(1); 3304 SDLoc dl(Op); 3305 3306 // FIXME: We are currently always generating CMPs because we don't support 3307 // generating CMN through the backend. This is not as good as the natural 3308 // CMP case because it causes a register dependency and cannot be folded 3309 // later. 
3310 3311 switch (Op.getOpcode()) { 3312 default: 3313 llvm_unreachable("Unknown overflow instruction!"); 3314 case ISD::SADDO: 3315 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); 3316 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS); 3317 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); 3318 break; 3319 case ISD::UADDO: 3320 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); 3321 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS); 3322 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); 3323 break; 3324 case ISD::SSUBO: 3325 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); 3326 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); 3327 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); 3328 break; 3329 case ISD::USUBO: 3330 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); 3331 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); 3332 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); 3333 break; 3334 } // switch (...) 3335 3336 return std::make_pair(Value, OverflowCmp); 3337 } 3338 3339 3340 SDValue 3341 ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { 3342 // Let legalize expand this if it isn't a legal type yet. 3343 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) 3344 return SDValue(); 3345 3346 SDValue Value, OverflowCmp; 3347 SDValue ARMcc; 3348 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); 3349 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3350 SDLoc dl(Op); 3351 // We use 0 and 1 as false and true values. 
3352 SDValue TVal = DAG.getConstant(1, dl, MVT::i32); 3353 SDValue FVal = DAG.getConstant(0, dl, MVT::i32); 3354 EVT VT = Op.getValueType(); 3355 3356 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, 3357 ARMcc, CCR, OverflowCmp); 3358 3359 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); 3360 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); 3361 } 3362 3363 3364 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 3365 SDValue Cond = Op.getOperand(0); 3366 SDValue SelectTrue = Op.getOperand(1); 3367 SDValue SelectFalse = Op.getOperand(2); 3368 SDLoc dl(Op); 3369 unsigned Opc = Cond.getOpcode(); 3370 3371 if (Cond.getResNo() == 1 && 3372 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || 3373 Opc == ISD::USUBO)) { 3374 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) 3375 return SDValue(); 3376 3377 SDValue Value, OverflowCmp; 3378 SDValue ARMcc; 3379 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); 3380 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3381 EVT VT = Op.getValueType(); 3382 3383 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR, 3384 OverflowCmp, DAG); 3385 } 3386 3387 // Convert: 3388 // 3389 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 3390 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 3391 // 3392 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 3393 const ConstantSDNode *CMOVTrue = 3394 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 3395 const ConstantSDNode *CMOVFalse = 3396 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 3397 3398 if (CMOVTrue && CMOVFalse) { 3399 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 3400 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 3401 3402 SDValue True; 3403 SDValue False; 3404 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 3405 True = SelectTrue; 3406 False = SelectFalse; 3407 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 3408 True = SelectFalse; 
3409 False = SelectTrue; 3410 } 3411 3412 if (True.getNode() && False.getNode()) { 3413 EVT VT = Op.getValueType(); 3414 SDValue ARMcc = Cond.getOperand(2); 3415 SDValue CCR = Cond.getOperand(3); 3416 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); 3417 assert(True.getValueType() == VT); 3418 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); 3419 } 3420 } 3421 } 3422 3423 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the 3424 // undefined bits before doing a full-word comparison with zero. 3425 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, 3426 DAG.getConstant(1, dl, Cond.getValueType())); 3427 3428 return DAG.getSelectCC(dl, Cond, 3429 DAG.getConstant(0, dl, Cond.getValueType()), 3430 SelectTrue, SelectFalse, ISD::SETNE); 3431 } 3432 3433 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, 3434 bool &swpCmpOps, bool &swpVselOps) { 3435 // Start by selecting the GE condition code for opcodes that return true for 3436 // 'equality' 3437 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE || 3438 CC == ISD::SETULE) 3439 CondCode = ARMCC::GE; 3440 3441 // and GT for opcodes that return false for 'equality'. 3442 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT || 3443 CC == ISD::SETULT) 3444 CondCode = ARMCC::GT; 3445 3446 // Since we are constrained to GE/GT, if the opcode contains 'less', we need 3447 // to swap the compare operands. 3448 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT || 3449 CC == ISD::SETULT) 3450 swpCmpOps = true; 3451 3452 // Both GT and GE are ordered comparisons, and return false for 'unordered'. 3453 // If we have an unordered opcode, we need to swap the operands to the VSEL 3454 // instruction (effectively negating the condition). 3455 // 3456 // This also has the effect of swapping which one of 'less' or 'greater' 3457 // returns true, so we also swap the compare operands. 
It also switches 3458 // whether we return true for 'equality', so we compensate by picking the 3459 // opposite condition code to our original choice. 3460 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE || 3461 CC == ISD::SETUGT) { 3462 swpCmpOps = !swpCmpOps; 3463 swpVselOps = !swpVselOps; 3464 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT; 3465 } 3466 3467 // 'ordered' is 'anything but unordered', so use the VS condition code and 3468 // swap the VSEL operands. 3469 if (CC == ISD::SETO) { 3470 CondCode = ARMCC::VS; 3471 swpVselOps = true; 3472 } 3473 3474 // 'unordered or not equal' is 'anything but equal', so use the EQ condition 3475 // code and swap the VSEL operands. 3476 if (CC == ISD::SETUNE) { 3477 CondCode = ARMCC::EQ; 3478 swpVselOps = true; 3479 } 3480 } 3481 3482 SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, 3483 SDValue TrueVal, SDValue ARMcc, SDValue CCR, 3484 SDValue Cmp, SelectionDAG &DAG) const { 3485 if (Subtarget->isFPOnlySP() && VT == MVT::f64) { 3486 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, 3487 DAG.getVTList(MVT::i32, MVT::i32), FalseVal); 3488 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, 3489 DAG.getVTList(MVT::i32, MVT::i32), TrueVal); 3490 3491 SDValue TrueLow = TrueVal.getValue(0); 3492 SDValue TrueHigh = TrueVal.getValue(1); 3493 SDValue FalseLow = FalseVal.getValue(0); 3494 SDValue FalseHigh = FalseVal.getValue(1); 3495 3496 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, 3497 ARMcc, CCR, Cmp); 3498 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, 3499 ARMcc, CCR, duplicateCmp(Cmp, DAG)); 3500 3501 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); 3502 } else { 3503 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, 3504 Cmp); 3505 } 3506 } 3507 3508 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 3509 EVT VT = Op.getValueType(); 3510 SDValue LHS = Op.getOperand(0); 
3511 SDValue RHS = Op.getOperand(1); 3512 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 3513 SDValue TrueVal = Op.getOperand(2); 3514 SDValue FalseVal = Op.getOperand(3); 3515 SDLoc dl(Op); 3516 3517 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { 3518 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, 3519 dl); 3520 3521 // If softenSetCCOperands only returned one value, we should compare it to 3522 // zero. 3523 if (!RHS.getNode()) { 3524 RHS = DAG.getConstant(0, dl, LHS.getValueType()); 3525 CC = ISD::SETNE; 3526 } 3527 } 3528 3529 if (LHS.getValueType() == MVT::i32) { 3530 // Try to generate VSEL on ARMv8. 3531 // The VSEL instruction can't use all the usual ARM condition 3532 // codes: it only has two bits to select the condition code, so it's 3533 // constrained to use only GE, GT, VS and EQ. 3534 // 3535 // To implement all the various ISD::SETXXX opcodes, we sometimes need to 3536 // swap the operands of the previous compare instruction (effectively 3537 // inverting the compare condition, swapping 'less' and 'greater') and 3538 // sometimes need to swap the operands to the VSEL (which inverts the 3539 // condition in the sense of firing whenever the previous condition didn't) 3540 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || 3541 TrueVal.getValueType() == MVT::f64)) { 3542 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 3543 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || 3544 CondCode == ARMCC::VC || CondCode == ARMCC::NE) { 3545 CC = ISD::getSetCCInverse(CC, true); 3546 std::swap(TrueVal, FalseVal); 3547 } 3548 } 3549 3550 SDValue ARMcc; 3551 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3552 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3553 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); 3554 } 3555 3556 ARMCC::CondCodes CondCode, CondCode2; 3557 FPCCToARMCC(CC, CondCode, CondCode2); 3558 3559 // Try to generate VMAXNM/VMINNM on 
ARMv8. 3560 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || 3561 TrueVal.getValueType() == MVT::f64)) { 3562 // We can use VMAXNM/VMINNM for a compare followed by a select with the 3563 // same operands, as follows: 3564 // c = fcmp [?gt, ?ge, ?lt, ?le] a, b 3565 // select c, a, b 3566 // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'. 3567 bool swapSides = false; 3568 if (!getTargetMachine().Options.NoNaNsFPMath) { 3569 // transformability may depend on which way around we compare 3570 switch (CC) { 3571 default: 3572 break; 3573 case ISD::SETOGT: 3574 case ISD::SETOGE: 3575 case ISD::SETOLT: 3576 case ISD::SETOLE: 3577 // the non-NaN should be RHS 3578 swapSides = DAG.isKnownNeverNaN(LHS) && !DAG.isKnownNeverNaN(RHS); 3579 break; 3580 case ISD::SETUGT: 3581 case ISD::SETUGE: 3582 case ISD::SETULT: 3583 case ISD::SETULE: 3584 // the non-NaN should be LHS 3585 swapSides = DAG.isKnownNeverNaN(RHS) && !DAG.isKnownNeverNaN(LHS); 3586 break; 3587 } 3588 } 3589 swapSides = swapSides || (LHS == FalseVal && RHS == TrueVal); 3590 if (swapSides) { 3591 CC = ISD::getSetCCSwappedOperands(CC); 3592 std::swap(LHS, RHS); 3593 } 3594 if (LHS == TrueVal && RHS == FalseVal) { 3595 bool canTransform = true; 3596 // FIXME: FastMathFlags::noSignedZeros() doesn't appear reachable from here 3597 if (!getTargetMachine().Options.UnsafeFPMath && 3598 !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { 3599 const ConstantFPSDNode *Zero; 3600 switch (CC) { 3601 default: 3602 break; 3603 case ISD::SETOGT: 3604 case ISD::SETUGT: 3605 case ISD::SETGT: 3606 // RHS must not be -0 3607 canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) && 3608 !Zero->isNegative(); 3609 break; 3610 case ISD::SETOGE: 3611 case ISD::SETUGE: 3612 case ISD::SETGE: 3613 // LHS must not be -0 3614 canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) && 3615 !Zero->isNegative(); 3616 break; 3617 case ISD::SETOLT: 3618 case ISD::SETULT: 3619 case ISD::SETLT: 
3620 // RHS must not be +0 3621 canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) && 3622 Zero->isNegative(); 3623 break; 3624 case ISD::SETOLE: 3625 case ISD::SETULE: 3626 case ISD::SETLE: 3627 // LHS must not be +0 3628 canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) && 3629 Zero->isNegative(); 3630 break; 3631 } 3632 } 3633 if (canTransform) { 3634 // Note: If one of the elements in a pair is a number and the other 3635 // element is NaN, the corresponding result element is the number. 3636 // This is consistent with the IEEE 754-2008 standard. 3637 // Therefore, a > b ? a : b <=> vmax(a,b), if b is constant and a is NaN 3638 switch (CC) { 3639 default: 3640 break; 3641 case ISD::SETOGT: 3642 case ISD::SETOGE: 3643 if (!DAG.isKnownNeverNaN(RHS)) 3644 break; 3645 return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS); 3646 case ISD::SETUGT: 3647 case ISD::SETUGE: 3648 if (!DAG.isKnownNeverNaN(LHS)) 3649 break; 3650 case ISD::SETGT: 3651 case ISD::SETGE: 3652 return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS); 3653 case ISD::SETOLT: 3654 case ISD::SETOLE: 3655 if (!DAG.isKnownNeverNaN(RHS)) 3656 break; 3657 return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS); 3658 case ISD::SETULT: 3659 case ISD::SETULE: 3660 if (!DAG.isKnownNeverNaN(LHS)) 3661 break; 3662 case ISD::SETLT: 3663 case ISD::SETLE: 3664 return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS); 3665 } 3666 } 3667 } 3668 3669 bool swpCmpOps = false; 3670 bool swpVselOps = false; 3671 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); 3672 3673 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE || 3674 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) { 3675 if (swpCmpOps) 3676 std::swap(LHS, RHS); 3677 if (swpVselOps) 3678 std::swap(TrueVal, FalseVal); 3679 } 3680 } 3681 3682 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); 3683 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 3684 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3685 SDValue Result = getCMOV(dl, VT, FalseVal, 
TrueVal, ARMcc, CCR, Cmp, DAG); 3686 if (CondCode2 != ARMCC::AL) { 3687 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); 3688 // FIXME: Needs another CMP because flag can have but one use. 3689 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 3690 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); 3691 } 3692 return Result; 3693 } 3694 3695 /// canChangeToInt - Given the fp compare operand, return true if it is suitable 3696 /// to morph to an integer compare sequence. 3697 static bool canChangeToInt(SDValue Op, bool &SeenZero, 3698 const ARMSubtarget *Subtarget) { 3699 SDNode *N = Op.getNode(); 3700 if (!N->hasOneUse()) 3701 // Otherwise it requires moving the value from fp to integer registers. 3702 return false; 3703 if (!N->getNumValues()) 3704 return false; 3705 EVT VT = Op.getValueType(); 3706 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 3707 // f32 case is generally profitable. f64 case only makes sense when vcmpe + 3708 // vmrs are very slow, e.g. cortex-a8. 
3709 return false; 3710 3711 if (isFloatingPointZero(Op)) { 3712 SeenZero = true; 3713 return true; 3714 } 3715 return ISD::isNormalLoad(N); 3716 } 3717 3718 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 3719 if (isFloatingPointZero(Op)) 3720 return DAG.getConstant(0, SDLoc(Op), MVT::i32); 3721 3722 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 3723 return DAG.getLoad(MVT::i32, SDLoc(Op), 3724 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 3725 Ld->isVolatile(), Ld->isNonTemporal(), 3726 Ld->isInvariant(), Ld->getAlignment()); 3727 3728 llvm_unreachable("Unknown VFP cmp argument!"); 3729 } 3730 3731 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 3732 SDValue &RetVal1, SDValue &RetVal2) { 3733 SDLoc dl(Op); 3734 3735 if (isFloatingPointZero(Op)) { 3736 RetVal1 = DAG.getConstant(0, dl, MVT::i32); 3737 RetVal2 = DAG.getConstant(0, dl, MVT::i32); 3738 return; 3739 } 3740 3741 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 3742 SDValue Ptr = Ld->getBasePtr(); 3743 RetVal1 = DAG.getLoad(MVT::i32, dl, 3744 Ld->getChain(), Ptr, 3745 Ld->getPointerInfo(), 3746 Ld->isVolatile(), Ld->isNonTemporal(), 3747 Ld->isInvariant(), Ld->getAlignment()); 3748 3749 EVT PtrType = Ptr.getValueType(); 3750 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 3751 SDValue NewPtr = DAG.getNode(ISD::ADD, dl, 3752 PtrType, Ptr, DAG.getConstant(4, dl, PtrType)); 3753 RetVal2 = DAG.getLoad(MVT::i32, dl, 3754 Ld->getChain(), NewPtr, 3755 Ld->getPointerInfo().getWithOffset(4), 3756 Ld->isVolatile(), Ld->isNonTemporal(), 3757 Ld->isInvariant(), NewAlign); 3758 return; 3759 } 3760 3761 llvm_unreachable("Unknown VFP cmp argument!"); 3762 } 3763 3764 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 3765 /// f32 and even f64 comparisons to integer ones. 
3766 SDValue 3767 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 3768 SDValue Chain = Op.getOperand(0); 3769 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3770 SDValue LHS = Op.getOperand(2); 3771 SDValue RHS = Op.getOperand(3); 3772 SDValue Dest = Op.getOperand(4); 3773 SDLoc dl(Op); 3774 3775 bool LHSSeenZero = false; 3776 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); 3777 bool RHSSeenZero = false; 3778 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); 3779 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { 3780 // If unsafe fp math optimization is enabled and there are no other uses of 3781 // the CMP operands, and the condition code is EQ or NE, we can optimize it 3782 // to an integer comparison. 3783 if (CC == ISD::SETOEQ) 3784 CC = ISD::SETEQ; 3785 else if (CC == ISD::SETUNE) 3786 CC = ISD::SETNE; 3787 3788 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32); 3789 SDValue ARMcc; 3790 if (LHS.getValueType() == MVT::f32) { 3791 LHS = DAG.getNode(ISD::AND, dl, MVT::i32, 3792 bitcastf32Toi32(LHS, DAG), Mask); 3793 RHS = DAG.getNode(ISD::AND, dl, MVT::i32, 3794 bitcastf32Toi32(RHS, DAG), Mask); 3795 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3796 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3797 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3798 Chain, Dest, ARMcc, CCR, Cmp); 3799 } 3800 3801 SDValue LHS1, LHS2; 3802 SDValue RHS1, RHS2; 3803 expandf64Toi32(LHS, DAG, LHS1, LHS2); 3804 expandf64Toi32(RHS, DAG, RHS1, RHS2); 3805 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); 3806 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); 3807 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 3808 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); 3809 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3810 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 3811 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); 3812 } 3813 3814 return SDValue(); 
3815 } 3816 3817 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 3818 SDValue Chain = Op.getOperand(0); 3819 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3820 SDValue LHS = Op.getOperand(2); 3821 SDValue RHS = Op.getOperand(3); 3822 SDValue Dest = Op.getOperand(4); 3823 SDLoc dl(Op); 3824 3825 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { 3826 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, 3827 dl); 3828 3829 // If softenSetCCOperands only returned one value, we should compare it to 3830 // zero. 3831 if (!RHS.getNode()) { 3832 RHS = DAG.getConstant(0, dl, LHS.getValueType()); 3833 CC = ISD::SETNE; 3834 } 3835 } 3836 3837 if (LHS.getValueType() == MVT::i32) { 3838 SDValue ARMcc; 3839 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3840 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3841 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3842 Chain, Dest, ARMcc, CCR, Cmp); 3843 } 3844 3845 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 3846 3847 if (getTargetMachine().Options.UnsafeFPMath && 3848 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 3849 CC == ISD::SETNE || CC == ISD::SETUNE)) { 3850 SDValue Result = OptimizeVFPBrcond(Op, DAG); 3851 if (Result.getNode()) 3852 return Result; 3853 } 3854 3855 ARMCC::CondCodes CondCode, CondCode2; 3856 FPCCToARMCC(CC, CondCode, CondCode2); 3857 3858 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); 3859 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 3860 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3861 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3862 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 3863 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); 3864 if (CondCode2 != ARMCC::AL) { 3865 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32); 3866 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 3867 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); 
3868 } 3869 return Res; 3870 } 3871 3872 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 3873 SDValue Chain = Op.getOperand(0); 3874 SDValue Table = Op.getOperand(1); 3875 SDValue Index = Op.getOperand(2); 3876 SDLoc dl(Op); 3877 3878 EVT PTy = getPointerTy(DAG.getDataLayout()); 3879 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 3880 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 3881 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI); 3882 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy)); 3883 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 3884 if (Subtarget->isThumb2()) { 3885 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 3886 // which does another jump to the destination. This also makes it easier 3887 // to translate it to TBB / TBH later. 3888 // FIXME: This might not work if the function is extremely large. 3889 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 3890 Addr, Op.getOperand(2), JTI); 3891 } 3892 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 3893 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 3894 MachinePointerInfo::getJumpTable(), 3895 false, false, false, 0); 3896 Chain = Addr.getValue(1); 3897 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 3898 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); 3899 } else { 3900 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 3901 MachinePointerInfo::getJumpTable(), 3902 false, false, false, 0); 3903 Chain = Addr.getValue(1); 3904 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); 3905 } 3906 } 3907 3908 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3909 EVT VT = Op.getValueType(); 3910 SDLoc dl(Op); 3911 3912 if (Op.getValueType().getVectorElementType() == MVT::i32) { 3913 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) 3914 return Op; 3915 return DAG.UnrollVectorOp(Op.getNode()); 3916 
} 3917 3918 assert(Op.getOperand(0).getValueType() == MVT::v4f32 && 3919 "Invalid type for custom lowering!"); 3920 if (VT != MVT::v4i16) 3921 return DAG.UnrollVectorOp(Op.getNode()); 3922 3923 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); 3924 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); 3925 } 3926 3927 SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { 3928 EVT VT = Op.getValueType(); 3929 if (VT.isVector()) 3930 return LowerVectorFP_TO_INT(Op, DAG); 3931 if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) { 3932 RTLIB::Libcall LC; 3933 if (Op.getOpcode() == ISD::FP_TO_SINT) 3934 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), 3935 Op.getValueType()); 3936 else 3937 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), 3938 Op.getValueType()); 3939 return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, 3940 /*isSigned*/ false, SDLoc(Op)).first; 3941 } 3942 3943 return Op; 3944 } 3945 3946 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3947 EVT VT = Op.getValueType(); 3948 SDLoc dl(Op); 3949 3950 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { 3951 if (VT.getVectorElementType() == MVT::f32) 3952 return Op; 3953 return DAG.UnrollVectorOp(Op.getNode()); 3954 } 3955 3956 assert(Op.getOperand(0).getValueType() == MVT::v4i16 && 3957 "Invalid type for custom lowering!"); 3958 if (VT != MVT::v4f32) 3959 return DAG.UnrollVectorOp(Op.getNode()); 3960 3961 unsigned CastOpc; 3962 unsigned Opc; 3963 switch (Op.getOpcode()) { 3964 default: llvm_unreachable("Invalid opcode!"); 3965 case ISD::SINT_TO_FP: 3966 CastOpc = ISD::SIGN_EXTEND; 3967 Opc = ISD::SINT_TO_FP; 3968 break; 3969 case ISD::UINT_TO_FP: 3970 CastOpc = ISD::ZERO_EXTEND; 3971 Opc = ISD::UINT_TO_FP; 3972 break; 3973 } 3974 3975 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); 3976 return DAG.getNode(Opc, dl, VT, Op); 3977 } 3978 3979 SDValue 
ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { 3980 EVT VT = Op.getValueType(); 3981 if (VT.isVector()) 3982 return LowerVectorINT_TO_FP(Op, DAG); 3983 if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) { 3984 RTLIB::Libcall LC; 3985 if (Op.getOpcode() == ISD::SINT_TO_FP) 3986 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), 3987 Op.getValueType()); 3988 else 3989 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), 3990 Op.getValueType()); 3991 return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, 3992 /*isSigned*/ false, SDLoc(Op)).first; 3993 } 3994 3995 return Op; 3996 } 3997 3998 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 3999 // Implement fcopysign with a fabs and a conditional fneg. 4000 SDValue Tmp0 = Op.getOperand(0); 4001 SDValue Tmp1 = Op.getOperand(1); 4002 SDLoc dl(Op); 4003 EVT VT = Op.getValueType(); 4004 EVT SrcVT = Tmp1.getValueType(); 4005 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 4006 Tmp0.getOpcode() == ARMISD::VMOVDRR; 4007 bool UseNEON = !InGPR && Subtarget->hasNEON(); 4008 4009 if (UseNEON) { 4010 // Use VBSL to copy the sign bit. 4011 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 4012 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 4013 DAG.getTargetConstant(EncodedVal, dl, MVT::i32)); 4014 EVT OpVT = (VT == MVT::f32) ? 
MVT::v2i32 : MVT::v1i64; 4015 if (VT == MVT::f64) 4016 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 4017 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 4018 DAG.getConstant(32, dl, MVT::i32)); 4019 else /*if (VT == MVT::f32)*/ 4020 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 4021 if (SrcVT == MVT::f32) { 4022 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 4023 if (VT == MVT::f64) 4024 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 4025 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 4026 DAG.getConstant(32, dl, MVT::i32)); 4027 } else if (VT == MVT::f32) 4028 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, 4029 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), 4030 DAG.getConstant(32, dl, MVT::i32)); 4031 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 4032 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 4033 4034 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 4035 dl, MVT::i32); 4036 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 4037 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 4038 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 4039 4040 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 4041 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 4042 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 4043 if (VT == MVT::f32) { 4044 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); 4045 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 4046 DAG.getConstant(0, dl, MVT::i32)); 4047 } else { 4048 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 4049 } 4050 4051 return Res; 4052 } 4053 4054 // Bitcast operand 1 to i32. 4055 if (SrcVT == MVT::f64) 4056 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 4057 Tmp1).getValue(1); 4058 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 4059 4060 // Or in the signbit with integer operations. 
4061 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32); 4062 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32); 4063 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 4064 if (VT == MVT::f32) { 4065 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 4066 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 4067 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 4068 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 4069 } 4070 4071 // f64: Or the high part with signbit and then combine two parts. 4072 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 4073 Tmp0); 4074 SDValue Lo = Tmp0.getValue(0); 4075 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); 4076 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); 4077 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 4078 } 4079 4080 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 4081 MachineFunction &MF = DAG.getMachineFunction(); 4082 MachineFrameInfo *MFI = MF.getFrameInfo(); 4083 MFI->setReturnAddressIsTaken(true); 4084 4085 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 4086 return SDValue(); 4087 4088 EVT VT = Op.getValueType(); 4089 SDLoc dl(Op); 4090 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 4091 if (Depth) { 4092 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 4093 SDValue Offset = DAG.getConstant(4, dl, MVT::i32); 4094 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 4095 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 4096 MachinePointerInfo(), false, false, false, 0); 4097 } 4098 4099 // Return LR, which contains the return address. Mark it an implicit live-in. 
4100 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 4101 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 4102 } 4103 4104 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 4105 const ARMBaseRegisterInfo &ARI = 4106 *static_cast<const ARMBaseRegisterInfo*>(RegInfo); 4107 MachineFunction &MF = DAG.getMachineFunction(); 4108 MachineFrameInfo *MFI = MF.getFrameInfo(); 4109 MFI->setFrameAddressIsTaken(true); 4110 4111 EVT VT = Op.getValueType(); 4112 SDLoc dl(Op); // FIXME probably not meaningful 4113 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 4114 unsigned FrameReg = ARI.getFrameRegister(MF); 4115 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 4116 while (Depth--) 4117 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 4118 MachinePointerInfo(), 4119 false, false, false, 0); 4120 return FrameAddr; 4121 } 4122 4123 // FIXME? Maybe this could be a TableGen attribute on some registers and 4124 // this table could be generated automatically from RegInfo. 4125 unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, 4126 SelectionDAG &DAG) const { 4127 unsigned Reg = StringSwitch<unsigned>(RegName) 4128 .Case("sp", ARM::SP) 4129 .Default(0); 4130 if (Reg) 4131 return Reg; 4132 report_fatal_error(Twine("Invalid register name \"" 4133 + StringRef(RegName) + "\".")); 4134 } 4135 4136 // Result is 64 bit value so split into two 32 bit values and return as a 4137 // pair of values. 4138 static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results, 4139 SelectionDAG &DAG) { 4140 SDLoc DL(N); 4141 4142 // This function is only supposed to be called for i64 type destination. 
4143 assert(N->getValueType(0) == MVT::i64 4144 && "ExpandREAD_REGISTER called for non-i64 type result."); 4145 4146 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL, 4147 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other), 4148 N->getOperand(0), 4149 N->getOperand(1)); 4150 4151 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0), 4152 Read.getValue(1))); 4153 Results.push_back(Read.getOperand(0)); 4154 } 4155 4156 /// ExpandBITCAST - If the target supports VFP, this function is called to 4157 /// expand a bit convert where either the source or destination type is i64 to 4158 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 4159 /// operand type is illegal (e.g., v2f32 for a target that doesn't support 4160 /// vectors), since the legalizer won't know what to do with that. 4161 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { 4162 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 4163 SDLoc dl(N); 4164 SDValue Op = N->getOperand(0); 4165 4166 // This function is only supposed to be called for i64 types, either as the 4167 // source or destination of the bit convert. 4168 EVT SrcVT = Op.getValueType(); 4169 EVT DstVT = N->getValueType(0); 4170 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 4171 "ExpandBITCAST called for non-i64 type"); 4172 4173 // Turn i64->f64 into VMOVDRR. 4174 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 4175 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 4176 DAG.getConstant(0, dl, MVT::i32)); 4177 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 4178 DAG.getConstant(1, dl, MVT::i32)); 4179 return DAG.getNode(ISD::BITCAST, dl, DstVT, 4180 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 4181 } 4182 4183 // Turn f64->i64 into VMOVRRD. 
4184 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 4185 SDValue Cvt; 4186 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() && 4187 SrcVT.getVectorNumElements() > 1) 4188 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 4189 DAG.getVTList(MVT::i32, MVT::i32), 4190 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op)); 4191 else 4192 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 4193 DAG.getVTList(MVT::i32, MVT::i32), Op); 4194 // Merge the pieces into a single i64 value. 4195 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 4196 } 4197 4198 return SDValue(); 4199 } 4200 4201 /// getZeroVector - Returns a vector of specified type with all zero elements. 4202 /// Zero vectors are used to represent vector negation and in those cases 4203 /// will be implemented with the NEON VNEG instruction. However, VNEG does 4204 /// not support i64 elements, so sometimes the zero vectors will need to be 4205 /// explicitly constructed. Regardless, use a canonical VMOV to create the 4206 /// zero vector. 4207 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) { 4208 assert(VT.isVector() && "Expected a vector type"); 4209 // The canonical modified immediate encoding of a zero vector is....0! 4210 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32); 4211 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 4212 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); 4213 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 4214 } 4215 4216 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two 4217 /// i32 values and take a 2 x i32 value to shift plus a shift amount. 
4218 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, 4219 SelectionDAG &DAG) const { 4220 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 4221 EVT VT = Op.getValueType(); 4222 unsigned VTBits = VT.getSizeInBits(); 4223 SDLoc dl(Op); 4224 SDValue ShOpLo = Op.getOperand(0); 4225 SDValue ShOpHi = Op.getOperand(1); 4226 SDValue ShAmt = Op.getOperand(2); 4227 SDValue ARMcc; 4228 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 4229 4230 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 4231 4232 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 4233 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt); 4234 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 4235 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 4236 DAG.getConstant(VTBits, dl, MVT::i32)); 4237 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 4238 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 4239 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 4240 4241 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 4242 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), 4243 ISD::SETGE, ARMcc, DAG, dl); 4244 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 4245 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, 4246 CCR, Cmp); 4247 4248 SDValue Ops[2] = { Lo, Hi }; 4249 return DAG.getMergeValues(Ops, dl); 4250 } 4251 4252 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 4253 /// i32 values and take a 2 x i32 value to shift plus a shift amount. 
4254 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, 4255 SelectionDAG &DAG) const { 4256 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 4257 EVT VT = Op.getValueType(); 4258 unsigned VTBits = VT.getSizeInBits(); 4259 SDLoc dl(Op); 4260 SDValue ShOpLo = Op.getOperand(0); 4261 SDValue ShOpHi = Op.getOperand(1); 4262 SDValue ShAmt = Op.getOperand(2); 4263 SDValue ARMcc; 4264 4265 assert(Op.getOpcode() == ISD::SHL_PARTS); 4266 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 4267 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt); 4268 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 4269 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 4270 DAG.getConstant(VTBits, dl, MVT::i32)); 4271 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 4272 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 4273 4274 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 4275 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 4276 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), 4277 ISD::SETGE, ARMcc, DAG, dl); 4278 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 4279 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, 4280 CCR, Cmp); 4281 4282 SDValue Ops[2] = { Lo, Hi }; 4283 return DAG.getMergeValues(Ops, dl); 4284 } 4285 4286 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 4287 SelectionDAG &DAG) const { 4288 // The rounding mode is in bits 23:22 of the FPSCR. 4289 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 4290 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) 4291 // so that the shift + and get folded into a bitfield extract. 
4292 SDLoc dl(Op); 4293 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, 4294 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, 4295 MVT::i32)); 4296 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, 4297 DAG.getConstant(1U << 22, dl, MVT::i32)); 4298 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, 4299 DAG.getConstant(22, dl, MVT::i32)); 4300 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, 4301 DAG.getConstant(3, dl, MVT::i32)); 4302 } 4303 4304 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 4305 const ARMSubtarget *ST) { 4306 SDLoc dl(N); 4307 EVT VT = N->getValueType(0); 4308 if (VT.isVector()) { 4309 assert(ST->hasNEON()); 4310 4311 // Compute the least significant set bit: LSB = X & -X 4312 SDValue X = N->getOperand(0); 4313 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X); 4314 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX); 4315 4316 EVT ElemTy = VT.getVectorElementType(); 4317 4318 if (ElemTy == MVT::i8) { 4319 // Compute with: cttz(x) = ctpop(lsb - 1) 4320 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT, 4321 DAG.getTargetConstant(1, dl, ElemTy)); 4322 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One); 4323 return DAG.getNode(ISD::CTPOP, dl, VT, Bits); 4324 } 4325 4326 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) && 4327 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) { 4328 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0 4329 unsigned NumBits = ElemTy.getSizeInBits(); 4330 SDValue WidthMinus1 = 4331 DAG.getNode(ARMISD::VMOVIMM, dl, VT, 4332 DAG.getTargetConstant(NumBits - 1, dl, ElemTy)); 4333 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB); 4334 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ); 4335 } 4336 4337 // Compute with: cttz(x) = ctpop(lsb - 1) 4338 4339 // Since we can only compute the number of bits in a byte with vcnt.8, we 4340 // have to gather the result with pairwise addition (vpaddl) for i16, i32, 4341 // and i64. 
4342 4343 // Compute LSB - 1. 4344 SDValue Bits; 4345 if (ElemTy == MVT::i64) { 4346 // Load constant 0xffff'ffff'ffff'ffff to register. 4347 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT, 4348 DAG.getTargetConstant(0x1eff, dl, MVT::i32)); 4349 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF); 4350 } else { 4351 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT, 4352 DAG.getTargetConstant(1, dl, ElemTy)); 4353 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One); 4354 } 4355 4356 // Count #bits with vcnt.8. 4357 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; 4358 SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits); 4359 SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8); 4360 4361 // Gather the #bits with vpaddl (pairwise add.) 4362 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; 4363 SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit, 4364 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), 4365 Cnt8); 4366 if (ElemTy == MVT::i16) 4367 return Cnt16; 4368 4369 EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32; 4370 SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit, 4371 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), 4372 Cnt16); 4373 if (ElemTy == MVT::i32) 4374 return Cnt32; 4375 4376 assert(ElemTy == MVT::i64); 4377 SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 4378 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), 4379 Cnt32); 4380 return Cnt64; 4381 } 4382 4383 if (!ST->hasV6T2Ops()) 4384 return SDValue(); 4385 4386 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 4387 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 4388 } 4389 4390 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count 4391 /// for each 16-bit element from operand, repeated. The basic idea is to 4392 /// leverage vcnt to get the 8-bit counts, gather and add the results. 
4393 /// 4394 /// Trace for v4i16: 4395 /// input = [v0 v1 v2 v3 ] (vi 16-bit element) 4396 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element) 4397 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi) 4398 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6] 4399 /// [b0 b1 b2 b3 b4 b5 b6 b7] 4400 /// +[b1 b0 b3 b2 b5 b4 b7 b6] 4401 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0, 4402 /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits) 4403 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { 4404 EVT VT = N->getValueType(0); 4405 SDLoc DL(N); 4406 4407 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; 4408 SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0)); 4409 SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0); 4410 SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1); 4411 SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2); 4412 return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3); 4413 } 4414 4415 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the 4416 /// bit-count for each 16-bit element from the operand. We need slightly 4417 /// different sequencing for v4i16 and v8i16 to stay within NEON's available 4418 /// 64/128-bit registers. 
4419 /// 4420 /// Trace for v4i16: 4421 /// input = [v0 v1 v2 v3 ] (vi 16-bit element) 4422 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi) 4423 /// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ] 4424 /// v4i16:Extracted = [k0 k1 k2 k3 ] 4425 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { 4426 EVT VT = N->getValueType(0); 4427 SDLoc DL(N); 4428 4429 SDValue BitCounts = getCTPOP16BitCounts(N, DAG); 4430 if (VT.is64BitVector()) { 4431 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts); 4432 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended, 4433 DAG.getIntPtrConstant(0, DL)); 4434 } else { 4435 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, 4436 BitCounts, DAG.getIntPtrConstant(0, DL)); 4437 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted); 4438 } 4439 } 4440 4441 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the 4442 /// bit-count for each 32-bit element from the operand. The idea here is 4443 /// to split the vector into 16-bit elements, leverage the 16-bit count 4444 /// routine, and then combine the results. 4445 /// 4446 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged): 4447 /// input = [v0 v1 ] (vi: 32-bit elements) 4448 /// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1]) 4449 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi) 4450 /// vrev: N0 = [k1 k0 k3 k2 ] 4451 /// [k0 k1 k2 k3 ] 4452 /// N1 =+[k1 k0 k3 k2 ] 4453 /// [k0 k2 k1 k3 ] 4454 /// N2 =+[k1 k3 k0 k2 ] 4455 /// [k0 k2 k1 k3 ] 4456 /// Extended =+[k1 k3 k0 k2 ] 4457 /// [k0 k2 ] 4458 /// Extracted=+[k1 k3 ] 4459 /// 4460 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { 4461 EVT VT = N->getValueType(0); 4462 SDLoc DL(N); 4463 4464 EVT VT16Bit = VT.is64BitVector() ? 
MVT::v4i16 : MVT::v8i16; 4465 4466 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0)); 4467 SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG); 4468 SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16); 4469 SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0); 4470 SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1); 4471 4472 if (VT.is64BitVector()) { 4473 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2); 4474 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended, 4475 DAG.getIntPtrConstant(0, DL)); 4476 } else { 4477 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2, 4478 DAG.getIntPtrConstant(0, DL)); 4479 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted); 4480 } 4481 } 4482 4483 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, 4484 const ARMSubtarget *ST) { 4485 EVT VT = N->getValueType(0); 4486 4487 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON."); 4488 assert((VT == MVT::v2i32 || VT == MVT::v4i32 || 4489 VT == MVT::v4i16 || VT == MVT::v8i16) && 4490 "Unexpected type for custom ctpop lowering"); 4491 4492 if (VT.getVectorElementType() == MVT::i32) 4493 return lowerCTPOP32BitElements(N, DAG); 4494 else 4495 return lowerCTPOP16BitElements(N, DAG); 4496 } 4497 4498 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 4499 const ARMSubtarget *ST) { 4500 EVT VT = N->getValueType(0); 4501 SDLoc dl(N); 4502 4503 if (!VT.isVector()) 4504 return SDValue(); 4505 4506 // Lower vector shifts on NEON to use VSHL. 4507 assert(ST->hasNEON() && "unexpected vector shift"); 4508 4509 // Left shifts translate directly to the vshiftu intrinsic. 
4510 if (N->getOpcode() == ISD::SHL) 4511 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 4512 DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl, 4513 MVT::i32), 4514 N->getOperand(0), N->getOperand(1)); 4515 4516 assert((N->getOpcode() == ISD::SRA || 4517 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 4518 4519 // NEON uses the same intrinsics for both left and right shifts. For 4520 // right shifts, the shift amounts are negative, so negate the vector of 4521 // shift amounts. 4522 EVT ShiftVT = N->getOperand(1).getValueType(); 4523 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 4524 getZeroVector(ShiftVT, DAG, dl), 4525 N->getOperand(1)); 4526 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 4527 Intrinsic::arm_neon_vshifts : 4528 Intrinsic::arm_neon_vshiftu); 4529 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 4530 DAG.getConstant(vshiftInt, dl, MVT::i32), 4531 N->getOperand(0), NegatedCount); 4532 } 4533 4534 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, 4535 const ARMSubtarget *ST) { 4536 EVT VT = N->getValueType(0); 4537 SDLoc dl(N); 4538 4539 // We can get here for a node like i32 = ISD::SHL i32, i64 4540 if (VT != MVT::i64) 4541 return SDValue(); 4542 4543 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 4544 "Unknown shift to lower!"); 4545 4546 // We only lower SRA, SRL of 1 here, all others use generic lowering. 4547 if (!isa<ConstantSDNode>(N->getOperand(1)) || 4548 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 4549 return SDValue(); 4550 4551 // If we are in thumb mode, we don't have RRX. 4552 if (ST->isThumb1Only()) return SDValue(); 4553 4554 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 
4555 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 4556 DAG.getConstant(0, dl, MVT::i32)); 4557 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 4558 DAG.getConstant(1, dl, MVT::i32)); 4559 4560 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 4561 // captures the result into a carry flag. 4562 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 4563 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); 4564 4565 // The low part is an ARMISD::RRX operand, which shifts the carry in. 4566 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 4567 4568 // Merge the pieces into a single i64 value. 4569 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 4570 } 4571 4572 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 4573 SDValue TmpOp0, TmpOp1; 4574 bool Invert = false; 4575 bool Swap = false; 4576 unsigned Opc = 0; 4577 4578 SDValue Op0 = Op.getOperand(0); 4579 SDValue Op1 = Op.getOperand(1); 4580 SDValue CC = Op.getOperand(2); 4581 EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger(); 4582 EVT VT = Op.getValueType(); 4583 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4584 SDLoc dl(Op); 4585 4586 if (Op1.getValueType().isFloatingPoint()) { 4587 switch (SetCCOpcode) { 4588 default: llvm_unreachable("Illegal FP comparison"); 4589 case ISD::SETUNE: 4590 case ISD::SETNE: Invert = true; // Fallthrough 4591 case ISD::SETOEQ: 4592 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 4593 case ISD::SETOLT: 4594 case ISD::SETLT: Swap = true; // Fallthrough 4595 case ISD::SETOGT: 4596 case ISD::SETGT: Opc = ARMISD::VCGT; break; 4597 case ISD::SETOLE: 4598 case ISD::SETLE: Swap = true; // Fallthrough 4599 case ISD::SETOGE: 4600 case ISD::SETGE: Opc = ARMISD::VCGE; break; 4601 case ISD::SETUGE: Swap = true; // Fallthrough 4602 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 4603 case 
ISD::SETUGT: Swap = true; // Fallthrough 4604 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 4605 case ISD::SETUEQ: Invert = true; // Fallthrough 4606 case ISD::SETONE: 4607 // Expand this to (OLT | OGT). 4608 TmpOp0 = Op0; 4609 TmpOp1 = Op1; 4610 Opc = ISD::OR; 4611 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); 4612 Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1); 4613 break; 4614 case ISD::SETUO: Invert = true; // Fallthrough 4615 case ISD::SETO: 4616 // Expand this to (OLT | OGE). 4617 TmpOp0 = Op0; 4618 TmpOp1 = Op1; 4619 Opc = ISD::OR; 4620 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); 4621 Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1); 4622 break; 4623 } 4624 } else { 4625 // Integer comparisons. 4626 switch (SetCCOpcode) { 4627 default: llvm_unreachable("Illegal integer comparison"); 4628 case ISD::SETNE: Invert = true; 4629 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 4630 case ISD::SETLT: Swap = true; 4631 case ISD::SETGT: Opc = ARMISD::VCGT; break; 4632 case ISD::SETLE: Swap = true; 4633 case ISD::SETGE: Opc = ARMISD::VCGE; break; 4634 case ISD::SETULT: Swap = true; 4635 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 4636 case ISD::SETULE: Swap = true; 4637 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 4638 } 4639 4640 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 4641 if (Opc == ARMISD::VCEQ) { 4642 4643 SDValue AndOp; 4644 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 4645 AndOp = Op0; 4646 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 4647 AndOp = Op1; 4648 4649 // Ignore bitconvert. 
4650 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) 4651 AndOp = AndOp.getOperand(0); 4652 4653 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 4654 Opc = ARMISD::VTST; 4655 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0)); 4656 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1)); 4657 Invert = !Invert; 4658 } 4659 } 4660 } 4661 4662 if (Swap) 4663 std::swap(Op0, Op1); 4664 4665 // If one of the operands is a constant vector zero, attempt to fold the 4666 // comparison to a specialized compare-against-zero form. 4667 SDValue SingleOp; 4668 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 4669 SingleOp = Op0; 4670 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 4671 if (Opc == ARMISD::VCGE) 4672 Opc = ARMISD::VCLEZ; 4673 else if (Opc == ARMISD::VCGT) 4674 Opc = ARMISD::VCLTZ; 4675 SingleOp = Op1; 4676 } 4677 4678 SDValue Result; 4679 if (SingleOp.getNode()) { 4680 switch (Opc) { 4681 case ARMISD::VCEQ: 4682 Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break; 4683 case ARMISD::VCGE: 4684 Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break; 4685 case ARMISD::VCLEZ: 4686 Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break; 4687 case ARMISD::VCGT: 4688 Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break; 4689 case ARMISD::VCLTZ: 4690 Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break; 4691 default: 4692 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); 4693 } 4694 } else { 4695 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); 4696 } 4697 4698 Result = DAG.getSExtOrTrunc(Result, dl, VT); 4699 4700 if (Invert) 4701 Result = DAG.getNOT(dl, Result, VT); 4702 4703 return Result; 4704 } 4705 4706 /// isNEONModifiedImm - Check if the specified splat value corresponds to a 4707 /// valid vector constant for a NEON instruction with a "modified immediate" 4708 /// operand (e.g., VMOV). If so, return the encoded value. 
4709 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 4710 unsigned SplatBitSize, SelectionDAG &DAG, 4711 SDLoc dl, EVT &VT, bool is128Bits, 4712 NEONModImmType type) { 4713 unsigned OpCmode, Imm; 4714 4715 // SplatBitSize is set to the smallest size that splats the vector, so a 4716 // zero vector will always have SplatBitSize == 8. However, NEON modified 4717 // immediate instructions others than VMOV do not support the 8-bit encoding 4718 // of a zero vector, and the default encoding of zero is supposed to be the 4719 // 32-bit version. 4720 if (SplatBits == 0) 4721 SplatBitSize = 32; 4722 4723 switch (SplatBitSize) { 4724 case 8: 4725 if (type != VMOVModImm) 4726 return SDValue(); 4727 // Any 1-byte value is OK. Op=0, Cmode=1110. 4728 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 4729 OpCmode = 0xe; 4730 Imm = SplatBits; 4731 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 4732 break; 4733 4734 case 16: 4735 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 4736 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 4737 if ((SplatBits & ~0xff) == 0) { 4738 // Value = 0x00nn: Op=x, Cmode=100x. 4739 OpCmode = 0x8; 4740 Imm = SplatBits; 4741 break; 4742 } 4743 if ((SplatBits & ~0xff00) == 0) { 4744 // Value = 0xnn00: Op=x, Cmode=101x. 4745 OpCmode = 0xa; 4746 Imm = SplatBits >> 8; 4747 break; 4748 } 4749 return SDValue(); 4750 4751 case 32: 4752 // NEON's 32-bit VMOV supports splat values where: 4753 // * only one byte is nonzero, or 4754 // * the least significant byte is 0xff and the second byte is nonzero, or 4755 // * the least significant 2 bytes are 0xff and the third is nonzero. 4756 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 4757 if ((SplatBits & ~0xff) == 0) { 4758 // Value = 0x000000nn: Op=x, Cmode=000x. 4759 OpCmode = 0; 4760 Imm = SplatBits; 4761 break; 4762 } 4763 if ((SplatBits & ~0xff00) == 0) { 4764 // Value = 0x0000nn00: Op=x, Cmode=001x. 
4765 OpCmode = 0x2; 4766 Imm = SplatBits >> 8; 4767 break; 4768 } 4769 if ((SplatBits & ~0xff0000) == 0) { 4770 // Value = 0x00nn0000: Op=x, Cmode=010x. 4771 OpCmode = 0x4; 4772 Imm = SplatBits >> 16; 4773 break; 4774 } 4775 if ((SplatBits & ~0xff000000) == 0) { 4776 // Value = 0xnn000000: Op=x, Cmode=011x. 4777 OpCmode = 0x6; 4778 Imm = SplatBits >> 24; 4779 break; 4780 } 4781 4782 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC 4783 if (type == OtherModImm) return SDValue(); 4784 4785 if ((SplatBits & ~0xffff) == 0 && 4786 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 4787 // Value = 0x0000nnff: Op=x, Cmode=1100. 4788 OpCmode = 0xc; 4789 Imm = SplatBits >> 8; 4790 break; 4791 } 4792 4793 if ((SplatBits & ~0xffffff) == 0 && 4794 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 4795 // Value = 0x00nnffff: Op=x, Cmode=1101. 4796 OpCmode = 0xd; 4797 Imm = SplatBits >> 16; 4798 break; 4799 } 4800 4801 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 4802 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 4803 // VMOV.I32. A (very) minor optimization would be to replicate the value 4804 // and fall through here to test for a valid 64-bit splat. But, then the 4805 // caller would also need to check and handle the change in size. 4806 return SDValue(); 4807 4808 case 64: { 4809 if (type != VMOVModImm) 4810 return SDValue(); 4811 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 
4812 uint64_t BitMask = 0xff; 4813 uint64_t Val = 0; 4814 unsigned ImmMask = 1; 4815 Imm = 0; 4816 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 4817 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 4818 Val |= BitMask; 4819 Imm |= ImmMask; 4820 } else if ((SplatBits & BitMask) != 0) { 4821 return SDValue(); 4822 } 4823 BitMask <<= 8; 4824 ImmMask <<= 1; 4825 } 4826 4827 if (DAG.getDataLayout().isBigEndian()) 4828 // swap higher and lower 32 bit word 4829 Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); 4830 4831 // Op=1, Cmode=1110. 4832 OpCmode = 0x1e; 4833 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 4834 break; 4835 } 4836 4837 default: 4838 llvm_unreachable("unexpected size for isNEONModifiedImm"); 4839 } 4840 4841 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 4842 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32); 4843 } 4844 4845 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, 4846 const ARMSubtarget *ST) const { 4847 if (!ST->hasVFP3()) 4848 return SDValue(); 4849 4850 bool IsDouble = Op.getValueType() == MVT::f64; 4851 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); 4852 4853 // Use the default (constant pool) lowering for double constants when we have 4854 // an SP-only FPU 4855 if (IsDouble && Subtarget->isFPOnlySP()) 4856 return SDValue(); 4857 4858 // Try splatting with a VMOV.f32... 4859 APFloat FPVal = CFP->getValueAPF(); 4860 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); 4861 4862 if (ImmVal != -1) { 4863 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) { 4864 // We have code in place to select a valid ConstantFP already, no need to 4865 // do any mangling. 4866 return Op; 4867 } 4868 4869 // It's a float and we are trying to use NEON operations where 4870 // possible. Lower it to a splat followed by an extract. 
4871 SDLoc DL(Op); 4872 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32); 4873 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, 4874 NewVal); 4875 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, 4876 DAG.getConstant(0, DL, MVT::i32)); 4877 } 4878 4879 // The rest of our options are NEON only, make sure that's allowed before 4880 // proceeding.. 4881 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP())) 4882 return SDValue(); 4883 4884 EVT VMovVT; 4885 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue(); 4886 4887 // It wouldn't really be worth bothering for doubles except for one very 4888 // important value, which does happen to match: 0.0. So make sure we don't do 4889 // anything stupid. 4890 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32)) 4891 return SDValue(); 4892 4893 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too). 4894 SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), 4895 VMovVT, false, VMOVModImm); 4896 if (NewVal != SDValue()) { 4897 SDLoc DL(Op); 4898 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, 4899 NewVal); 4900 if (IsDouble) 4901 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); 4902 4903 // It's a float: cast and extract a vector element. 4904 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 4905 VecConstant); 4906 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 4907 DAG.getConstant(0, DL, MVT::i32)); 4908 } 4909 4910 // Finally, try a VMVN.i32 4911 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT, 4912 false, VMVNModImm); 4913 if (NewVal != SDValue()) { 4914 SDLoc DL(Op); 4915 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); 4916 4917 if (IsDouble) 4918 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); 4919 4920 // It's a float: cast and extract a vector element. 
4921 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 4922 VecConstant); 4923 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 4924 DAG.getConstant(0, DL, MVT::i32)); 4925 } 4926 4927 return SDValue(); 4928 } 4929 4930 // check if an VEXT instruction can handle the shuffle mask when the 4931 // vector sources of the shuffle are the same. 4932 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { 4933 unsigned NumElts = VT.getVectorNumElements(); 4934 4935 // Assume that the first shuffle index is not UNDEF. Fail if it is. 4936 if (M[0] < 0) 4937 return false; 4938 4939 Imm = M[0]; 4940 4941 // If this is a VEXT shuffle, the immediate value is the index of the first 4942 // element. The other shuffle indices must be the successive elements after 4943 // the first one. 4944 unsigned ExpectedElt = Imm; 4945 for (unsigned i = 1; i < NumElts; ++i) { 4946 // Increment the expected index. If it wraps around, just follow it 4947 // back to index zero and keep going. 4948 ++ExpectedElt; 4949 if (ExpectedElt == NumElts) 4950 ExpectedElt = 0; 4951 4952 if (M[i] < 0) continue; // ignore UNDEF indices 4953 if (ExpectedElt != static_cast<unsigned>(M[i])) 4954 return false; 4955 } 4956 4957 return true; 4958 } 4959 4960 4961 static bool isVEXTMask(ArrayRef<int> M, EVT VT, 4962 bool &ReverseVEXT, unsigned &Imm) { 4963 unsigned NumElts = VT.getVectorNumElements(); 4964 ReverseVEXT = false; 4965 4966 // Assume that the first shuffle index is not UNDEF. Fail if it is. 4967 if (M[0] < 0) 4968 return false; 4969 4970 Imm = M[0]; 4971 4972 // If this is a VEXT shuffle, the immediate value is the index of the first 4973 // element. The other shuffle indices must be the successive elements after 4974 // the first one. 4975 unsigned ExpectedElt = Imm; 4976 for (unsigned i = 1; i < NumElts; ++i) { 4977 // Increment the expected index. If it wraps around, it may still be 4978 // a VEXT but the source vectors must be swapped. 
4979 ExpectedElt += 1; 4980 if (ExpectedElt == NumElts * 2) { 4981 ExpectedElt = 0; 4982 ReverseVEXT = true; 4983 } 4984 4985 if (M[i] < 0) continue; // ignore UNDEF indices 4986 if (ExpectedElt != static_cast<unsigned>(M[i])) 4987 return false; 4988 } 4989 4990 // Adjust the index value if the source operands will be swapped. 4991 if (ReverseVEXT) 4992 Imm -= NumElts; 4993 4994 return true; 4995 } 4996 4997 /// isVREVMask - Check if a vector shuffle corresponds to a VREV 4998 /// instruction with the specified blocksize. (The order of the elements 4999 /// within each block of the vector is reversed.) 5000 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { 5001 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 5002 "Only possible block sizes for VREV are: 16, 32, 64"); 5003 5004 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 5005 if (EltSz == 64) 5006 return false; 5007 5008 unsigned NumElts = VT.getVectorNumElements(); 5009 unsigned BlockElts = M[0] + 1; 5010 // If the first shuffle index is UNDEF, be optimistic. 5011 if (M[0] < 0) 5012 BlockElts = BlockSize / EltSz; 5013 5014 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 5015 return false; 5016 5017 for (unsigned i = 0; i < NumElts; ++i) { 5018 if (M[i] < 0) continue; // ignore UNDEF indices 5019 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 5020 return false; 5021 } 5022 5023 return true; 5024 } 5025 5026 static bool isVTBLMask(ArrayRef<int> M, EVT VT) { 5027 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of 5028 // range, then 0 is placed into the resulting vector. So pretty much any mask 5029 // of 8 elements can work here. 
5030 return VT == MVT::v8i8 && M.size() == 8; 5031 } 5032 5033 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 5034 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 5035 if (EltSz == 64) 5036 return false; 5037 5038 unsigned NumElts = VT.getVectorNumElements(); 5039 WhichResult = (M[0] == 0 ? 0 : 1); 5040 for (unsigned i = 0; i < NumElts; i += 2) { 5041 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 5042 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 5043 return false; 5044 } 5045 return true; 5046 } 5047 5048 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 5049 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 5050 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 5051 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 5052 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 5053 if (EltSz == 64) 5054 return false; 5055 5056 unsigned NumElts = VT.getVectorNumElements(); 5057 WhichResult = (M[0] == 0 ? 0 : 1); 5058 for (unsigned i = 0; i < NumElts; i += 2) { 5059 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 5060 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 5061 return false; 5062 } 5063 return true; 5064 } 5065 5066 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 5067 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 5068 if (EltSz == 64) 5069 return false; 5070 5071 unsigned NumElts = VT.getVectorNumElements(); 5072 WhichResult = (M[0] == 0 ? 0 : 1); 5073 for (unsigned i = 0; i != NumElts; ++i) { 5074 if (M[i] < 0) continue; // ignore UNDEF indices 5075 if ((unsigned) M[i] != 2 * i + WhichResult) 5076 return false; 5077 } 5078 5079 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 
5080 if (VT.is64BitVector() && EltSz == 32) 5081 return false; 5082 5083 return true; 5084 } 5085 5086 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 5087 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 5088 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 5089 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 5090 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 5091 if (EltSz == 64) 5092 return false; 5093 5094 unsigned Half = VT.getVectorNumElements() / 2; 5095 WhichResult = (M[0] == 0 ? 0 : 1); 5096 for (unsigned j = 0; j != 2; ++j) { 5097 unsigned Idx = WhichResult; 5098 for (unsigned i = 0; i != Half; ++i) { 5099 int MIdx = M[i + j * Half]; 5100 if (MIdx >= 0 && (unsigned) MIdx != Idx) 5101 return false; 5102 Idx += 2; 5103 } 5104 } 5105 5106 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 5107 if (VT.is64BitVector() && EltSz == 32) 5108 return false; 5109 5110 return true; 5111 } 5112 5113 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 5114 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 5115 if (EltSz == 64) 5116 return false; 5117 5118 unsigned NumElts = VT.getVectorNumElements(); 5119 WhichResult = (M[0] == 0 ? 0 : 1); 5120 unsigned Idx = WhichResult * NumElts / 2; 5121 for (unsigned i = 0; i != NumElts; i += 2) { 5122 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 5123 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) 5124 return false; 5125 Idx += 1; 5126 } 5127 5128 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 5129 if (VT.is64BitVector() && EltSz == 32) 5130 return false; 5131 5132 return true; 5133 } 5134 5135 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 5136 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 5137 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 
5138 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 5139 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 5140 if (EltSz == 64) 5141 return false; 5142 5143 unsigned NumElts = VT.getVectorNumElements(); 5144 WhichResult = (M[0] == 0 ? 0 : 1); 5145 unsigned Idx = WhichResult * NumElts / 2; 5146 for (unsigned i = 0; i != NumElts; i += 2) { 5147 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 5148 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) 5149 return false; 5150 Idx += 1; 5151 } 5152 5153 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 5154 if (VT.is64BitVector() && EltSz == 32) 5155 return false; 5156 5157 return true; 5158 } 5159 5160 /// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), 5161 /// and return the corresponding ARMISD opcode if it is, or 0 if it isn't. 5162 static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT, 5163 unsigned &WhichResult, 5164 bool &isV_UNDEF) { 5165 isV_UNDEF = false; 5166 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 5167 return ARMISD::VTRN; 5168 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 5169 return ARMISD::VUZP; 5170 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 5171 return ARMISD::VZIP; 5172 5173 isV_UNDEF = true; 5174 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 5175 return ARMISD::VTRN; 5176 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 5177 return ARMISD::VUZP; 5178 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 5179 return ARMISD::VZIP; 5180 5181 return 0; 5182 } 5183 5184 /// \return true if this is a reverse operation on an vector. 5185 static bool isReverseMask(ArrayRef<int> M, EVT VT) { 5186 unsigned NumElts = VT.getVectorNumElements(); 5187 // Make sure the mask has the right size. 5188 if (NumElts != M.size()) 5189 return false; 5190 5191 // Look for <15, ..., 3, -1, 1, 0>. 
5192 for (unsigned i = 0; i != NumElts; ++i) 5193 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i)) 5194 return false; 5195 5196 return true; 5197 } 5198 5199 // If N is an integer constant that can be moved into a register in one 5200 // instruction, return an SDValue of such a constant (will become a MOV 5201 // instruction). Otherwise return null. 5202 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 5203 const ARMSubtarget *ST, SDLoc dl) { 5204 uint64_t Val; 5205 if (!isa<ConstantSDNode>(N)) 5206 return SDValue(); 5207 Val = cast<ConstantSDNode>(N)->getZExtValue(); 5208 5209 if (ST->isThumb1Only()) { 5210 if (Val <= 255 || ~Val <= 255) 5211 return DAG.getConstant(Val, dl, MVT::i32); 5212 } else { 5213 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 5214 return DAG.getConstant(Val, dl, MVT::i32); 5215 } 5216 return SDValue(); 5217 } 5218 5219 // If this is a case we can't handle, return null and let the default 5220 // expansion code take care of it. 5221 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 5222 const ARMSubtarget *ST) const { 5223 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 5224 SDLoc dl(Op); 5225 EVT VT = Op.getValueType(); 5226 5227 APInt SplatBits, SplatUndef; 5228 unsigned SplatBitSize; 5229 bool HasAnyUndefs; 5230 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 5231 if (SplatBitSize <= 64) { 5232 // Check if an immediate VMOV works. 5233 EVT VmovVT; 5234 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 5235 SplatUndef.getZExtValue(), SplatBitSize, 5236 DAG, dl, VmovVT, VT.is128BitVector(), 5237 VMOVModImm); 5238 if (Val.getNode()) { 5239 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 5240 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 5241 } 5242 5243 // Try an immediate VMVN. 
5244 uint64_t NegatedImm = (~SplatBits).getZExtValue(); 5245 Val = isNEONModifiedImm(NegatedImm, 5246 SplatUndef.getZExtValue(), SplatBitSize, 5247 DAG, dl, VmovVT, VT.is128BitVector(), 5248 VMVNModImm); 5249 if (Val.getNode()) { 5250 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 5251 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 5252 } 5253 5254 // Use vmov.f32 to materialize other v2f32 and v4f32 splats. 5255 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { 5256 int ImmVal = ARM_AM::getFP32Imm(SplatBits); 5257 if (ImmVal != -1) { 5258 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32); 5259 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); 5260 } 5261 } 5262 } 5263 } 5264 5265 // Scan through the operands to see if only one value is used. 5266 // 5267 // As an optimisation, even if more than one value is used it may be more 5268 // profitable to splat with one value then change some lanes. 5269 // 5270 // Heuristically we decide to do this if the vector has a "dominant" value, 5271 // defined as splatted to more than half of the lanes. 5272 unsigned NumElts = VT.getVectorNumElements(); 5273 bool isOnlyLowElement = true; 5274 bool usesOnlyOneValue = true; 5275 bool hasDominantValue = false; 5276 bool isConstant = true; 5277 5278 // Map of the number of times a particular SDValue appears in the 5279 // element list. 5280 DenseMap<SDValue, unsigned> ValueCounts; 5281 SDValue Value; 5282 for (unsigned i = 0; i < NumElts; ++i) { 5283 SDValue V = Op.getOperand(i); 5284 if (V.getOpcode() == ISD::UNDEF) 5285 continue; 5286 if (i > 0) 5287 isOnlyLowElement = false; 5288 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 5289 isConstant = false; 5290 5291 ValueCounts.insert(std::make_pair(V, 0)); 5292 unsigned &Count = ValueCounts[V]; 5293 5294 // Is this value dominant? 
(takes up more than half of the lanes) 5295 if (++Count > (NumElts / 2)) { 5296 hasDominantValue = true; 5297 Value = V; 5298 } 5299 } 5300 if (ValueCounts.size() != 1) 5301 usesOnlyOneValue = false; 5302 if (!Value.getNode() && ValueCounts.size() > 0) 5303 Value = ValueCounts.begin()->first; 5304 5305 if (ValueCounts.size() == 0) 5306 return DAG.getUNDEF(VT); 5307 5308 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR. 5309 // Keep going if we are hitting this case. 5310 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) 5311 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 5312 5313 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 5314 5315 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 5316 // i32 and try again. 5317 if (hasDominantValue && EltSize <= 32) { 5318 if (!isConstant) { 5319 SDValue N; 5320 5321 // If we are VDUPing a value that comes directly from a vector, that will 5322 // cause an unnecessary move to and from a GPR, where instead we could 5323 // just use VDUPLANE. We can only do this if the lane being extracted 5324 // is at a constant index, as the VDUP from lane instructions only have 5325 // constant-index forms. 5326 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 5327 isa<ConstantSDNode>(Value->getOperand(1))) { 5328 // We need to create a new undef vector to use for the VDUPLANE if the 5329 // size of the vector from which we get the value is different than the 5330 // size of the vector that we need to create. We will insert the element 5331 // such that the register coalescer will remove unnecessary copies. 
5332 if (VT != Value->getOperand(0).getValueType()) { 5333 ConstantSDNode *constIndex; 5334 constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)); 5335 assert(constIndex && "The index is not a constant!"); 5336 unsigned index = constIndex->getAPIntValue().getLimitedValue() % 5337 VT.getVectorNumElements(); 5338 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, 5339 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), 5340 Value, DAG.getConstant(index, dl, MVT::i32)), 5341 DAG.getConstant(index, dl, MVT::i32)); 5342 } else 5343 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, 5344 Value->getOperand(0), Value->getOperand(1)); 5345 } else 5346 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); 5347 5348 if (!usesOnlyOneValue) { 5349 // The dominant value was splatted as 'N', but we now have to insert 5350 // all differing elements. 5351 for (unsigned I = 0; I < NumElts; ++I) { 5352 if (Op.getOperand(I) == Value) 5353 continue; 5354 SmallVector<SDValue, 3> Ops; 5355 Ops.push_back(N); 5356 Ops.push_back(Op.getOperand(I)); 5357 Ops.push_back(DAG.getConstant(I, dl, MVT::i32)); 5358 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops); 5359 } 5360 } 5361 return N; 5362 } 5363 if (VT.getVectorElementType().isFloatingPoint()) { 5364 SmallVector<SDValue, 8> Ops; 5365 for (unsigned i = 0; i < NumElts; ++i) 5366 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, 5367 Op.getOperand(i))); 5368 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 5369 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); 5370 Val = LowerBUILD_VECTOR(Val, DAG, ST); 5371 if (Val.getNode()) 5372 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 5373 } 5374 if (usesOnlyOneValue) { 5375 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 5376 if (isConstant && Val.getNode()) 5377 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 5378 } 5379 } 5380 5381 // If all elements are constants and the case above didn't get hit, fall back 5382 // to the default expansion, which will generate 
a load from the constant 5383 // pool. 5384 if (isConstant) 5385 return SDValue(); 5386 5387 // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 5388 if (NumElts >= 4) { 5389 SDValue shuffle = ReconstructShuffle(Op, DAG); 5390 if (shuffle != SDValue()) 5391 return shuffle; 5392 } 5393 5394 // Vectors with 32- or 64-bit elements can be built by directly assigning 5395 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 5396 // will be legalized. 5397 if (EltSize >= 32) { 5398 // Do the expansion with floating-point types, since that is what the VFP 5399 // registers are defined to use, and since i64 is not legal. 5400 EVT EltVT = EVT::getFloatingPointVT(EltSize); 5401 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 5402 SmallVector<SDValue, 8> Ops; 5403 for (unsigned i = 0; i < NumElts; ++i) 5404 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); 5405 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); 5406 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 5407 } 5408 5409 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we 5410 // know the default expansion would otherwise fall back on something even 5411 // worse. For a vector with one or two non-undef values, that's 5412 // scalar_to_vector for the elements followed by a shuffle (provided the 5413 // shuffle is valid for the target) and materialization element by element 5414 // on the stack followed by a load for everything else. 
5415 if (!isConstant && !usesOnlyOneValue) { 5416 SDValue Vec = DAG.getUNDEF(VT); 5417 for (unsigned i = 0 ; i < NumElts; ++i) { 5418 SDValue V = Op.getOperand(i); 5419 if (V.getOpcode() == ISD::UNDEF) 5420 continue; 5421 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32); 5422 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); 5423 } 5424 return Vec; 5425 } 5426 5427 return SDValue(); 5428 } 5429 5430 // Gather data to see if the operation can be modelled as a 5431 // shuffle in combination with VEXTs. 5432 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, 5433 SelectionDAG &DAG) const { 5434 SDLoc dl(Op); 5435 EVT VT = Op.getValueType(); 5436 unsigned NumElts = VT.getVectorNumElements(); 5437 5438 SmallVector<SDValue, 2> SourceVecs; 5439 SmallVector<unsigned, 2> MinElts; 5440 SmallVector<unsigned, 2> MaxElts; 5441 5442 for (unsigned i = 0; i < NumElts; ++i) { 5443 SDValue V = Op.getOperand(i); 5444 if (V.getOpcode() == ISD::UNDEF) 5445 continue; 5446 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { 5447 // A shuffle can only come from building a vector from various 5448 // elements of other vectors. 5449 return SDValue(); 5450 } else if (V.getOperand(0).getValueType().getVectorElementType() != 5451 VT.getVectorElementType()) { 5452 // This code doesn't know how to handle shuffles where the vector 5453 // element types do not match (this happens because type legalization 5454 // promotes the return type of EXTRACT_VECTOR_ELT). 5455 // FIXME: It might be appropriate to extend this code to handle 5456 // mismatched types. 5457 return SDValue(); 5458 } 5459 5460 // Record this extraction against the appropriate vector if possible... 5461 SDValue SourceVec = V.getOperand(0); 5462 // If the element number isn't a constant, we can't effectively 5463 // analyze what's going on. 
5464 if (!isa<ConstantSDNode>(V.getOperand(1))) 5465 return SDValue(); 5466 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); 5467 bool FoundSource = false; 5468 for (unsigned j = 0; j < SourceVecs.size(); ++j) { 5469 if (SourceVecs[j] == SourceVec) { 5470 if (MinElts[j] > EltNo) 5471 MinElts[j] = EltNo; 5472 if (MaxElts[j] < EltNo) 5473 MaxElts[j] = EltNo; 5474 FoundSource = true; 5475 break; 5476 } 5477 } 5478 5479 // Or record a new source if not... 5480 if (!FoundSource) { 5481 SourceVecs.push_back(SourceVec); 5482 MinElts.push_back(EltNo); 5483 MaxElts.push_back(EltNo); 5484 } 5485 } 5486 5487 // Currently only do something sane when at most two source vectors 5488 // involved. 5489 if (SourceVecs.size() > 2) 5490 return SDValue(); 5491 5492 SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; 5493 int VEXTOffsets[2] = {0, 0}; 5494 5495 // This loop extracts the usage patterns of the source vectors 5496 // and prepares appropriate SDValues for a shuffle if possible. 5497 for (unsigned i = 0; i < SourceVecs.size(); ++i) { 5498 if (SourceVecs[i].getValueType() == VT) { 5499 // No VEXT necessary 5500 ShuffleSrcs[i] = SourceVecs[i]; 5501 VEXTOffsets[i] = 0; 5502 continue; 5503 } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { 5504 // It probably isn't worth padding out a smaller vector just to 5505 // break it down again in a shuffle. 5506 return SDValue(); 5507 } 5508 5509 // Since only 64-bit and 128-bit vectors are legal on ARM and 5510 // we've eliminated the other cases... 
5511 assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && 5512 "unexpected vector sizes in ReconstructShuffle"); 5513 5514 if (MaxElts[i] - MinElts[i] >= NumElts) { 5515 // Span too large for a VEXT to cope 5516 return SDValue(); 5517 } 5518 5519 if (MinElts[i] >= NumElts) { 5520 // The extraction can just take the second half 5521 VEXTOffsets[i] = NumElts; 5522 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 5523 SourceVecs[i], 5524 DAG.getIntPtrConstant(NumElts, dl)); 5525 } else if (MaxElts[i] < NumElts) { 5526 // The extraction can just take the first half 5527 VEXTOffsets[i] = 0; 5528 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 5529 SourceVecs[i], 5530 DAG.getIntPtrConstant(0, dl)); 5531 } else { 5532 // An actual VEXT is needed 5533 VEXTOffsets[i] = MinElts[i]; 5534 SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 5535 SourceVecs[i], 5536 DAG.getIntPtrConstant(0, dl)); 5537 SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 5538 SourceVecs[i], 5539 DAG.getIntPtrConstant(NumElts, dl)); 5540 ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, 5541 DAG.getConstant(VEXTOffsets[i], dl, 5542 MVT::i32)); 5543 } 5544 } 5545 5546 SmallVector<int, 8> Mask; 5547 5548 for (unsigned i = 0; i < NumElts; ++i) { 5549 SDValue Entry = Op.getOperand(i); 5550 if (Entry.getOpcode() == ISD::UNDEF) { 5551 Mask.push_back(-1); 5552 continue; 5553 } 5554 5555 SDValue ExtractVec = Entry.getOperand(0); 5556 int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i) 5557 .getOperand(1))->getSExtValue(); 5558 if (ExtractVec == SourceVecs[0]) { 5559 Mask.push_back(ExtractElt - VEXTOffsets[0]); 5560 } else { 5561 Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); 5562 } 5563 } 5564 5565 // Final check before we try to produce nonsense... 
5566 if (isShuffleMaskLegal(Mask, VT)) 5567 return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], 5568 &Mask[0]); 5569 5570 return SDValue(); 5571 } 5572 5573 /// isShuffleMaskLegal - Targets can use this to indicate that they only 5574 /// support *some* VECTOR_SHUFFLE operations, those with specific masks. 5575 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 5576 /// are assumed to be legal. 5577 bool 5578 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 5579 EVT VT) const { 5580 if (VT.getVectorNumElements() == 4 && 5581 (VT.is128BitVector() || VT.is64BitVector())) { 5582 unsigned PFIndexes[4]; 5583 for (unsigned i = 0; i != 4; ++i) { 5584 if (M[i] < 0) 5585 PFIndexes[i] = 8; 5586 else 5587 PFIndexes[i] = M[i]; 5588 } 5589 5590 // Compute the index in the perfect shuffle table. 5591 unsigned PFTableIndex = 5592 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 5593 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 5594 unsigned Cost = (PFEntry >> 30); 5595 5596 if (Cost <= 4) 5597 return true; 5598 } 5599 5600 bool ReverseVEXT, isV_UNDEF; 5601 unsigned Imm, WhichResult; 5602 5603 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 5604 return (EltSize >= 32 || 5605 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 5606 isVREVMask(M, VT, 64) || 5607 isVREVMask(M, VT, 32) || 5608 isVREVMask(M, VT, 16) || 5609 isVEXTMask(M, VT, ReverseVEXT, Imm) || 5610 isVTBLMask(M, VT) || 5611 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) || 5612 ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT))); 5613 } 5614 5615 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 5616 /// the specified operations to build the shuffle. 
5617 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 5618 SDValue RHS, SelectionDAG &DAG, 5619 SDLoc dl) { 5620 unsigned OpNum = (PFEntry >> 26) & 0x0F; 5621 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 5622 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 5623 5624 enum { 5625 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 5626 OP_VREV, 5627 OP_VDUP0, 5628 OP_VDUP1, 5629 OP_VDUP2, 5630 OP_VDUP3, 5631 OP_VEXT1, 5632 OP_VEXT2, 5633 OP_VEXT3, 5634 OP_VUZPL, // VUZP, left result 5635 OP_VUZPR, // VUZP, right result 5636 OP_VZIPL, // VZIP, left result 5637 OP_VZIPR, // VZIP, right result 5638 OP_VTRNL, // VTRN, left result 5639 OP_VTRNR // VTRN, right result 5640 }; 5641 5642 if (OpNum == OP_COPY) { 5643 if (LHSID == (1*9+2)*9+3) return LHS; 5644 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 5645 return RHS; 5646 } 5647 5648 SDValue OpLHS, OpRHS; 5649 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 5650 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 5651 EVT VT = OpLHS.getValueType(); 5652 5653 switch (OpNum) { 5654 default: llvm_unreachable("Unknown shuffle opcode!"); 5655 case OP_VREV: 5656 // VREV divides the vector in half and swaps within the half. 
5657 if (VT.getVectorElementType() == MVT::i32 || 5658 VT.getVectorElementType() == MVT::f32) 5659 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 5660 // vrev <4 x i16> -> VREV32 5661 if (VT.getVectorElementType() == MVT::i16) 5662 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS); 5663 // vrev <4 x i8> -> VREV16 5664 assert(VT.getVectorElementType() == MVT::i8); 5665 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS); 5666 case OP_VDUP0: 5667 case OP_VDUP1: 5668 case OP_VDUP2: 5669 case OP_VDUP3: 5670 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 5671 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32)); 5672 case OP_VEXT1: 5673 case OP_VEXT2: 5674 case OP_VEXT3: 5675 return DAG.getNode(ARMISD::VEXT, dl, VT, 5676 OpLHS, OpRHS, 5677 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32)); 5678 case OP_VUZPL: 5679 case OP_VUZPR: 5680 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 5681 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 5682 case OP_VZIPL: 5683 case OP_VZIPR: 5684 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 5685 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 5686 case OP_VTRNL: 5687 case OP_VTRNR: 5688 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 5689 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 5690 } 5691 } 5692 5693 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, 5694 ArrayRef<int> ShuffleMask, 5695 SelectionDAG &DAG) { 5696 // Check to see if we can use the VTBL instruction. 
5697 SDValue V1 = Op.getOperand(0); 5698 SDValue V2 = Op.getOperand(1); 5699 SDLoc DL(Op); 5700 5701 SmallVector<SDValue, 8> VTBLMask; 5702 for (ArrayRef<int>::iterator 5703 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) 5704 VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32)); 5705 5706 if (V2.getNode()->getOpcode() == ISD::UNDEF) 5707 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, 5708 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); 5709 5710 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, 5711 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); 5712 } 5713 5714 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, 5715 SelectionDAG &DAG) { 5716 SDLoc DL(Op); 5717 SDValue OpLHS = Op.getOperand(0); 5718 EVT VT = OpLHS.getValueType(); 5719 5720 assert((VT == MVT::v8i16 || VT == MVT::v16i8) && 5721 "Expect an v8i16/v16i8 type"); 5722 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS); 5723 // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now, 5724 // extract the first 8 bytes into the top double word and the last 8 bytes 5725 // into the bottom double word. The v8i16 case is similar. 5726 unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4; 5727 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS, 5728 DAG.getConstant(ExtractNum, DL, MVT::i32)); 5729 } 5730 5731 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 5732 SDValue V1 = Op.getOperand(0); 5733 SDValue V2 = Op.getOperand(1); 5734 SDLoc dl(Op); 5735 EVT VT = Op.getValueType(); 5736 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 5737 5738 // Convert shuffles that are directly supported on NEON to target-specific 5739 // DAG nodes, instead of keeping them as shuffles and matching them again 5740 // during code selection. This is more efficient and avoids the possibility 5741 // of inconsistencies between legalization and selection. 
5742 // FIXME: floating-point vectors should be canonicalized to integer vectors 5743 // of the same time so that they get CSEd properly. 5744 ArrayRef<int> ShuffleMask = SVN->getMask(); 5745 5746 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 5747 if (EltSize <= 32) { 5748 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 5749 int Lane = SVN->getSplatIndex(); 5750 // If this is undef splat, generate it via "just" vdup, if possible. 5751 if (Lane == -1) Lane = 0; 5752 5753 // Test if V1 is a SCALAR_TO_VECTOR. 5754 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 5755 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 5756 } 5757 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR 5758 // (and probably will turn into a SCALAR_TO_VECTOR once legalization 5759 // reaches it). 5760 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && 5761 !isa<ConstantSDNode>(V1.getOperand(0))) { 5762 bool IsScalarToVector = true; 5763 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) 5764 if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { 5765 IsScalarToVector = false; 5766 break; 5767 } 5768 if (IsScalarToVector) 5769 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 5770 } 5771 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 5772 DAG.getConstant(Lane, dl, MVT::i32)); 5773 } 5774 5775 bool ReverseVEXT; 5776 unsigned Imm; 5777 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 5778 if (ReverseVEXT) 5779 std::swap(V1, V2); 5780 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 5781 DAG.getConstant(Imm, dl, MVT::i32)); 5782 } 5783 5784 if (isVREVMask(ShuffleMask, VT, 64)) 5785 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 5786 if (isVREVMask(ShuffleMask, VT, 32)) 5787 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 5788 if (isVREVMask(ShuffleMask, VT, 16)) 5789 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 5790 5791 if (V2->getOpcode() == ISD::UNDEF && 5792 isSingletonVEXTMask(ShuffleMask, VT, 
Imm)) { 5793 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1, 5794 DAG.getConstant(Imm, dl, MVT::i32)); 5795 } 5796 5797 // Check for Neon shuffles that modify both input vectors in place. 5798 // If both results are used, i.e., if there are two shuffles with the same 5799 // source operands and with masks corresponding to both results of one of 5800 // these operations, DAG memoization will ensure that a single node is 5801 // used for both shuffles. 5802 unsigned WhichResult; 5803 bool isV_UNDEF; 5804 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask( 5805 ShuffleMask, VT, WhichResult, isV_UNDEF)) { 5806 if (isV_UNDEF) 5807 V2 = V1; 5808 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2) 5809 .getValue(WhichResult); 5810 } 5811 5812 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize 5813 // shuffles that produce a result larger than their operands with: 5814 // shuffle(concat(v1, undef), concat(v2, undef)) 5815 // -> 5816 // shuffle(concat(v1, v2), undef) 5817 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine). 5818 // 5819 // This is useful in the general case, but there are special cases where 5820 // native shuffles produce larger results: the two-result ops. 5821 // 5822 // Look through the concat when lowering them: 5823 // shuffle(concat(v1, v2), undef) 5824 // -> 5825 // concat(VZIP(v1, v2):0, :1) 5826 // 5827 if (V1->getOpcode() == ISD::CONCAT_VECTORS && 5828 V2->getOpcode() == ISD::UNDEF) { 5829 SDValue SubV1 = V1->getOperand(0); 5830 SDValue SubV2 = V1->getOperand(1); 5831 EVT SubVT = SubV1.getValueType(); 5832 5833 // We expect these to have been canonicalized to -1. 
5834 assert(std::all_of(ShuffleMask.begin(), ShuffleMask.end(), [&](int i) { 5835 return i < (int)VT.getVectorNumElements(); 5836 }) && "Unexpected shuffle index into UNDEF operand!"); 5837 5838 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask( 5839 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) { 5840 if (isV_UNDEF) 5841 SubV2 = SubV1; 5842 assert((WhichResult == 0) && 5843 "In-place shuffle of concat can only have one result!"); 5844 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT), 5845 SubV1, SubV2); 5846 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0), 5847 Res.getValue(1)); 5848 } 5849 } 5850 } 5851 5852 // If the shuffle is not directly supported and it has 4 elements, use 5853 // the PerfectShuffle-generated table to synthesize it from other shuffles. 5854 unsigned NumElts = VT.getVectorNumElements(); 5855 if (NumElts == 4) { 5856 unsigned PFIndexes[4]; 5857 for (unsigned i = 0; i != 4; ++i) { 5858 if (ShuffleMask[i] < 0) 5859 PFIndexes[i] = 8; 5860 else 5861 PFIndexes[i] = ShuffleMask[i]; 5862 } 5863 5864 // Compute the index in the perfect shuffle table. 5865 unsigned PFTableIndex = 5866 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 5867 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 5868 unsigned Cost = (PFEntry >> 30); 5869 5870 if (Cost <= 4) 5871 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 5872 } 5873 5874 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 5875 if (EltSize >= 32) { 5876 // Do the expansion with floating-point types, since that is what the VFP 5877 // registers are defined to use, and since i64 is not legal. 
5878 EVT EltVT = EVT::getFloatingPointVT(EltSize); 5879 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 5880 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); 5881 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); 5882 SmallVector<SDValue, 8> Ops; 5883 for (unsigned i = 0; i < NumElts; ++i) { 5884 if (ShuffleMask[i] < 0) 5885 Ops.push_back(DAG.getUNDEF(EltVT)); 5886 else 5887 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 5888 ShuffleMask[i] < (int)NumElts ? V1 : V2, 5889 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 5890 dl, MVT::i32))); 5891 } 5892 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); 5893 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 5894 } 5895 5896 if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) 5897 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); 5898 5899 if (VT == MVT::v8i8) { 5900 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); 5901 if (NewOp.getNode()) 5902 return NewOp; 5903 } 5904 5905 return SDValue(); 5906 } 5907 5908 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 5909 // INSERT_VECTOR_ELT is legal only for immediate indexes. 5910 SDValue Lane = Op.getOperand(2); 5911 if (!isa<ConstantSDNode>(Lane)) 5912 return SDValue(); 5913 5914 return Op; 5915 } 5916 5917 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 5918 // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 
5919 SDValue Lane = Op.getOperand(1); 5920 if (!isa<ConstantSDNode>(Lane)) 5921 return SDValue(); 5922 5923 SDValue Vec = Op.getOperand(0); 5924 if (Op.getValueType() == MVT::i32 && 5925 Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { 5926 SDLoc dl(Op); 5927 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 5928 } 5929 5930 return Op; 5931 } 5932 5933 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 5934 // The only time a CONCAT_VECTORS operation can have legal types is when 5935 // two 64-bit vectors are concatenated to a 128-bit vector. 5936 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 5937 "unexpected CONCAT_VECTORS"); 5938 SDLoc dl(Op); 5939 SDValue Val = DAG.getUNDEF(MVT::v2f64); 5940 SDValue Op0 = Op.getOperand(0); 5941 SDValue Op1 = Op.getOperand(1); 5942 if (Op0.getOpcode() != ISD::UNDEF) 5943 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 5944 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), 5945 DAG.getIntPtrConstant(0, dl)); 5946 if (Op1.getOpcode() != ISD::UNDEF) 5947 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 5948 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), 5949 DAG.getIntPtrConstant(1, dl)); 5950 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); 5951 } 5952 5953 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each 5954 /// element has been zero/sign-extended, depending on the isSigned parameter, 5955 /// from an integer type half its size. 5956 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, 5957 bool isSigned) { 5958 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 
5959 EVT VT = N->getValueType(0); 5960 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { 5961 SDNode *BVN = N->getOperand(0).getNode(); 5962 if (BVN->getValueType(0) != MVT::v4i32 || 5963 BVN->getOpcode() != ISD::BUILD_VECTOR) 5964 return false; 5965 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0; 5966 unsigned HiElt = 1 - LoElt; 5967 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); 5968 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); 5969 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); 5970 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); 5971 if (!Lo0 || !Hi0 || !Lo1 || !Hi1) 5972 return false; 5973 if (isSigned) { 5974 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && 5975 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) 5976 return true; 5977 } else { 5978 if (Hi0->isNullValue() && Hi1->isNullValue()) 5979 return true; 5980 } 5981 return false; 5982 } 5983 5984 if (N->getOpcode() != ISD::BUILD_VECTOR) 5985 return false; 5986 5987 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 5988 SDNode *Elt = N->getOperand(i).getNode(); 5989 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 5990 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 5991 unsigned HalfSize = EltSize / 2; 5992 if (isSigned) { 5993 if (!isIntN(HalfSize, C->getSExtValue())) 5994 return false; 5995 } else { 5996 if (!isUIntN(HalfSize, C->getZExtValue())) 5997 return false; 5998 } 5999 continue; 6000 } 6001 return false; 6002 } 6003 6004 return true; 6005 } 6006 6007 /// isSignExtended - Check if a node is a vector value that is sign-extended 6008 /// or a constant BUILD_VECTOR with sign-extended elements. 
6009 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { 6010 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) 6011 return true; 6012 if (isExtendedBUILD_VECTOR(N, DAG, true)) 6013 return true; 6014 return false; 6015 } 6016 6017 /// isZeroExtended - Check if a node is a vector value that is zero-extended 6018 /// or a constant BUILD_VECTOR with zero-extended elements. 6019 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { 6020 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) 6021 return true; 6022 if (isExtendedBUILD_VECTOR(N, DAG, false)) 6023 return true; 6024 return false; 6025 } 6026 6027 static EVT getExtensionTo64Bits(const EVT &OrigVT) { 6028 if (OrigVT.getSizeInBits() >= 64) 6029 return OrigVT; 6030 6031 assert(OrigVT.isSimple() && "Expecting a simple value type"); 6032 6033 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; 6034 switch (OrigSimpleTy) { 6035 default: llvm_unreachable("Unexpected Vector Type"); 6036 case MVT::v2i8: 6037 case MVT::v2i16: 6038 return MVT::v2i32; 6039 case MVT::v4i8: 6040 return MVT::v4i16; 6041 } 6042 } 6043 6044 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total 6045 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. 6046 /// We insert the required extension here to get the vector to fill a D register. 6047 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, 6048 const EVT &OrigTy, 6049 const EVT &ExtTy, 6050 unsigned ExtOpcode) { 6051 // The vector originally had a size of OrigTy. It was then extended to ExtTy. 6052 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than 6053 // 64-bits we need to insert a new extension so that it will be 64-bits. 6054 assert(ExtTy.is128BitVector() && "Unexpected extension size"); 6055 if (OrigTy.getSizeInBits() >= 64) 6056 return N; 6057 6058 // Must extend size to at least 64 bits to be used as an operand for VMULL. 
6059 EVT NewVT = getExtensionTo64Bits(OrigTy); 6060 6061 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N); 6062 } 6063 6064 /// SkipLoadExtensionForVMULL - return a load of the original vector size that 6065 /// does not do any sign/zero extension. If the original vector is less 6066 /// than 64 bits, an appropriate extension will be added after the load to 6067 /// reach a total size of 64 bits. We have to add the extension separately 6068 /// because ARM does not have a sign/zero extending load for vectors. 6069 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { 6070 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); 6071 6072 // The load already has the right type. 6073 if (ExtendedTy == LD->getMemoryVT()) 6074 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(), 6075 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 6076 LD->isNonTemporal(), LD->isInvariant(), 6077 LD->getAlignment()); 6078 6079 // We need to create a zextload/sextload. We cannot just create a load 6080 // followed by a zext/zext node because LowerMUL is also run during normal 6081 // operation legalization where we can't create illegal types. 6082 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, 6083 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), 6084 LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(), 6085 LD->isNonTemporal(), LD->getAlignment()); 6086 } 6087 6088 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, 6089 /// extending load, or BUILD_VECTOR with extended elements, return the 6090 /// unextended value. The unextended vector should be 64 bits so that it can 6091 /// be used as an operand to a VMULL instruction. If the original vector size 6092 /// before extension is less than 64 bits we add a an extension to resize 6093 /// the vector to 64 bits. 
6094 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { 6095 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 6096 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG, 6097 N->getOperand(0)->getValueType(0), 6098 N->getValueType(0), 6099 N->getOpcode()); 6100 6101 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) 6102 return SkipLoadExtensionForVMULL(LD, DAG); 6103 6104 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will 6105 // have been legalized as a BITCAST from v4i32. 6106 if (N->getOpcode() == ISD::BITCAST) { 6107 SDNode *BVN = N->getOperand(0).getNode(); 6108 assert(BVN->getOpcode() == ISD::BUILD_VECTOR && 6109 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); 6110 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0; 6111 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32, 6112 BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); 6113 } 6114 // Construct a new BUILD_VECTOR with elements truncated to half the size. 6115 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); 6116 EVT VT = N->getValueType(0); 6117 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; 6118 unsigned NumElts = VT.getVectorNumElements(); 6119 MVT TruncVT = MVT::getIntegerVT(EltSize); 6120 SmallVector<SDValue, 8> Ops; 6121 SDLoc dl(N); 6122 for (unsigned i = 0; i != NumElts; ++i) { 6123 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); 6124 const APInt &CInt = C->getAPIntValue(); 6125 // Element types smaller than 32 bits are not legal, so use i32 elements. 6126 // The values are implicitly truncated so sext vs. zext doesn't matter. 
6127 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); 6128 } 6129 return DAG.getNode(ISD::BUILD_VECTOR, dl, 6130 MVT::getVectorVT(TruncVT, NumElts), Ops); 6131 } 6132 6133 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { 6134 unsigned Opcode = N->getOpcode(); 6135 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 6136 SDNode *N0 = N->getOperand(0).getNode(); 6137 SDNode *N1 = N->getOperand(1).getNode(); 6138 return N0->hasOneUse() && N1->hasOneUse() && 6139 isSignExtended(N0, DAG) && isSignExtended(N1, DAG); 6140 } 6141 return false; 6142 } 6143 6144 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { 6145 unsigned Opcode = N->getOpcode(); 6146 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 6147 SDNode *N0 = N->getOperand(0).getNode(); 6148 SDNode *N1 = N->getOperand(1).getNode(); 6149 return N0->hasOneUse() && N1->hasOneUse() && 6150 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); 6151 } 6152 return false; 6153 } 6154 6155 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 6156 // Multiplications are only custom-lowered for 128-bit vectors so that 6157 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 6158 EVT VT = Op.getValueType(); 6159 assert(VT.is128BitVector() && VT.isInteger() && 6160 "unexpected type for custom-lowering ISD::MUL"); 6161 SDNode *N0 = Op.getOperand(0).getNode(); 6162 SDNode *N1 = Op.getOperand(1).getNode(); 6163 unsigned NewOpc = 0; 6164 bool isMLA = false; 6165 bool isN0SExt = isSignExtended(N0, DAG); 6166 bool isN1SExt = isSignExtended(N1, DAG); 6167 if (isN0SExt && isN1SExt) 6168 NewOpc = ARMISD::VMULLs; 6169 else { 6170 bool isN0ZExt = isZeroExtended(N0, DAG); 6171 bool isN1ZExt = isZeroExtended(N1, DAG); 6172 if (isN0ZExt && isN1ZExt) 6173 NewOpc = ARMISD::VMULLu; 6174 else if (isN1SExt || isN1ZExt) { 6175 // Look for (s/zext A + s/zext B) * (s/zext C). 
We want to turn these 6176 // into (s/zext A * s/zext C) + (s/zext B * s/zext C) 6177 if (isN1SExt && isAddSubSExt(N0, DAG)) { 6178 NewOpc = ARMISD::VMULLs; 6179 isMLA = true; 6180 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { 6181 NewOpc = ARMISD::VMULLu; 6182 isMLA = true; 6183 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { 6184 std::swap(N0, N1); 6185 NewOpc = ARMISD::VMULLu; 6186 isMLA = true; 6187 } 6188 } 6189 6190 if (!NewOpc) { 6191 if (VT == MVT::v2i64) 6192 // Fall through to expand this. It is not legal. 6193 return SDValue(); 6194 else 6195 // Other vector multiplications are legal. 6196 return Op; 6197 } 6198 } 6199 6200 // Legalize to a VMULL instruction. 6201 SDLoc DL(Op); 6202 SDValue Op0; 6203 SDValue Op1 = SkipExtensionForVMULL(N1, DAG); 6204 if (!isMLA) { 6205 Op0 = SkipExtensionForVMULL(N0, DAG); 6206 assert(Op0.getValueType().is64BitVector() && 6207 Op1.getValueType().is64BitVector() && 6208 "unexpected types for extended operands to VMULL"); 6209 return DAG.getNode(NewOpc, DL, VT, Op0, Op1); 6210 } 6211 6212 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during 6213 // isel lowering to take advantage of no-stall back to back vmul + vmla. 
6214 // vmull q0, d4, d6 6215 // vmlal q0, d5, d6 6216 // is faster than 6217 // vaddl q0, d4, d5 6218 // vmovl q1, d6 6219 // vmul q0, q0, q1 6220 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG); 6221 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG); 6222 EVT Op1VT = Op1.getValueType(); 6223 return DAG.getNode(N0->getOpcode(), DL, VT, 6224 DAG.getNode(NewOpc, DL, VT, 6225 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), 6226 DAG.getNode(NewOpc, DL, VT, 6227 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); 6228 } 6229 6230 static SDValue 6231 LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { 6232 // Convert to float 6233 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); 6234 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); 6235 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); 6236 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); 6237 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); 6238 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); 6239 // Get reciprocal estimate. 6240 // float4 recip = vrecpeq_f32(yf); 6241 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 6242 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32), 6243 Y); 6244 // Because char has a smaller range than uchar, we can actually get away 6245 // without any newton steps. This requires that we use a weird bias 6246 // of 0xb000, however (again, this has been exhaustively tested). 6247 // float4 result = as_float4(as_int4(xf*recip) + 0xb000); 6248 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); 6249 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); 6250 Y = DAG.getConstant(0xb000, dl, MVT::i32); 6251 Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y); 6252 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y); 6253 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X); 6254 // Convert back to short. 
6255 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X); 6256 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X); 6257 return X; 6258 } 6259 6260 static SDValue 6261 LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) { 6262 SDValue N2; 6263 // Convert to float. 6264 // float4 yf = vcvt_f32_s32(vmovl_s16(y)); 6265 // float4 xf = vcvt_f32_s32(vmovl_s16(x)); 6266 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0); 6267 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); 6268 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 6269 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 6270 6271 // Use reciprocal estimate and one refinement step. 6272 // float4 recip = vrecpeq_f32(yf); 6273 // recip *= vrecpsq_f32(yf, recip); 6274 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 6275 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32), 6276 N1); 6277 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 6278 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32), 6279 N1, N2); 6280 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 6281 // Because short has a smaller range than ushort, we can actually get away 6282 // with only a single newton step. This requires that we use a weird bias 6283 // of 89, however (again, this has been exhaustively tested). 6284 // float4 result = as_float4(as_int4(xf*recip) + 0x89); 6285 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 6286 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 6287 N1 = DAG.getConstant(0x89, dl, MVT::i32); 6288 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 6289 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 6290 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 6291 // Convert back to integer and return. 
6292 // return vmovn_s32(vcvt_s32_f32(result)); 6293 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 6294 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 6295 return N0; 6296 } 6297 6298 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { 6299 EVT VT = Op.getValueType(); 6300 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 6301 "unexpected type for custom-lowering ISD::SDIV"); 6302 6303 SDLoc dl(Op); 6304 SDValue N0 = Op.getOperand(0); 6305 SDValue N1 = Op.getOperand(1); 6306 SDValue N2, N3; 6307 6308 if (VT == MVT::v8i8) { 6309 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); 6310 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); 6311 6312 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 6313 DAG.getIntPtrConstant(4, dl)); 6314 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 6315 DAG.getIntPtrConstant(4, dl)); 6316 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 6317 DAG.getIntPtrConstant(0, dl)); 6318 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 6319 DAG.getIntPtrConstant(0, dl)); 6320 6321 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 6322 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 6323 6324 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 6325 N0 = LowerCONCAT_VECTORS(N0, DAG); 6326 6327 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); 6328 return N0; 6329 } 6330 return LowerSDIV_v4i16(N0, N1, dl, DAG); 6331 } 6332 6333 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { 6334 EVT VT = Op.getValueType(); 6335 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 6336 "unexpected type for custom-lowering ISD::UDIV"); 6337 6338 SDLoc dl(Op); 6339 SDValue N0 = Op.getOperand(0); 6340 SDValue N1 = Op.getOperand(1); 6341 SDValue N2, N3; 6342 6343 if (VT == MVT::v8i8) { 6344 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); 6345 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); 6346 6347 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 6348 
DAG.getIntPtrConstant(4, dl)); 6349 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 6350 DAG.getIntPtrConstant(4, dl)); 6351 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 6352 DAG.getIntPtrConstant(0, dl)); 6353 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 6354 DAG.getIntPtrConstant(0, dl)); 6355 6356 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 6357 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 6358 6359 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 6360 N0 = LowerCONCAT_VECTORS(N0, DAG); 6361 6362 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, 6363 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl, 6364 MVT::i32), 6365 N0); 6366 return N0; 6367 } 6368 6369 // v4i16 sdiv ... Convert to float. 6370 // float4 yf = vcvt_f32_s32(vmovl_u16(y)); 6371 // float4 xf = vcvt_f32_s32(vmovl_u16(x)); 6372 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0); 6373 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1); 6374 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 6375 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 6376 6377 // Use reciprocal estimate and two refinement steps. 
6378 // float4 recip = vrecpeq_f32(yf); 6379 // recip *= vrecpsq_f32(yf, recip); 6380 // recip *= vrecpsq_f32(yf, recip); 6381 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 6382 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32), 6383 BN1); 6384 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 6385 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32), 6386 BN1, N2); 6387 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 6388 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 6389 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32), 6390 BN1, N2); 6391 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 6392 // Simply multiplying by the reciprocal estimate can leave us a few ulps 6393 // too low, so we add 2 ulps (exhaustive testing shows that this is enough, 6394 // and that it will never cause us to return an answer too large). 6395 // float4 result = as_float4(as_int4(xf*recip) + 2); 6396 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 6397 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 6398 N1 = DAG.getConstant(2, dl, MVT::i32); 6399 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 6400 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 6401 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 6402 // Convert back to integer and return. 
6403 // return vmovn_u32(vcvt_s32_f32(result)); 6404 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 6405 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 6406 return N0; 6407 } 6408 6409 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { 6410 EVT VT = Op.getNode()->getValueType(0); 6411 SDVTList VTs = DAG.getVTList(VT, MVT::i32); 6412 6413 unsigned Opc; 6414 bool ExtraOp = false; 6415 switch (Op.getOpcode()) { 6416 default: llvm_unreachable("Invalid code"); 6417 case ISD::ADDC: Opc = ARMISD::ADDC; break; 6418 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; 6419 case ISD::SUBC: Opc = ARMISD::SUBC; break; 6420 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break; 6421 } 6422 6423 if (!ExtraOp) 6424 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), 6425 Op.getOperand(1)); 6426 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), 6427 Op.getOperand(1), Op.getOperand(2)); 6428 } 6429 6430 SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { 6431 assert(Subtarget->isTargetDarwin()); 6432 6433 // For iOS, we want to call an alternative entry point: __sincos_stret, 6434 // return values are passed via sret. 6435 SDLoc dl(Op); 6436 SDValue Arg = Op.getOperand(0); 6437 EVT ArgVT = Arg.getValueType(); 6438 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); 6439 auto PtrVT = getPointerTy(DAG.getDataLayout()); 6440 6441 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 6442 6443 // Pair of floats / doubles used to pass the result. 6444 StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); 6445 6446 // Create stack object for sret. 
6447 auto &DL = DAG.getDataLayout(); 6448 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); 6449 const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); 6450 int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); 6451 SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL)); 6452 6453 ArgListTy Args; 6454 ArgListEntry Entry; 6455 6456 Entry.Node = SRet; 6457 Entry.Ty = RetTy->getPointerTo(); 6458 Entry.isSExt = false; 6459 Entry.isZExt = false; 6460 Entry.isSRet = true; 6461 Args.push_back(Entry); 6462 6463 Entry.Node = Arg; 6464 Entry.Ty = ArgTy; 6465 Entry.isSExt = false; 6466 Entry.isZExt = false; 6467 Args.push_back(Entry); 6468 6469 const char *LibcallName = (ArgVT == MVT::f64) 6470 ? "__sincos_stret" : "__sincosf_stret"; 6471 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); 6472 6473 TargetLowering::CallLoweringInfo CLI(DAG); 6474 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) 6475 .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, 6476 std::move(Args), 0) 6477 .setDiscardResult(); 6478 6479 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 6480 6481 SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, 6482 MachinePointerInfo(), false, false, false, 0); 6483 6484 // Address of cos field. 
// The second value is loaded from SRet + ArgVT.getStoreSize(), chained after
// LoadSin; both loaded values are then returned as one MERGE_VALUES node.
  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
                            DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
  SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
                                MachinePointerInfo(), false, false, false, 0);

  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
                     LoadSin.getValue(0), LoadCos.getValue(0));
}

/// Custom lowering hook for ISD::ATOMIC_LOAD / ISD::ATOMIC_STORE (see the
/// dispatch in LowerOperation). Returns \p Op unchanged when its ordering is
/// Monotonic or weaker, and an empty SDValue for any stronger ordering.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
  // Monotonic load/store is legal for all targets
  if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
    return Op;

  // Acquire/Release load/store is not legal for targets without a
  // dmb or equivalent available.
  return SDValue();
}

/// Expand an ISD::READCYCLECOUNTER node into an i64 value plus an output
/// chain, both appended to \p Results (value first, chain second).
///
/// \param N          the node being replaced; operand 0 is its input chain.
/// \param Results    receives the i64 cycle count followed by the chain.
/// \param DAG        the DAG in which the replacement nodes are created.
/// \param Subtarget  queried for hasPerfMon() to pick the expansion.
static void ReplaceREADCYCLECOUNTER(SDNode *N,
                                    SmallVectorImpl<SDValue> &Results,
                                    SelectionDAG &DAG,
                                    const ARMSubtarget *Subtarget) {
  SDLoc DL(N);
  SDValue Cycles32, OutChain;

  if (Subtarget->hasPerfMon()) {
    // When the subtarget reports performance-monitor support (hasPerfMon),
    // the cycle count is read through the arm_mrc intrinsic as:
    //    mrc p15, #0, <Rt>, c9, c13, #0
    SDValue Ops[] = { N->getOperand(0), // Chain
                      DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
                      DAG.getConstant(15, DL, MVT::i32),
                      DAG.getConstant(0, DL, MVT::i32),
                      DAG.getConstant(9, DL, MVT::i32),
                      DAG.getConstant(13, DL, MVT::i32),
                      DAG.getConstant(0, DL, MVT::i32)
    };

    Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
                           DAG.getVTList(MVT::i32, MVT::Other), Ops);
    // Result 1 of the intrinsic node is its output chain.
    OutChain = Cycles32.getValue(1);
  } else {
    // Intrinsic is defined to return 0 on unsupported platforms. Technically
    // there are older ARM CPUs that have implementation-specific ways of
    // obtaining this information (FIXME!).
    Cycles32 = DAG.getConstant(0, DL, MVT::i32);
    // NOTE(review): the entry node is used as the output chain here rather
    // than N's incoming chain (operand 0) -- confirm this is intended.
    OutChain = DAG.getEntryNode();
  }


  // Form the i64 result by pairing the 32-bit count with a constant 0.
  SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
                                 Cycles32, DAG.getConstant(0, DL, MVT::i32));
  Results.push_back(Cycles64);
  Results.push_back(OutChain);
}

/// LowerOperation - Dispatch a node that was marked for custom lowering to
/// the matching Lower* / Expand* routine, keyed on Op.getOpcode(). Opcodes
/// that are not listed hit llvm_unreachable.
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Don't know how to custom lower this!");
  case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
  case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:
    // Global address lowering depends on the object file format; every case
    // of the inner switch returns or is unreachable, so there is no
    // fall-through into the next outer case.
    switch (Subtarget->getTargetTriple().getObjectFormat()) {
    default: llvm_unreachable("unknown object format");
    case Triple::COFF:
      return LowerGlobalAddressWindows(Op, DAG);
    case Triple::ELF:
      return LowerGlobalAddressELF(Op, DAG);
    case Triple::MachO:
      return LowerGlobalAddressDarwin(Op, DAG);
    }
  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:        return LowerSELECT(Op, DAG);
  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
  case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
  case ISD::VASTART:       return LowerVASTART(Op, DAG);
  case ISD::ATOMIC_FENCE:  return LowerATOMIC_FENCE(Op, DAG, Subtarget);
  case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
  case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
  case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
                                                               Subtarget);
  case ISD::BITCAST:       return ExpandBITCAST(Op.getNode(), DAG);
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
  case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
  case ISD::SRL_PARTS:
  case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
  case ISD::CTPOP:         return LowerCTPOP(Op.getNode(), DAG, Subtarget);
  case ISD::SETCC:         return LowerVSETCC(Op, DAG);
  case ISD::ConstantFP:    return LowerConstantFP(Op, DAG, Subtarget);
  case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
  case ISD::MUL:           return LowerMUL(Op, DAG);
  case ISD::SDIV:          return LowerSDIV(Op, DAG);
  case ISD::UDIV:          return LowerUDIV(Op, DAG);
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
    return LowerXALUO(Op, DAG);
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
  case ISD::FSINCOS:       return LowerFSINCOS(Op, DAG);
  case ISD::SDIVREM:
  case ISD::UDIVREM:       return LowerDivRem(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    // Only handled for the Windows Itanium environment; any other target
    // reaching this case is treated as unreachable.
    if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
      return LowerDYNAMIC_STACKALLOC(Op, DAG);
    llvm_unreachable("Don't know how to custom lower this!");
  case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
  }
}

/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
/// READ_REGISTER and READCYCLECOUNTER push their results into \p Results
/// themselves; BITCAST and SRL/SRA produce a single value in Res, which is
/// appended only if it is non-null.
void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDValue Res;
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom expand this!");
  case ISD::READ_REGISTER:
    ExpandREAD_REGISTER(N, Results, DAG);
    break;
  case ISD::BITCAST:
    Res = ExpandBITCAST(N, DAG);
    break;
  case ISD::SRL:
  case ISD::SRA:
    Res = Expand64BitShift(N, DAG, Subtarget);
    break;
  case ISD::READCYCLECOUNTER:
    ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
    return;
  }
  if (Res.getNode())
    Results.push_back(Res);
}

//===----------------------------------------------------------------------===//
//                           ARM Scheduler Hooks
//===----------------------------------------------------------------------===//

/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
                       MachineBasicBlock *DispatchBB, int FI) const {
  // All instructions are inserted into MBB immediately before MI. The three
  // subtarget paths below (Thumb2, Thumb1, ARM) each load the constant-pool
  // entry for DispatchBB, add the PC, and store the result through frame
  // index FI at immediate offset 36.
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  DebugLoc dl = MI->getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  MachineConstantPool *MCP = MF->getConstantPool();
  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
  const Function *F = MF->getFunction();

  bool isThumb = Subtarget->isThumb();
  bool isThumb2 = Subtarget->isThumb2();

  // Constant-pool entry referring to DispatchBB, with a PC adjustment of 4
  // in Thumb modes and 8 in ARM mode.
  unsigned PCLabelId = AFI->createPICLabelUId();
  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
  ARMConstantPoolValue *CPV =
    ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
  unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);

  const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
                                           : &ARM::GPRRegClass;

  // Grab constant pool and fixed stack memory operands.
  MachineMemOperand *CPMMO =
    MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
                             MachineMemOperand::MOLoad, 4, 4);

  MachineMemOperand *FIMMOSt =
    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                             MachineMemOperand::MOStore, 4, 4);

  // Load the address of the dispatch MBB into the jump buffer.
  if (isThumb2) {
    // Incoming value: jbuf
    //   ldr.n  r5, LCPI1_1
    //   orr    r5, r5, #1
    //   add    r5, pc
    //   str    r5, [$jbuf, #+4] ; &jbuf[1]
    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
                   .addConstantPoolIndex(CPI)
                   .addMemOperand(CPMMO));
    // Set the low bit because of thumb mode.
    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
    AddDefaultCC(
      AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
                     .addReg(NewVReg1, RegState::Kill)
                     .addImm(0x01)));
    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
      .addReg(NewVReg2, RegState::Kill)
      .addImm(PCLabelId);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
                   .addReg(NewVReg3, RegState::Kill)
                   .addFrameIndex(FI)
                   .addImm(36)  // &jbuf[1] :: pc
                   .addMemOperand(FIMMOSt));
  } else if (isThumb) {
    // Incoming value: jbuf
    //   ldr.n  r1, LCPI1_4
    //   add    r1, pc
    //   mov    r2, #1
    //   orrs   r1, r2
    //   add    r2, $jbuf, #+4 ; &jbuf[1]
    //   str    r1, [r2]
    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
                   .addConstantPoolIndex(CPI)
                   .addMemOperand(CPMMO));
    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
      .addReg(NewVReg1, RegState::Kill)
      .addImm(PCLabelId);
    // Set the low bit because of thumb mode.
    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
                   .addReg(ARM::CPSR, RegState::Define)
                   .addImm(1));
    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
                   .addReg(ARM::CPSR, RegState::Define)
                   .addReg(NewVReg2, RegState::Kill)
                   .addReg(NewVReg3, RegState::Kill));
    // Here the slot address is first materialised into a register
    // (tADDframe) and the store then uses a zero immediate offset.
    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
    BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
            .addFrameIndex(FI)
            .addImm(36); // &jbuf[1] :: pc
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
                   .addReg(NewVReg4, RegState::Kill)
                   .addReg(NewVReg5, RegState::Kill)
                   .addImm(0)
                   .addMemOperand(FIMMOSt));
  } else {
    // Incoming value: jbuf
    //   ldr  r1, LCPI1_1
    //   add  r1, pc, r1
    //   str  r1, [$jbuf, #+4] ; &jbuf[1]
    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12),  NewVReg1)
                   .addConstantPoolIndex(CPI)
                   .addImm(0)
                   .addMemOperand(CPMMO));
    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
                   .addReg(NewVReg1, RegState::Kill)
                   .addImm(PCLabelId));
    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
                   .addReg(NewVReg2, RegState::Kill)
                   .addFrameIndex(FI)
                   .addImm(36)  // &jbuf[1] :: pc
                   .addMemOperand(FIMMOSt));
  }
}

/// EmitSjLjDispatchBlock - Build the SjLj dispatch machinery for the function
/// containing \p MBB and erase \p MI.
///
/// Three new blocks are appended to the function: DispatchBB (marked as a
/// landing pad), DispContBB (indexes the jump table built from the existing
/// landing pads and branches through it) and TrapBB (a single trap
/// instruction, taken when the bound check in DispatchBB fails). Every block
/// that preceded a collected landing pad has its landing-pad successors
/// replaced by DispatchBB, and those former landing pads lose their
/// landing-pad flag.
void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
                                              MachineBasicBlock *MBB) const {
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  DebugLoc dl = MI->getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  MachineFrameInfo *MFI = MF->getFrameInfo();
  int FI = MFI->getFunctionContextIndex();

  const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
                                                        : &ARM::GPRnopcRegClass;

  // Get a mapping of the call site numbers to all of the landing pads they're
  // associated with.
  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
  unsigned MaxCSNum = 0;
  MachineModuleInfo &MMI = MF->getMMI();
  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
       ++BB) {
    if (!BB->isLandingPad()) continue;

    // FIXME: We should assert that the EH_LABEL is the first MI in the landing
    // pad.
    for (MachineBasicBlock::iterator
           II = BB->begin(), IE = BB->end(); II != IE; ++II) {
      if (!II->isEHLabel()) continue;

      MCSymbol *Sym = II->getOperand(0).getMCSymbol();
      if (!MMI.hasCallSiteLandingPad(Sym)) continue;

      SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
      for (SmallVectorImpl<unsigned>::iterator
           CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
           CSI != CSE; ++CSI) {
        CallSiteNumToLPad[*CSI].push_back(BB);
        MaxCSNum = std::max(MaxCSNum, *CSI);
      }
      // Only the first EH label that has call-site entries is considered for
      // each landing pad.
      break;
    }
  }

  // Get an ordered list of the machine basic blocks for the jump table.
  // Call-site numbers 1..MaxCSNum are visited in increasing order; the
  // predecessors of every listed landing pad are remembered in InvokeBBs.
  std::vector<MachineBasicBlock*> LPadList;
  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
  LPadList.reserve(CallSiteNumToLPad.size());
  for (unsigned I = 1; I <= MaxCSNum; ++I) {
    SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
    for (SmallVectorImpl<MachineBasicBlock*>::iterator
           II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
      LPadList.push_back(*II);
      InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
    }
  }

  assert(!LPadList.empty() &&
         "No landing pad destinations for the dispatch jump table!");

  // Create the jump table and associated information.
  MachineJumpTableInfo *JTI =
    MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();

  // Create the MBBs for the dispatch code.

  // Shove the dispatch's address into the return slot in the function context.
  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
  DispatchBB->setIsLandingPad();

  // TrapBB holds a single trap instruction chosen by subtarget mode.
  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
  unsigned trap_opcode;
  if (Subtarget->isThumb())
    trap_opcode = ARM::tTRAP;
  else
    trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;

  BuildMI(TrapBB, dl, TII->get(trap_opcode));
  DispatchBB->addSuccessor(TrapBB);

  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
  DispatchBB->addSuccessor(DispContBB);

  // Insert the new MBBs at the end of the function.
  MF->insert(MF->end(), DispatchBB);
  MF->insert(MF->end(), DispContBB);
  MF->insert(MF->end(), TrapBB);

  // Insert code into the entry block that creates and registers the function
  // context.
  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);

  // The load from the function context slot is marked volatile.
  MachineMemOperand *FIMMOLd =
    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                             MachineMemOperand::MOLoad |
                             MachineMemOperand::MOVolatile, 4, 4);

  MachineInstrBuilder MIB;
  MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));

  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();

  // Add a register mask with no preserved registers.  This results in all
  // registers being marked as clobbered.
  MIB.addRegMask(RI.getNoPreservedMask());

  // Each subtarget path below follows the same outline:
  //   DispatchBB: load a value through FI, compare it with the number of
  //               landing pads, and branch to TrapBB on unsigned-higher (HI).
  //   DispContBB: scale the value by 4, index the jump table, and branch.
  unsigned NumLPads = LPadList.size();
  if (Subtarget->isThumb2()) {
    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
                   .addFrameIndex(FI)
                   .addImm(4)
                   .addMemOperand(FIMMOLd));

    if (NumLPads < 256) {
      // LPadList.size() is the same value as NumLPads, which the other two
      // paths use directly.
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
                     .addReg(NewVReg1)
                     .addImm(LPadList.size()));
    } else {
      // Materialise NumLPads with MOVi16, plus MOVTi16 when the upper 16 bits
      // are non-zero, and compare register against register.
      unsigned VReg1 = MRI->createVirtualRegister(TRC);
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
                     .addImm(NumLPads & 0xFFFF));

      unsigned VReg2 = VReg1;
      if ((NumLPads & 0xFFFF0000) != 0) {
        VReg2 = MRI->createVirtualRegister(TRC);
        AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
                       .addReg(VReg1)
                       .addImm(NumLPads >> 16));
      }

      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
                     .addReg(NewVReg1)
                     .addReg(VReg2));
    }

    BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
      .addMBB(TrapBB)
      .addImm(ARMCC::HI)
      .addReg(ARM::CPSR);

    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
                   .addJumpTableIndex(MJTI));

    // NewVReg4 = NewVReg3 + (NewVReg1 << 2)
    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
    AddDefaultCC(
      AddDefaultPred(
        BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
        .addReg(NewVReg3, RegState::Kill)
        .addReg(NewVReg1)
        .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));

    BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
      .addReg(NewVReg4, RegState::Kill)
      .addReg(NewVReg1)
      .addJumpTableIndex(MJTI);
  } else if (Subtarget->isThumb()) {
    // NOTE(review): the immediate is 1 here versus 4 in the other two paths;
    // presumably tLDRspi takes a word-scaled offset -- confirm.
    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
                   .addFrameIndex(FI)
                   .addImm(1)
                   .addMemOperand(FIMMOLd));

    if (NumLPads < 256) {
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
                     .addReg(NewVReg1)
                     .addImm(NumLPads));
    } else {
      // Too large for the immediate compare: place NumLPads in the constant
      // pool and load it from there.
      MachineConstantPool *ConstantPool = MF->getConstantPool();
      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);

      // MachineConstantPool wants an explicit alignment.
      unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
      if (Align == 0)
        Align = MF->getDataLayout().getTypeAllocSize(C->getType());
      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

      unsigned VReg1 = MRI->createVirtualRegister(TRC);
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
                     .addReg(VReg1, RegState::Define)
                     .addConstantPoolIndex(Idx));
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
                     .addReg(NewVReg1)
                     .addReg(VReg1));
    }

    BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
      .addMBB(TrapBB)
      .addImm(ARMCC::HI)
      .addReg(ARM::CPSR);

    // NewVReg2 = NewVReg1 << 2
    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
                   .addReg(ARM::CPSR, RegState::Define)
                   .addReg(NewVReg1)
                   .addImm(2));

    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
                   .addJumpTableIndex(MJTI));

    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
                   .addReg(ARM::CPSR, RegState::Define)
                   .addReg(NewVReg2, RegState::Kill)
                   .addReg(NewVReg3));

    MachineMemOperand *JTMMOLd =
      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
                               MachineMemOperand::MOLoad, 4, 4);

    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
                   .addReg(NewVReg4, RegState::Kill)
                   .addImm(0)
                   .addMemOperand(JTMMOLd));

    // Under PIC the loaded entry is added to the jump table address
    // (NewVReg3) before branching.
    unsigned NewVReg6 = NewVReg5;
    if (RelocM == Reloc::PIC_) {
      NewVReg6 = MRI->createVirtualRegister(TRC);
      AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
                     .addReg(ARM::CPSR, RegState::Define)
                     .addReg(NewVReg5, RegState::Kill)
                     .addReg(NewVReg3));
    }

    BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
      .addReg(NewVReg6, RegState::Kill)
      .addJumpTableIndex(MJTI);
  } else {
    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
                   .addFrameIndex(FI)
                   .addImm(4)
                   .addMemOperand(FIMMOLd));

    if (NumLPads < 256) {
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
                     .addReg(NewVReg1)
                     .addImm(NumLPads));
    } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
      unsigned VReg1 = MRI->createVirtualRegister(TRC);
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
                     .addImm(NumLPads & 0xFFFF));

      // NOTE(review): this branch is guarded by isUInt<16>(NumLPads), so the
      // upper-half test below can never be true and the MOVTi16 is never
      // emitted on this path.
      unsigned VReg2 = VReg1;
      if ((NumLPads & 0xFFFF0000) != 0) {
        VReg2 = MRI->createVirtualRegister(TRC);
        AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
                       .addReg(VReg1)
                       .addImm(NumLPads >> 16));
      }

      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
                     .addReg(NewVReg1)
                     .addReg(VReg2));
    } else {
      // Otherwise load NumLPads from the constant pool.
      MachineConstantPool *ConstantPool = MF->getConstantPool();
      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);

      // MachineConstantPool wants an explicit alignment.
      unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
      if (Align == 0)
        Align = MF->getDataLayout().getTypeAllocSize(C->getType());
      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

      unsigned VReg1 = MRI->createVirtualRegister(TRC);
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
                     .addReg(VReg1, RegState::Define)
                     .addConstantPoolIndex(Idx)
                     .addImm(0));
      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
                     .addReg(NewVReg1)
                     .addReg(VReg1, RegState::Kill));
    }

    BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
      .addMBB(TrapBB)
      .addImm(ARMCC::HI)
      .addReg(ARM::CPSR);

    // NewVReg3 = NewVReg1 << 2
    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
    AddDefaultCC(
      AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
                     .addReg(NewVReg1)
                     .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
                   .addJumpTableIndex(MJTI));

    MachineMemOperand *JTMMOLd =
      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
                               MachineMemOperand::MOLoad, 4, 4);
    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
    AddDefaultPred(
      BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
      .addReg(NewVReg3, RegState::Kill)
      .addReg(NewVReg4)
      .addImm(0)
      .addMemOperand(JTMMOLd));

    // PIC uses BR_JTadd with the jump table address as an extra operand;
    // otherwise the loaded entry is branched to directly with BR_JTr.
    if (RelocM == Reloc::PIC_) {
      BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
        .addReg(NewVReg5, RegState::Kill)
        .addReg(NewVReg4)
        .addJumpTableIndex(MJTI);
    } else {
      BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
        .addReg(NewVReg5, RegState::Kill)
        .addJumpTableIndex(MJTI);
    }
  }

  // Add the jump table entries as successors to the MBB.
  // SeenMBBs ensures each landing pad is added as a successor only once.
  SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
  for (std::vector<MachineBasicBlock*>::iterator
         I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
    MachineBasicBlock *CurMBB = *I;
    if (SeenMBBs.insert(CurMBB).second)
      DispContBB->addSuccessor(CurMBB);
  }

  // N.B. the order the invoke BBs are processed in doesn't matter here.
  const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
  SmallVector<MachineBasicBlock*, 64> MBBLPads;
  for (MachineBasicBlock *BB : InvokeBBs) {

    // Remove the landing pad successor from the invoke block and replace it
    // with the new dispatch block.
    // A copy of the successor list is walked because removeSuccessor mutates
    // the block's own list.
    SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
                                                  BB->succ_end());
    while (!Successors.empty()) {
      MachineBasicBlock *SMBB = Successors.pop_back_val();
      if (SMBB->isLandingPad()) {
        BB->removeSuccessor(SMBB);
        MBBLPads.push_back(SMBB);
      }
    }

    BB->addSuccessor(DispatchBB);

    // Find the invoke call and mark all of the callee-saved registers as
    // 'implicit defined' so that they're spilled. This prevents code from
    // moving instructions to before the EH block, where they will never be
    // executed.
    // The block is scanned backwards and only the last call in it is
    // annotated (note the break at the end of the loop body).
    for (MachineBasicBlock::reverse_iterator
           II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
      if (!II->isCall()) continue;

      // Registers already mentioned as operands of the call.
      DenseMap<unsigned, bool> DefRegs;
      for (MachineInstr::mop_iterator
             OI = II->operands_begin(), OE = II->operands_end();
           OI != OE; ++OI) {
        if (!OI->isReg()) continue;
        DefRegs[OI->getReg()] = true;
      }

      MachineInstrBuilder MIB(*MF, &*II);

      // SavedRegs is a zero-terminated list; registers outside the register
      // classes checked for the current mode are skipped.
      for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
        unsigned Reg = SavedRegs[i];
        if (Subtarget->isThumb2() &&
            !ARM::tGPRRegClass.contains(Reg) &&
            !ARM::hGPRRegClass.contains(Reg))
          continue;
        if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
          continue;
        if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
          continue;
        if (!DefRegs[Reg])
          MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
      }

      break;
    }
  }

  // Mark all former landing pads as non-landing pads. The dispatch is the only
  // landing pad now.
  for (SmallVectorImpl<MachineBasicBlock*>::iterator
         I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
    (*I)->setIsLandingPad(false);

  // The instruction is gone now.
  MI->eraseFromParent();
}

/// Return the first successor of \p MBB that is not \p Succ. Hits
/// llvm_unreachable when every successor equals \p Succ (or there is none).
static
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
       E = MBB->succ_end(); I != E; ++I)
    if (*I != Succ)
      return *I;
  llvm_unreachable("Expecting a BB with two successors!");
}

/// Return the load opcode for a given load size. If load size >= 8,
/// neon opcode will be returned.
///
/// Sizes of 8 or more are checked before the Thumb flags, so the NEON opcode
/// is chosen for them regardless of mode. Returns 0 when \p LdSize is not one
/// of 16, 8, 4, 2 or 1.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
  if (LdSize >= 8)
    return LdSize == 16 ? ARM::VLD1q32wb_fixed
                        : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
  if (IsThumb1)
    return LdSize == 4 ? ARM::tLDRi
                       : LdSize == 2 ? ARM::tLDRHi
                                     : LdSize == 1 ? ARM::tLDRBi : 0;
  if (IsThumb2)
    return LdSize == 4 ? ARM::t2LDR_POST
                       : LdSize == 2 ? ARM::t2LDRH_POST
                                     : LdSize == 1 ? ARM::t2LDRB_POST : 0;
  return LdSize == 4 ? ARM::LDR_POST_IMM
                     : LdSize == 2 ? ARM::LDRH_POST
                                   : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
}

/// Return the store opcode for a given store size. If store size >= 8,
/// neon opcode will be returned.
///
/// Sizes of 8 or more are checked before the Thumb flags, so the NEON opcode
/// is chosen for them regardless of mode. Returns 0 when \p StSize is not one
/// of 16, 8, 4, 2 or 1.
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
  if (StSize >= 8)
    return StSize == 16 ? ARM::VST1q32wb_fixed
                        : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
  if (IsThumb1)
    return StSize == 4 ? ARM::tSTRi
                       : StSize == 2 ? ARM::tSTRHi
                                     : StSize == 1 ? ARM::tSTRBi : 0;
  if (IsThumb2)
    return StSize == 4 ? ARM::t2STR_POST
                       : StSize == 2 ? ARM::t2STRH_POST
                                     : StSize == 1 ? ARM::t2STRB_POST : 0;
  return StSize == 4 ? ARM::STR_POST_IMM
                     : StSize == 2 ? ARM::STRH_POST
                                   : StSize == 1 ? ARM::STRB_POST_IMM : 0;
}

/// Emit a post-increment load operation with given size. The instructions
/// will be added to BB at Pos.
7209 static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos, 7210 const TargetInstrInfo *TII, DebugLoc dl, 7211 unsigned LdSize, unsigned Data, unsigned AddrIn, 7212 unsigned AddrOut, bool IsThumb1, bool IsThumb2) { 7213 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2); 7214 assert(LdOpc != 0 && "Should have a load opcode"); 7215 if (LdSize >= 8) { 7216 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) 7217 .addReg(AddrOut, RegState::Define).addReg(AddrIn) 7218 .addImm(0)); 7219 } else if (IsThumb1) { 7220 // load + update AddrIn 7221 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) 7222 .addReg(AddrIn).addImm(0)); 7223 MachineInstrBuilder MIB = 7224 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); 7225 MIB = AddDefaultT1CC(MIB); 7226 MIB.addReg(AddrIn).addImm(LdSize); 7227 AddDefaultPred(MIB); 7228 } else if (IsThumb2) { 7229 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) 7230 .addReg(AddrOut, RegState::Define).addReg(AddrIn) 7231 .addImm(LdSize)); 7232 } else { // arm 7233 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) 7234 .addReg(AddrOut, RegState::Define).addReg(AddrIn) 7235 .addReg(0).addImm(LdSize)); 7236 } 7237 } 7238 7239 /// Emit a post-increment store operation with given size. The instructions 7240 /// will be added to BB at Pos. 
7241 static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos, 7242 const TargetInstrInfo *TII, DebugLoc dl, 7243 unsigned StSize, unsigned Data, unsigned AddrIn, 7244 unsigned AddrOut, bool IsThumb1, bool IsThumb2) { 7245 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2); 7246 assert(StOpc != 0 && "Should have a store opcode"); 7247 if (StSize >= 8) { 7248 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) 7249 .addReg(AddrIn).addImm(0).addReg(Data)); 7250 } else if (IsThumb1) { 7251 // store + update AddrIn 7252 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data) 7253 .addReg(AddrIn).addImm(0)); 7254 MachineInstrBuilder MIB = 7255 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); 7256 MIB = AddDefaultT1CC(MIB); 7257 MIB.addReg(AddrIn).addImm(StSize); 7258 AddDefaultPred(MIB); 7259 } else if (IsThumb2) { 7260 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) 7261 .addReg(Data).addReg(AddrIn).addImm(StSize)); 7262 } else { // arm 7263 AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) 7264 .addReg(Data).addReg(AddrIn).addReg(0) 7265 .addImm(StSize)); 7266 } 7267 } 7268 7269 MachineBasicBlock * 7270 ARMTargetLowering::EmitStructByval(MachineInstr *MI, 7271 MachineBasicBlock *BB) const { 7272 // This pseudo instruction has 3 operands: dst, src, size 7273 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). 7274 // Otherwise, we will generate unrolled scalar copies. 
7275 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); 7276 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7277 MachineFunction::iterator It = BB; 7278 ++It; 7279 7280 unsigned dest = MI->getOperand(0).getReg(); 7281 unsigned src = MI->getOperand(1).getReg(); 7282 unsigned SizeVal = MI->getOperand(2).getImm(); 7283 unsigned Align = MI->getOperand(3).getImm(); 7284 DebugLoc dl = MI->getDebugLoc(); 7285 7286 MachineFunction *MF = BB->getParent(); 7287 MachineRegisterInfo &MRI = MF->getRegInfo(); 7288 unsigned UnitSize = 0; 7289 const TargetRegisterClass *TRC = nullptr; 7290 const TargetRegisterClass *VecTRC = nullptr; 7291 7292 bool IsThumb1 = Subtarget->isThumb1Only(); 7293 bool IsThumb2 = Subtarget->isThumb2(); 7294 7295 if (Align & 1) { 7296 UnitSize = 1; 7297 } else if (Align & 2) { 7298 UnitSize = 2; 7299 } else { 7300 // Check whether we can use NEON instructions. 7301 if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) && 7302 Subtarget->hasNEON()) { 7303 if ((Align % 16 == 0) && SizeVal >= 16) 7304 UnitSize = 16; 7305 else if ((Align % 8 == 0) && SizeVal >= 8) 7306 UnitSize = 8; 7307 } 7308 // Can't use NEON instructions. 7309 if (UnitSize == 0) 7310 UnitSize = 4; 7311 } 7312 7313 // Select the correct opcode and register class for unit size load/store 7314 bool IsNeon = UnitSize >= 8; 7315 TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass; 7316 if (IsNeon) 7317 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass 7318 : UnitSize == 8 ? &ARM::DPRRegClass 7319 : nullptr; 7320 7321 unsigned BytesLeft = SizeVal % UnitSize; 7322 unsigned LoopSize = SizeVal - BytesLeft; 7323 7324 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) { 7325 // Use LDR and STR to copy. 
7326 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize) 7327 // [destOut] = STR_POST(scratch, destIn, UnitSize) 7328 unsigned srcIn = src; 7329 unsigned destIn = dest; 7330 for (unsigned i = 0; i < LoopSize; i+=UnitSize) { 7331 unsigned srcOut = MRI.createVirtualRegister(TRC); 7332 unsigned destOut = MRI.createVirtualRegister(TRC); 7333 unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); 7334 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut, 7335 IsThumb1, IsThumb2); 7336 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut, 7337 IsThumb1, IsThumb2); 7338 srcIn = srcOut; 7339 destIn = destOut; 7340 } 7341 7342 // Handle the leftover bytes with LDRB and STRB. 7343 // [scratch, srcOut] = LDRB_POST(srcIn, 1) 7344 // [destOut] = STRB_POST(scratch, destIn, 1) 7345 for (unsigned i = 0; i < BytesLeft; i++) { 7346 unsigned srcOut = MRI.createVirtualRegister(TRC); 7347 unsigned destOut = MRI.createVirtualRegister(TRC); 7348 unsigned scratch = MRI.createVirtualRegister(TRC); 7349 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut, 7350 IsThumb1, IsThumb2); 7351 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut, 7352 IsThumb1, IsThumb2); 7353 srcIn = srcOut; 7354 destIn = destOut; 7355 } 7356 MI->eraseFromParent(); // The instruction is gone now. 7357 return BB; 7358 } 7359 7360 // Expand the pseudo op to a loop. 7361 // thisMBB: 7362 // ... 
7363 // movw varEnd, # --> with thumb2 7364 // movt varEnd, # 7365 // ldrcp varEnd, idx --> without thumb2 7366 // fallthrough --> loopMBB 7367 // loopMBB: 7368 // PHI varPhi, varEnd, varLoop 7369 // PHI srcPhi, src, srcLoop 7370 // PHI destPhi, dst, destLoop 7371 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) 7372 // [destLoop] = STR_POST(scratch, destPhi, UnitSize) 7373 // subs varLoop, varPhi, #UnitSize 7374 // bne loopMBB 7375 // fallthrough --> exitMBB 7376 // exitMBB: 7377 // epilogue to handle left-over bytes 7378 // [scratch, srcOut] = LDRB_POST(srcLoop, 1) 7379 // [destOut] = STRB_POST(scratch, destLoop, 1) 7380 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 7381 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 7382 MF->insert(It, loopMBB); 7383 MF->insert(It, exitMBB); 7384 7385 // Transfer the remainder of BB and its successor edges to exitMBB. 7386 exitMBB->splice(exitMBB->begin(), BB, 7387 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7388 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 7389 7390 // Load an immediate to varEnd. 7391 unsigned varEnd = MRI.createVirtualRegister(TRC); 7392 if (Subtarget->useMovt(*MF)) { 7393 unsigned Vtmp = varEnd; 7394 if ((LoopSize & 0xFFFF0000) != 0) 7395 Vtmp = MRI.createVirtualRegister(TRC); 7396 AddDefaultPred(BuildMI(BB, dl, 7397 TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16), 7398 Vtmp).addImm(LoopSize & 0xFFFF)); 7399 7400 if ((LoopSize & 0xFFFF0000) != 0) 7401 AddDefaultPred(BuildMI(BB, dl, 7402 TII->get(IsThumb2 ? ARM::t2MOVTi16 : ARM::MOVTi16), 7403 varEnd) 7404 .addReg(Vtmp) 7405 .addImm(LoopSize >> 16)); 7406 } else { 7407 MachineConstantPool *ConstantPool = MF->getConstantPool(); 7408 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 7409 const Constant *C = ConstantInt::get(Int32Ty, LoopSize); 7410 7411 // MachineConstantPool wants an explicit alignment. 
7412 unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); 7413 if (Align == 0) 7414 Align = MF->getDataLayout().getTypeAllocSize(C->getType()); 7415 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 7416 7417 if (IsThumb1) 7418 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg( 7419 varEnd, RegState::Define).addConstantPoolIndex(Idx)); 7420 else 7421 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg( 7422 varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0)); 7423 } 7424 BB->addSuccessor(loopMBB); 7425 7426 // Generate the loop body: 7427 // varPhi = PHI(varLoop, varEnd) 7428 // srcPhi = PHI(srcLoop, src) 7429 // destPhi = PHI(destLoop, dst) 7430 MachineBasicBlock *entryBB = BB; 7431 BB = loopMBB; 7432 unsigned varLoop = MRI.createVirtualRegister(TRC); 7433 unsigned varPhi = MRI.createVirtualRegister(TRC); 7434 unsigned srcLoop = MRI.createVirtualRegister(TRC); 7435 unsigned srcPhi = MRI.createVirtualRegister(TRC); 7436 unsigned destLoop = MRI.createVirtualRegister(TRC); 7437 unsigned destPhi = MRI.createVirtualRegister(TRC); 7438 7439 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi) 7440 .addReg(varLoop).addMBB(loopMBB) 7441 .addReg(varEnd).addMBB(entryBB); 7442 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi) 7443 .addReg(srcLoop).addMBB(loopMBB) 7444 .addReg(src).addMBB(entryBB); 7445 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi) 7446 .addReg(destLoop).addMBB(loopMBB) 7447 .addReg(dest).addMBB(entryBB); 7448 7449 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) 7450 // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) 7451 unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); 7452 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop, 7453 IsThumb1, IsThumb2); 7454 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop, 7455 IsThumb1, IsThumb2); 7456 7457 // Decrement loop variable by UnitSize. 
7458 if (IsThumb1) { 7459 MachineInstrBuilder MIB = 7460 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop); 7461 MIB = AddDefaultT1CC(MIB); 7462 MIB.addReg(varPhi).addImm(UnitSize); 7463 AddDefaultPred(MIB); 7464 } else { 7465 MachineInstrBuilder MIB = 7466 BuildMI(*BB, BB->end(), dl, 7467 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); 7468 AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); 7469 MIB->getOperand(5).setReg(ARM::CPSR); 7470 MIB->getOperand(5).setIsDef(true); 7471 } 7472 BuildMI(*BB, BB->end(), dl, 7473 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc)) 7474 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 7475 7476 // loopMBB can loop back to loopMBB or fall through to exitMBB. 7477 BB->addSuccessor(loopMBB); 7478 BB->addSuccessor(exitMBB); 7479 7480 // Add epilogue to handle BytesLeft. 7481 BB = exitMBB; 7482 MachineInstr *StartOfExit = exitMBB->begin(); 7483 7484 // [scratch, srcOut] = LDRB_POST(srcLoop, 1) 7485 // [destOut] = STRB_POST(scratch, destLoop, 1) 7486 unsigned srcIn = srcLoop; 7487 unsigned destIn = destLoop; 7488 for (unsigned i = 0; i < BytesLeft; i++) { 7489 unsigned srcOut = MRI.createVirtualRegister(TRC); 7490 unsigned destOut = MRI.createVirtualRegister(TRC); 7491 unsigned scratch = MRI.createVirtualRegister(TRC); 7492 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut, 7493 IsThumb1, IsThumb2); 7494 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut, 7495 IsThumb1, IsThumb2); 7496 srcIn = srcOut; 7497 destIn = destOut; 7498 } 7499 7500 MI->eraseFromParent(); // The instruction is gone now. 
7501 return BB; 7502 } 7503 7504 MachineBasicBlock * 7505 ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, 7506 MachineBasicBlock *MBB) const { 7507 const TargetMachine &TM = getTargetMachine(); 7508 const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); 7509 DebugLoc DL = MI->getDebugLoc(); 7510 7511 assert(Subtarget->isTargetWindows() && 7512 "__chkstk is only supported on Windows"); 7513 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode"); 7514 7515 // __chkstk takes the number of words to allocate on the stack in R4, and 7516 // returns the stack adjustment in number of bytes in R4. This will not 7517 // clober any other registers (other than the obvious lr). 7518 // 7519 // Although, technically, IP should be considered a register which may be 7520 // clobbered, the call itself will not touch it. Windows on ARM is a pure 7521 // thumb-2 environment, so there is no interworking required. As a result, we 7522 // do not expect a veneer to be emitted by the linker, clobbering IP. 7523 // 7524 // Each module receives its own copy of __chkstk, so no import thunk is 7525 // required, again, ensuring that IP is not clobbered. 7526 // 7527 // Finally, although some linkers may theoretically provide a trampoline for 7528 // out of range calls (which is quite common due to a 32M range limitation of 7529 // branches for Thumb), we can generate the long-call version via 7530 // -mcmodel=large, alleviating the need for the trampoline which may clobber 7531 // IP. 
7532 7533 switch (TM.getCodeModel()) { 7534 case CodeModel::Small: 7535 case CodeModel::Medium: 7536 case CodeModel::Default: 7537 case CodeModel::Kernel: 7538 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) 7539 .addImm((unsigned)ARMCC::AL).addReg(0) 7540 .addExternalSymbol("__chkstk") 7541 .addReg(ARM::R4, RegState::Implicit | RegState::Kill) 7542 .addReg(ARM::R4, RegState::Implicit | RegState::Define) 7543 .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); 7544 break; 7545 case CodeModel::Large: 7546 case CodeModel::JITDefault: { 7547 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 7548 unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); 7549 7550 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) 7551 .addExternalSymbol("__chkstk"); 7552 BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) 7553 .addImm((unsigned)ARMCC::AL).addReg(0) 7554 .addReg(Reg, RegState::Kill) 7555 .addReg(ARM::R4, RegState::Implicit | RegState::Kill) 7556 .addReg(ARM::R4, RegState::Implicit | RegState::Define) 7557 .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); 7558 break; 7559 } 7560 } 7561 7562 AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), 7563 ARM::SP) 7564 .addReg(ARM::SP).addReg(ARM::R4))); 7565 7566 MI->eraseFromParent(); 7567 return MBB; 7568 } 7569 7570 MachineBasicBlock * 7571 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 7572 MachineBasicBlock *BB) const { 7573 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); 7574 DebugLoc dl = MI->getDebugLoc(); 7575 bool isThumb2 = Subtarget->isThumb2(); 7576 switch (MI->getOpcode()) { 7577 default: { 7578 MI->dump(); 7579 llvm_unreachable("Unexpected instr type to insert"); 7580 } 7581 // The Thumb2 pre-indexed stores have the same MI operands, they just 7582 // define them differently in the .td files from the isel patterns, so 7583 // they need pseudos. 
7584 case ARM::t2STR_preidx: 7585 MI->setDesc(TII->get(ARM::t2STR_PRE)); 7586 return BB; 7587 case ARM::t2STRB_preidx: 7588 MI->setDesc(TII->get(ARM::t2STRB_PRE)); 7589 return BB; 7590 case ARM::t2STRH_preidx: 7591 MI->setDesc(TII->get(ARM::t2STRH_PRE)); 7592 return BB; 7593 7594 case ARM::STRi_preidx: 7595 case ARM::STRBi_preidx: { 7596 unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? 7597 ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; 7598 // Decode the offset. 7599 unsigned Offset = MI->getOperand(4).getImm(); 7600 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; 7601 Offset = ARM_AM::getAM2Offset(Offset); 7602 if (isSub) 7603 Offset = -Offset; 7604 7605 MachineMemOperand *MMO = *MI->memoperands_begin(); 7606 BuildMI(*BB, MI, dl, TII->get(NewOpc)) 7607 .addOperand(MI->getOperand(0)) // Rn_wb 7608 .addOperand(MI->getOperand(1)) // Rt 7609 .addOperand(MI->getOperand(2)) // Rn 7610 .addImm(Offset) // offset (skip GPR==zero_reg) 7611 .addOperand(MI->getOperand(5)) // pred 7612 .addOperand(MI->getOperand(6)) 7613 .addMemOperand(MMO); 7614 MI->eraseFromParent(); 7615 return BB; 7616 } 7617 case ARM::STRr_preidx: 7618 case ARM::STRBr_preidx: 7619 case ARM::STRH_preidx: { 7620 unsigned NewOpc; 7621 switch (MI->getOpcode()) { 7622 default: llvm_unreachable("unexpected opcode!"); 7623 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; 7624 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; 7625 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; 7626 } 7627 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); 7628 for (unsigned i = 0; i < MI->getNumOperands(); ++i) 7629 MIB.addOperand(MI->getOperand(i)); 7630 MI->eraseFromParent(); 7631 return BB; 7632 } 7633 7634 case ARM::tMOVCCr_pseudo: { 7635 // To "insert" a SELECT_CC instruction, we actually have to insert the 7636 // diamond control-flow pattern. 
The incoming instruction knows the 7637 // destination vreg to set, the condition code register to branch on, the 7638 // true/false values to select between, and a branch opcode to use. 7639 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7640 MachineFunction::iterator It = BB; 7641 ++It; 7642 7643 // thisMBB: 7644 // ... 7645 // TrueVal = ... 7646 // cmpTY ccX, r1, r2 7647 // bCC copy1MBB 7648 // fallthrough --> copy0MBB 7649 MachineBasicBlock *thisMBB = BB; 7650 MachineFunction *F = BB->getParent(); 7651 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 7652 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 7653 F->insert(It, copy0MBB); 7654 F->insert(It, sinkMBB); 7655 7656 // Transfer the remainder of BB and its successor edges to sinkMBB. 7657 sinkMBB->splice(sinkMBB->begin(), BB, 7658 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7659 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 7660 7661 BB->addSuccessor(copy0MBB); 7662 BB->addSuccessor(sinkMBB); 7663 7664 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 7665 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 7666 7667 // copy0MBB: 7668 // %FalseValue = ... 7669 // # fallthrough to sinkMBB 7670 BB = copy0MBB; 7671 7672 // Update machine-CFG edges 7673 BB->addSuccessor(sinkMBB); 7674 7675 // sinkMBB: 7676 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 7677 // ... 7678 BB = sinkMBB; 7679 BuildMI(*BB, BB->begin(), dl, 7680 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 7681 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 7682 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 7683 7684 MI->eraseFromParent(); // The pseudo instruction is gone now. 7685 return BB; 7686 } 7687 7688 case ARM::BCCi64: 7689 case ARM::BCCZi64: { 7690 // If there is an unconditional branch to the other successor, remove it. 
7691 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7692 7693 // Compare both parts that make up the double comparison separately for 7694 // equality. 7695 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 7696 7697 unsigned LHS1 = MI->getOperand(1).getReg(); 7698 unsigned LHS2 = MI->getOperand(2).getReg(); 7699 if (RHSisZero) { 7700 AddDefaultPred(BuildMI(BB, dl, 7701 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7702 .addReg(LHS1).addImm(0)); 7703 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7704 .addReg(LHS2).addImm(0) 7705 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 7706 } else { 7707 unsigned RHS1 = MI->getOperand(3).getReg(); 7708 unsigned RHS2 = MI->getOperand(4).getReg(); 7709 AddDefaultPred(BuildMI(BB, dl, 7710 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 7711 .addReg(LHS1).addReg(RHS1)); 7712 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 7713 .addReg(LHS2).addReg(RHS2) 7714 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 7715 } 7716 7717 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); 7718 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 7719 if (MI->getOperand(0).getImm() == ARMCC::NE) 7720 std::swap(destMBB, exitMBB); 7721 7722 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 7723 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 7724 if (isThumb2) 7725 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); 7726 else 7727 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); 7728 7729 MI->eraseFromParent(); // The pseudo instruction is gone now. 
7730 return BB; 7731 } 7732 7733 case ARM::Int_eh_sjlj_setjmp: 7734 case ARM::Int_eh_sjlj_setjmp_nofp: 7735 case ARM::tInt_eh_sjlj_setjmp: 7736 case ARM::t2Int_eh_sjlj_setjmp: 7737 case ARM::t2Int_eh_sjlj_setjmp_nofp: 7738 EmitSjLjDispatchBlock(MI, BB); 7739 return BB; 7740 7741 case ARM::ABS: 7742 case ARM::t2ABS: { 7743 // To insert an ABS instruction, we have to insert the 7744 // diamond control-flow pattern. The incoming instruction knows the 7745 // source vreg to test against 0, the destination vreg to set, 7746 // the condition code register to branch on, the 7747 // true/false values to select between, and a branch opcode to use. 7748 // It transforms 7749 // V1 = ABS V0 7750 // into 7751 // V2 = MOVS V0 7752 // BCC (branch to SinkBB if V0 >= 0) 7753 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) 7754 // SinkBB: V1 = PHI(V2, V3) 7755 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7756 MachineFunction::iterator BBI = BB; 7757 ++BBI; 7758 MachineFunction *Fn = BB->getParent(); 7759 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); 7760 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); 7761 Fn->insert(BBI, RSBBB); 7762 Fn->insert(BBI, SinkBB); 7763 7764 unsigned int ABSSrcReg = MI->getOperand(1).getReg(); 7765 unsigned int ABSDstReg = MI->getOperand(0).getReg(); 7766 bool ABSSrcKIll = MI->getOperand(1).isKill(); 7767 bool isThumb2 = Subtarget->isThumb2(); 7768 MachineRegisterInfo &MRI = Fn->getRegInfo(); 7769 // In Thumb mode S must not be specified if source register is the SP or 7770 // PC and if destination register is the SP, so restrict register class 7771 unsigned NewRsbDstReg = 7772 MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass); 7773 7774 // Transfer the remainder of BB and its successor edges to sinkMBB. 
7775 SinkBB->splice(SinkBB->begin(), BB, 7776 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7777 SinkBB->transferSuccessorsAndUpdatePHIs(BB); 7778 7779 BB->addSuccessor(RSBBB); 7780 BB->addSuccessor(SinkBB); 7781 7782 // fall through to SinkMBB 7783 RSBBB->addSuccessor(SinkBB); 7784 7785 // insert a cmp at the end of BB 7786 AddDefaultPred(BuildMI(BB, dl, 7787 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7788 .addReg(ABSSrcReg).addImm(0)); 7789 7790 // insert a bcc with opposite CC to ARMCC::MI at the end of BB 7791 BuildMI(BB, dl, 7792 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) 7793 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); 7794 7795 // insert rsbri in RSBBB 7796 // Note: BCC and rsbri will be converted into predicated rsbmi 7797 // by if-conversion pass 7798 BuildMI(*RSBBB, RSBBB->begin(), dl, 7799 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) 7800 .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0) 7801 .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); 7802 7803 // insert PHI in SinkBB, 7804 // reuse ABSDstReg to not change uses of ABS instruction 7805 BuildMI(*SinkBB, SinkBB->begin(), dl, 7806 TII->get(ARM::PHI), ABSDstReg) 7807 .addReg(NewRsbDstReg).addMBB(RSBBB) 7808 .addReg(ABSSrcReg).addMBB(BB); 7809 7810 // remove ABS instruction 7811 MI->eraseFromParent(); 7812 7813 // return last added BB 7814 return SinkBB; 7815 } 7816 case ARM::COPY_STRUCT_BYVAL_I32: 7817 ++NumLoopByVals; 7818 return EmitStructByval(MI, BB); 7819 case ARM::WIN__CHKSTK: 7820 return EmitLowered__chkstk(MI, BB); 7821 } 7822 } 7823 7824 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, 7825 SDNode *Node) const { 7826 const MCInstrDesc *MCID = &MI->getDesc(); 7827 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, 7828 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional 7829 // operand is still set to noreg. 
If needed, set the optional operand's 7830 // register to CPSR, and remove the redundant implicit def. 7831 // 7832 // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>). 7833 7834 // Rename pseudo opcodes. 7835 unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); 7836 if (NewOpc) { 7837 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo(); 7838 MCID = &TII->get(NewOpc); 7839 7840 assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && 7841 "converted opcode should be the same except for cc_out"); 7842 7843 MI->setDesc(*MCID); 7844 7845 // Add the optional cc_out operand 7846 MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); 7847 } 7848 unsigned ccOutIdx = MCID->getNumOperands() - 1; 7849 7850 // Any ARM instruction that sets the 's' bit should specify an optional 7851 // "cc_out" operand in the last operand position. 7852 if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { 7853 assert(!NewOpc && "Optional cc_out operand required"); 7854 return; 7855 } 7856 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it 7857 // since we already have an optional CPSR def. 
7858 bool definesCPSR = false; 7859 bool deadCPSR = false; 7860 for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands(); 7861 i != e; ++i) { 7862 const MachineOperand &MO = MI->getOperand(i); 7863 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { 7864 definesCPSR = true; 7865 if (MO.isDead()) 7866 deadCPSR = true; 7867 MI->RemoveOperand(i); 7868 break; 7869 } 7870 } 7871 if (!definesCPSR) { 7872 assert(!NewOpc && "Optional cc_out operand required"); 7873 return; 7874 } 7875 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); 7876 if (deadCPSR) { 7877 assert(!MI->getOperand(ccOutIdx).getReg() && 7878 "expect uninitialized optional cc_out operand"); 7879 return; 7880 } 7881 7882 // If this instruction was defined with an optional CPSR def and its dag node 7883 // had a live implicit CPSR def, then activate the optional CPSR def. 7884 MachineOperand &MO = MI->getOperand(ccOutIdx); 7885 MO.setReg(ARM::CPSR); 7886 MO.setIsDef(true); 7887 } 7888 7889 //===----------------------------------------------------------------------===// 7890 // ARM Optimization Hooks 7891 //===----------------------------------------------------------------------===// 7892 7893 // Helper function that checks if N is a null or all ones constant. 7894 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { 7895 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); 7896 if (!C) 7897 return false; 7898 return AllOnes ? C->isAllOnesValue() : C->isNullValue(); 7899 } 7900 7901 // Return true if N is conditionally 0 or all ones. 7902 // Detects these expressions where cc is an i1 value: 7903 // 7904 // (select cc 0, y) [AllOnes=0] 7905 // (select cc y, 0) [AllOnes=0] 7906 // (zext cc) [AllOnes=0] 7907 // (sext cc) [AllOnes=0/1] 7908 // (select cc -1, y) [AllOnes=1] 7909 // (select cc y, -1) [AllOnes=1] 7910 // 7911 // Invert is set when N is the null/all ones constant when CC is false. 7912 // OtherOp is set to the alternative value of N. 
7913 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, 7914 SDValue &CC, bool &Invert, 7915 SDValue &OtherOp, 7916 SelectionDAG &DAG) { 7917 switch (N->getOpcode()) { 7918 default: return false; 7919 case ISD::SELECT: { 7920 CC = N->getOperand(0); 7921 SDValue N1 = N->getOperand(1); 7922 SDValue N2 = N->getOperand(2); 7923 if (isZeroOrAllOnes(N1, AllOnes)) { 7924 Invert = false; 7925 OtherOp = N2; 7926 return true; 7927 } 7928 if (isZeroOrAllOnes(N2, AllOnes)) { 7929 Invert = true; 7930 OtherOp = N1; 7931 return true; 7932 } 7933 return false; 7934 } 7935 case ISD::ZERO_EXTEND: 7936 // (zext cc) can never be the all ones value. 7937 if (AllOnes) 7938 return false; 7939 // Fall through. 7940 case ISD::SIGN_EXTEND: { 7941 SDLoc dl(N); 7942 EVT VT = N->getValueType(0); 7943 CC = N->getOperand(0); 7944 if (CC.getValueType() != MVT::i1) 7945 return false; 7946 Invert = !AllOnes; 7947 if (AllOnes) 7948 // When looking for an AllOnes constant, N is an sext, and the 'other' 7949 // value is 0. 7950 OtherOp = DAG.getConstant(0, dl, VT); 7951 else if (N->getOpcode() == ISD::ZERO_EXTEND) 7952 // When looking for a 0 constant, N can be zext or sext. 7953 OtherOp = DAG.getConstant(1, dl, VT); 7954 else 7955 OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, 7956 VT); 7957 return true; 7958 } 7959 } 7960 } 7961 7962 // Combine a constant select operand into its use: 7963 // 7964 // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 7965 // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 7966 // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] 7967 // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) 7968 // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) 7969 // 7970 // The transform is rejected if the select doesn't have a constant operand that 7971 // is null, or all ones when AllOnes is set. 
7972 // 7973 // Also recognize sext/zext from i1: 7974 // 7975 // (add (zext cc), x) -> (select cc (add x, 1), x) 7976 // (add (sext cc), x) -> (select cc (add x, -1), x) 7977 // 7978 // These transformations eventually create predicated instructions. 7979 // 7980 // @param N The node to transform. 7981 // @param Slct The N operand that is a select. 7982 // @param OtherOp The other N operand (x above). 7983 // @param DCI Context. 7984 // @param AllOnes Require the select constant to be all ones instead of null. 7985 // @returns The new node, or SDValue() on failure. 7986 static 7987 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 7988 TargetLowering::DAGCombinerInfo &DCI, 7989 bool AllOnes = false) { 7990 SelectionDAG &DAG = DCI.DAG; 7991 EVT VT = N->getValueType(0); 7992 SDValue NonConstantVal; 7993 SDValue CCOp; 7994 bool SwapSelectOps; 7995 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps, 7996 NonConstantVal, DAG)) 7997 return SDValue(); 7998 7999 // Slct is now know to be the desired identity constant when CC is true. 8000 SDValue TrueVal = OtherOp; 8001 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 8002 OtherOp, NonConstantVal); 8003 // Unless SwapSelectOps says CC should be false. 8004 if (SwapSelectOps) 8005 std::swap(TrueVal, FalseVal); 8006 8007 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, 8008 CCOp, TrueVal, FalseVal); 8009 } 8010 8011 // Attempt combineSelectAndUse on each operand of a commutative operator N. 
8012 static 8013 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, 8014 TargetLowering::DAGCombinerInfo &DCI) { 8015 SDValue N0 = N->getOperand(0); 8016 SDValue N1 = N->getOperand(1); 8017 if (N0.getNode()->hasOneUse()) { 8018 SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes); 8019 if (Result.getNode()) 8020 return Result; 8021 } 8022 if (N1.getNode()->hasOneUse()) { 8023 SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes); 8024 if (Result.getNode()) 8025 return Result; 8026 } 8027 return SDValue(); 8028 } 8029 8030 // AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction 8031 // (only after legalization). 8032 static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, 8033 TargetLowering::DAGCombinerInfo &DCI, 8034 const ARMSubtarget *Subtarget) { 8035 8036 // Only perform optimization if after legalize, and if NEON is available. We 8037 // also expected both operands to be BUILD_VECTORs. 8038 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON() 8039 || N0.getOpcode() != ISD::BUILD_VECTOR 8040 || N1.getOpcode() != ISD::BUILD_VECTOR) 8041 return SDValue(); 8042 8043 // Check output type since VPADDL operand elements can only be 8, 16, or 32. 8044 EVT VT = N->getValueType(0); 8045 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64) 8046 return SDValue(); 8047 8048 // Check that the vector operands are of the right form. 8049 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR 8050 // operands, where N is the size of the formed vector. 8051 // Each EXTRACT_VECTOR should have the same input vector and odd or even 8052 // index such that we have a pair wise add pattern. 8053 8054 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing. 
8055 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 8056 return SDValue(); 8057 SDValue Vec = N0->getOperand(0)->getOperand(0); 8058 SDNode *V = Vec.getNode(); 8059 unsigned nextIndex = 0; 8060 8061 // For each operands to the ADD which are BUILD_VECTORs, 8062 // check to see if each of their operands are an EXTRACT_VECTOR with 8063 // the same vector and appropriate index. 8064 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) { 8065 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT 8066 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 8067 8068 SDValue ExtVec0 = N0->getOperand(i); 8069 SDValue ExtVec1 = N1->getOperand(i); 8070 8071 // First operand is the vector, verify its the same. 8072 if (V != ExtVec0->getOperand(0).getNode() || 8073 V != ExtVec1->getOperand(0).getNode()) 8074 return SDValue(); 8075 8076 // Second is the constant, verify its correct. 8077 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1)); 8078 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1)); 8079 8080 // For the constant, we want to see all the even or all the odd. 8081 if (!C0 || !C1 || C0->getZExtValue() != nextIndex 8082 || C1->getZExtValue() != nextIndex+1) 8083 return SDValue(); 8084 8085 // Increment index. 8086 nextIndex+=2; 8087 } else 8088 return SDValue(); 8089 } 8090 8091 // Create VPADDL node. 8092 SelectionDAG &DAG = DCI.DAG; 8093 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8094 8095 SDLoc dl(N); 8096 8097 // Build operand list. 8098 SmallVector<SDValue, 8> Ops; 8099 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl, 8100 TLI.getPointerTy(DAG.getDataLayout()))); 8101 8102 // Input is the vector. 8103 Ops.push_back(Vec); 8104 8105 // Get widened type and narrowed type. 
8106 MVT widenType; 8107 unsigned numElem = VT.getVectorNumElements(); 8108 8109 EVT inputLaneType = Vec.getValueType().getVectorElementType(); 8110 switch (inputLaneType.getSimpleVT().SimpleTy) { 8111 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break; 8112 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; 8113 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; 8114 default: 8115 llvm_unreachable("Invalid vector element type for padd optimization."); 8116 } 8117 8118 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops); 8119 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE; 8120 return DAG.getNode(ExtOp, dl, VT, tmp); 8121 } 8122 8123 static SDValue findMUL_LOHI(SDValue V) { 8124 if (V->getOpcode() == ISD::UMUL_LOHI || 8125 V->getOpcode() == ISD::SMUL_LOHI) 8126 return V; 8127 return SDValue(); 8128 } 8129 8130 static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, 8131 TargetLowering::DAGCombinerInfo &DCI, 8132 const ARMSubtarget *Subtarget) { 8133 8134 if (Subtarget->isThumb1Only()) return SDValue(); 8135 8136 // Only perform the checks after legalize when the pattern is available. 8137 if (DCI.isBeforeLegalize()) return SDValue(); 8138 8139 // Look for multiply add opportunities. 8140 // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where 8141 // each add nodes consumes a value from ISD::UMUL_LOHI and there is 8142 // a glue link from the first add to the second add. 8143 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by 8144 // a S/UMLAL instruction. 
8145 // UMUL_LOHI 8146 // / :lo \ :hi 8147 // / \ [no multiline comment] 8148 // loAdd -> ADDE | 8149 // \ :glue / 8150 // \ / 8151 // ADDC <- hiAdd 8152 // 8153 assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC"); 8154 SDValue AddcOp0 = AddcNode->getOperand(0); 8155 SDValue AddcOp1 = AddcNode->getOperand(1); 8156 8157 // Check if the two operands are from the same mul_lohi node. 8158 if (AddcOp0.getNode() == AddcOp1.getNode()) 8159 return SDValue(); 8160 8161 assert(AddcNode->getNumValues() == 2 && 8162 AddcNode->getValueType(0) == MVT::i32 && 8163 "Expect ADDC with two result values. First: i32"); 8164 8165 // Check that we have a glued ADDC node. 8166 if (AddcNode->getValueType(1) != MVT::Glue) 8167 return SDValue(); 8168 8169 // Check that the ADDC adds the low result of the S/UMUL_LOHI. 8170 if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && 8171 AddcOp0->getOpcode() != ISD::SMUL_LOHI && 8172 AddcOp1->getOpcode() != ISD::UMUL_LOHI && 8173 AddcOp1->getOpcode() != ISD::SMUL_LOHI) 8174 return SDValue(); 8175 8176 // Look for the glued ADDE. 8177 SDNode* AddeNode = AddcNode->getGluedUser(); 8178 if (!AddeNode) 8179 return SDValue(); 8180 8181 // Make sure it is really an ADDE. 8182 if (AddeNode->getOpcode() != ISD::ADDE) 8183 return SDValue(); 8184 8185 assert(AddeNode->getNumOperands() == 3 && 8186 AddeNode->getOperand(2).getValueType() == MVT::Glue && 8187 "ADDE node has the wrong inputs"); 8188 8189 // Check for the triangle shape. 8190 SDValue AddeOp0 = AddeNode->getOperand(0); 8191 SDValue AddeOp1 = AddeNode->getOperand(1); 8192 8193 // Make sure that the ADDE operands are not coming from the same node. 8194 if (AddeOp0.getNode() == AddeOp1.getNode()) 8195 return SDValue(); 8196 8197 // Find the MUL_LOHI node walking up ADDE's operands. 
8198 bool IsLeftOperandMUL = false; 8199 SDValue MULOp = findMUL_LOHI(AddeOp0); 8200 if (MULOp == SDValue()) 8201 MULOp = findMUL_LOHI(AddeOp1); 8202 else 8203 IsLeftOperandMUL = true; 8204 if (MULOp == SDValue()) 8205 return SDValue(); 8206 8207 // Figure out the right opcode. 8208 unsigned Opc = MULOp->getOpcode(); 8209 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL; 8210 8211 // Figure out the high and low input values to the MLAL node. 8212 SDValue* HiAdd = nullptr; 8213 SDValue* LoMul = nullptr; 8214 SDValue* LowAdd = nullptr; 8215 8216 // Ensure that ADDE is from high result of ISD::SMUL_LOHI. 8217 if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1))) 8218 return SDValue(); 8219 8220 if (IsLeftOperandMUL) 8221 HiAdd = &AddeOp1; 8222 else 8223 HiAdd = &AddeOp0; 8224 8225 8226 // Ensure that LoMul and LowAdd are taken from correct ISD::SMUL_LOHI node 8227 // whose low result is fed to the ADDC we are checking. 8228 8229 if (AddcOp0 == MULOp.getValue(0)) { 8230 LoMul = &AddcOp0; 8231 LowAdd = &AddcOp1; 8232 } 8233 if (AddcOp1 == MULOp.getValue(0)) { 8234 LoMul = &AddcOp1; 8235 LowAdd = &AddcOp0; 8236 } 8237 8238 if (!LoMul) 8239 return SDValue(); 8240 8241 // Create the merged node. 8242 SelectionDAG &DAG = DCI.DAG; 8243 8244 // Build operand list. 8245 SmallVector<SDValue, 8> Ops; 8246 Ops.push_back(LoMul->getOperand(0)); 8247 Ops.push_back(LoMul->getOperand(1)); 8248 Ops.push_back(*LowAdd); 8249 Ops.push_back(*HiAdd); 8250 8251 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), 8252 DAG.getVTList(MVT::i32, MVT::i32), Ops); 8253 8254 // Replace the ADDs' nodes uses by the MLA node's values. 8255 SDValue HiMLALResult(MLALNode.getNode(), 1); 8256 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult); 8257 8258 SDValue LoMLALResult(MLALNode.getNode(), 0); 8259 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); 8260 8261 // Return original node to notify the driver to stop replacing. 
8262 SDValue resNode(AddcNode, 0); 8263 return resNode; 8264 } 8265 8266 /// PerformADDCCombine - Target-specific dag combine transform from 8267 /// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL. 8268 static SDValue PerformADDCCombine(SDNode *N, 8269 TargetLowering::DAGCombinerInfo &DCI, 8270 const ARMSubtarget *Subtarget) { 8271 8272 return AddCombineTo64bitMLAL(N, DCI, Subtarget); 8273 8274 } 8275 8276 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with 8277 /// operands N0 and N1. This is a helper for PerformADDCombine that is 8278 /// called with the default operands, and if that fails, with commuted 8279 /// operands. 8280 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 8281 TargetLowering::DAGCombinerInfo &DCI, 8282 const ARMSubtarget *Subtarget){ 8283 8284 // Attempt to create vpaddl for this add. 8285 SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget); 8286 if (Result.getNode()) 8287 return Result; 8288 8289 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 8290 if (N0.getNode()->hasOneUse()) { 8291 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 8292 if (Result.getNode()) return Result; 8293 } 8294 return SDValue(); 8295 } 8296 8297 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 8298 /// 8299 static SDValue PerformADDCombine(SDNode *N, 8300 TargetLowering::DAGCombinerInfo &DCI, 8301 const ARMSubtarget *Subtarget) { 8302 SDValue N0 = N->getOperand(0); 8303 SDValue N1 = N->getOperand(1); 8304 8305 // First try with the default operand order. 8306 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget); 8307 if (Result.getNode()) 8308 return Result; 8309 8310 // If that didn't work, try again with the operands commuted. 8311 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); 8312 } 8313 8314 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 
///
/// Only one fold is attempted: when the subtrahend (operand 1) has a single
/// use, it is handed to combineSelectAndUse together with the minuend.
/// Returns an empty SDValue when that does not produce a replacement.
static SDValue PerformSUBCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
  if (N1.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
    if (Result.getNode()) return Result;
  }

  return SDValue();
}

/// PerformVMULCombine
/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
/// special multiplier accumulator forwarding.
///   vmul d3, d0, d2
///   vmla d3, d1, d2
/// is faster than
///   vadd d3, d0, d1
///   vmul d3, d3, d2
/// However, for (A + B) * (A + B),
///   vadd d2, d0, d1
///   vmul d3, d0, d2
///   vmla d3, d1, d2
/// is slower than
///   vadd d2, d0, d1
///   vmul d3, d2, d2
///
/// The combine is skipped entirely unless the subtarget reports
/// hasVMLxForwarding().  Either multiply operand may be the ADD/SUB/FADD/FSUB;
/// the operands are swapped so that the add/sub ends up in N0.
static SDValue PerformVMULCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {
  if (!Subtarget->hasVMLxForwarding())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  unsigned Opcode = N0.getOpcode();
  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
      Opcode != ISD::FADD && Opcode != ISD::FSUB) {
    // Operand 0 is not an add/sub; see whether operand 1 is, and if so
    // canonicalize so the add/sub is N0 and the other factor is N1.
    Opcode = N1.getOpcode();
    if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
        Opcode != ISD::FADD && Opcode != ISD::FSUB)
      return SDValue();
    std::swap(N0, N1);
  }

  // (A + B) * (A + B): distributing would be slower (see header comment).
  if (N0 == N1)
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue N00 = N0->getOperand(0);
  SDValue N01 = N0->getOperand(1);
  // Rebuild as (N00 * N1) <Opcode> (N01 * N1), reusing the add/sub opcode.
  return DAG.getNode(Opcode, DL, VT,
                     DAG.getNode(ISD::MUL, DL, VT, N00, N1),
                     DAG.getNode(ISD::MUL, DL, VT, N01, N1));
}

/// PerformMULCombine - Target-specific dag combine xforms for ISD::MUL.
///
/// 64-bit and 128-bit vector multiplies are forwarded to PerformVMULCombine.
/// For i32 multiplies by a constant, the constant's trailing zero bits are
/// peeled off as a final left shift, and the remaining factor is rewritten as
/// a shift plus an add/sub when it has the form +/-(2^N + 1) or +/-(2^N - 1).
/// Nothing is done on Thumb1-only subtargets, before legalization, or when
/// called by the legalizer.
///
/// On success the replacement is installed through DCI.CombineTo and an empty
/// SDValue is returned; an empty SDValue is also returned when no rewrite
/// applies.
static SDValue PerformMULCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  if (Subtarget->isThumb1Only())
    return SDValue();

  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT.is64BitVector() || VT.is128BitVector())
    return PerformVMULCombine(N, DCI, Subtarget);
  if (VT != MVT::i32)
    return SDValue();

  // Only a constant right-hand operand is handled.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!C)
    return SDValue();

  int64_t MulAmt = C->getSExtValue();
  unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);

  // Keep the shift amount within 0..31.
  ShiftAmt = ShiftAmt & (32 - 1);
  SDValue V = N->getOperand(0);
  SDLoc DL(N);

  SDValue Res;
  // Strip the power-of-two factor; it is re-applied as a SHL at the end.
  MulAmt >>= ShiftAmt;

  if (MulAmt >= 0) {
    if (isPowerOf2_32(MulAmt - 1)) {
      // (mul x, 2^N + 1) => (add (shl x, N), x)
      Res = DAG.getNode(ISD::ADD, DL, VT,
                        V,
                        DAG.getNode(ISD::SHL, DL, VT,
                                    V,
                                    DAG.getConstant(Log2_32(MulAmt - 1), DL,
                                                    MVT::i32)));
    } else if (isPowerOf2_32(MulAmt + 1)) {
      // (mul x, 2^N - 1) => (sub (shl x, N), x)
      Res = DAG.getNode(ISD::SUB, DL, VT,
                        DAG.getNode(ISD::SHL, DL, VT,
                                    V,
                                    DAG.getConstant(Log2_32(MulAmt + 1), DL,
                                                    MVT::i32)),
                        V);
    } else
      return SDValue();
  } else {
    uint64_t MulAmtAbs = -MulAmt;
    if (isPowerOf2_32(MulAmtAbs + 1)) {
      // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
      Res = DAG.getNode(ISD::SUB, DL, VT,
                        V,
                        DAG.getNode(ISD::SHL, DL, VT,
                                    V,
                                    DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
                                                    MVT::i32)));
    } else if (isPowerOf2_32(MulAmtAbs - 1)) {
      // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
      Res = DAG.getNode(ISD::ADD, DL, VT,
                        V,
                        DAG.getNode(ISD::SHL, DL, VT,
                                    V,
                                    DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
                                                    MVT::i32)));
      // Negate the sum by subtracting it from zero.
      Res = DAG.getNode(ISD::SUB, DL, VT,
                        DAG.getConstant(0, DL, MVT::i32), Res);

    } else
      return SDValue();
  }

  // Re-apply the power-of-two factor that was stripped from the constant.
  if (ShiftAmt != 0)
    Res = DAG.getNode(ISD::SHL, DL, VT,
                      Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));

  // Do not add new nodes to DAG combiner worklist.
  DCI.CombineTo(N, Res, false);
  return SDValue();
}

/// PerformANDCombine - Target-specific dag combine xforms for ISD::AND.
///
/// Two folds are tried, in order, and only when the result type is legal:
///  1. (and x, splat-constant) -> VBICIMM, when the complement of the splat
///     is accepted by isNEONModifiedImm.
///  2. On non-Thumb1 subtargets, the select fold implemented by
///     combineSelectAndUseCommutative.
/// Returns an empty SDValue when neither applies.
static SDValue PerformANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *Subtarget) {

  // Attempt to use immediate-form VBIC
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;

  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (BVN &&
      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
    if (SplatBitSize <= 64) {
      EVT VbicVT;
      // VBIC clears the bits set in its immediate, so encode ~SplatBits.
      SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
                                      SplatUndef.getZExtValue(), SplatBitSize,
                                      DAG, dl, VbicVT, VT.is128BitVector(),
                                      OtherModImm);
      if (Val.getNode()) {
        // The immediate form works on VbicVT; bitcast in and back out.
        SDValue Input =
          DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
        SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
        return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
      }
    }
  }

  if (!Subtarget->isThumb1Only()) {
    // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
    SDValue Result = combineSelectAndUseCommutative(N, true, DCI);
    if (Result.getNode())
      return Result;
  }

  return SDValue();
}

/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
///
/// Folds tried, in order (all require a legal result type):
///  - (or x, splat-constant) -> VORRIMM on NEON subtargets.
///  - The select fold in combineSelectAndUseCommutative (non-Thumb1 only).
///  - (or (and B, A), (and C, ~A)) -> VBSL for constant-splat A on NEON.
///  - Several (or (and ...), ...) shapes -> ARMISD::BFI for i32 on non-Thumb1
///    subtargets with V6T2 ops; see the numbered cases in the body.
///
/// The BFI rewrites install their result through DCI.CombineTo and return an
/// empty SDValue; the vector rewrites return the new value directly.
static SDValue PerformORCombine(SDNode *N,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const ARMSubtarget *Subtarget) {
  // Attempt to use immediate-form VORR
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;

  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (BVN && Subtarget->hasNEON() &&
      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
    if (SplatBitSize <= 64) {
      EVT VorrVT;
      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
                                      SplatUndef.getZExtValue(), SplatBitSize,
                                      DAG, dl, VorrVT, VT.is128BitVector(),
                                      OtherModImm);
      if (Val.getNode()) {
        // The immediate form works on VorrVT; bitcast in and back out.
        SDValue Input =
          DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
        SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
        return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
      }
    }
  }

  if (!Subtarget->isThumb1Only()) {
    // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
    SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
    if (Result.getNode())
      return Result;
  }

  // The code below optimizes (or (and X, Y), Z).
  // The AND operand needs to have a single user to make these optimizations
  // profitable.
  SDValue N0 = N->getOperand(0);
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();
  SDValue N1 = N->getOperand(1);

  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
      DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
    APInt SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;

    APInt SplatBits0, SplatBits1;
    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
    BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
    // Ensure that the second operand of both ands are constants
    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
                                      HasAnyUndefs) && !HasAnyUndefs) {
        if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
                                          HasAnyUndefs) && !HasAnyUndefs) {
            // Ensure that the bit width of the constants are the same and that
            // the splat arguments are logical inverses as per the pattern we
            // are trying to simplify.
            if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
                SplatBits0 == ~SplatBits1) {
                // Canonicalize the vector type to make instruction selection
                // simpler.
                EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
                SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
                                             N0->getOperand(1),
                                             N0->getOperand(0),
                                             N1->getOperand(0));
                return DAG.getNode(ISD::BITCAST, dl, VT, Result);
            }
        }
    }
  }

  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
  // reasonable.

  // BFI is only available on V6T2+
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
    return SDValue();

  SDLoc DL(N);
  // 1) or (and A, mask), val => ARMbfi A, val, mask
  //      iff (val & ~mask) == val
  //
  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
  //          && mask == ~mask2
  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
  //          && ~mask == mask2
  //  (i.e., copy a bitfield value into another bitfield of the same width)

  if (VT != MVT::i32)
    return SDValue();

  SDValue N00 = N0.getOperand(0);

  // The value and the mask need to be constants so we can verify this is
  // actually a bitfield set. If the mask is 0xffff, we can do better
  // via a movt instruction, so don't use BFI in that case.
  SDValue MaskOp = N0.getOperand(1);
  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
  if (!MaskC)
    return SDValue();
  unsigned Mask = MaskC->getZExtValue();
  if (Mask == 0xffff)
    return SDValue();
  SDValue Res;
  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N1C) {
    unsigned Val = N1C->getZExtValue();
    // The constant may only set bits that the AND mask clears.
    if ((Val & ~Mask) != Val)
      return SDValue();

    if (ARM::isBitFieldInvertedMask(Mask)) {
      // Shift the constant down so it starts at bit 0 of the inserted field.
      Val >>= countTrailingZeros(~Mask);

      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
                        DAG.getConstant(Val, DL, MVT::i32),
                        DAG.getConstant(Mask, DL, MVT::i32));

      // Do not add new nodes to DAG combiner worklist.
      DCI.CombineTo(N, Res, false);
      return SDValue();
    }
  } else if (N1.getOpcode() == ISD::AND) {
    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C)
      return SDValue();
    unsigned Mask2 = N11C->getZExtValue();

    // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
    // as is to match.
    if (ARM::isBitFieldInvertedMask(Mask) &&
        (Mask == ~Mask2)) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasT2ExtractPack() &&
          (Mask == 0xffff || Mask == 0xffff0000))
        return SDValue();
      // 2a
      unsigned amt = countTrailingZeros(Mask2);
      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
                        DAG.getConstant(amt, DL, MVT::i32));
      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
                        DAG.getConstant(Mask, DL, MVT::i32));
      // Do not add new nodes to DAG combiner worklist.
      DCI.CombineTo(N, Res, false);
      return SDValue();
    } else if (ARM::isBitFieldInvertedMask(~Mask) &&
               (~Mask == Mask2)) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasT2ExtractPack() &&
          (Mask2 == 0xffff || Mask2 == 0xffff0000))
        return SDValue();
      // 2b
      unsigned lsb = countTrailingZeros(Mask);
      Res = DAG.getNode(ISD::SRL, DL, VT, N00,
                        DAG.getConstant(lsb, DL, MVT::i32));
      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
                        DAG.getConstant(Mask2, DL, MVT::i32));
      // Do not add new nodes to DAG combiner worklist.
      DCI.CombineTo(N, Res, false);
      return SDValue();
    }
  }

  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
      N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
      ARM::isBitFieldInvertedMask(~Mask)) {
    // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
    // where lsb(mask) == #shamt and masked bits of B are known zero.
    SDValue ShAmt = N00.getOperand(1);
    unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    unsigned LSB = countTrailingZeros(Mask);
    if (ShAmtC != LSB)
      return SDValue();

    Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
                      DAG.getConstant(~Mask, DL, MVT::i32));

    // Do not add new nodes to DAG combiner worklist.
    DCI.CombineTo(N, Res, false);
  }

  return SDValue();
}

/// PerformXORCombine - Target-specific dag combine xforms for ISD::XOR.
///
/// Only the select fold in combineSelectAndUseCommutative is attempted, and
/// only for legal result types on non-Thumb1 subtargets.  Returns an empty
/// SDValue otherwise.
static SDValue PerformXORCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;

  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  if (!Subtarget->isThumb1Only()) {
    // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
    SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
    if (Result.getNode())
      return Result;
  }

  return SDValue();
}

/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
/// the bits being cleared by the AND are not demanded by the BFI.
8722 static SDValue PerformBFICombine(SDNode *N, 8723 TargetLowering::DAGCombinerInfo &DCI) { 8724 SDValue N1 = N->getOperand(1); 8725 if (N1.getOpcode() == ISD::AND) { 8726 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 8727 if (!N11C) 8728 return SDValue(); 8729 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 8730 unsigned LSB = countTrailingZeros(~InvMask); 8731 unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB; 8732 assert(Width < 8733 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && 8734 "undefined behavior"); 8735 unsigned Mask = (1u << Width) - 1; 8736 unsigned Mask2 = N11C->getZExtValue(); 8737 if ((Mask & (~Mask2)) == 0) 8738 return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), 8739 N->getOperand(0), N1.getOperand(0), 8740 N->getOperand(2)); 8741 } 8742 return SDValue(); 8743 } 8744 8745 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for 8746 /// ARMISD::VMOVRRD. 8747 static SDValue PerformVMOVRRDCombine(SDNode *N, 8748 TargetLowering::DAGCombinerInfo &DCI, 8749 const ARMSubtarget *Subtarget) { 8750 // vmovrrd(vmovdrr x, y) -> x,y 8751 SDValue InDouble = N->getOperand(0); 8752 if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP()) 8753 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 8754 8755 // vmovrrd(load f64) -> (load i32), (load i32) 8756 SDNode *InNode = InDouble.getNode(); 8757 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && 8758 InNode->getValueType(0) == MVT::f64 && 8759 InNode->getOperand(1).getOpcode() == ISD::FrameIndex && 8760 !cast<LoadSDNode>(InNode)->isVolatile()) { 8761 // TODO: Should this be done for non-FrameIndex operands? 
8762 LoadSDNode *LD = cast<LoadSDNode>(InNode); 8763 8764 SelectionDAG &DAG = DCI.DAG; 8765 SDLoc DL(LD); 8766 SDValue BasePtr = LD->getBasePtr(); 8767 SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, 8768 LD->getPointerInfo(), LD->isVolatile(), 8769 LD->isNonTemporal(), LD->isInvariant(), 8770 LD->getAlignment()); 8771 8772 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 8773 DAG.getConstant(4, DL, MVT::i32)); 8774 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, 8775 LD->getPointerInfo(), LD->isVolatile(), 8776 LD->isNonTemporal(), LD->isInvariant(), 8777 std::min(4U, LD->getAlignment() / 2)); 8778 8779 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); 8780 if (DCI.DAG.getDataLayout().isBigEndian()) 8781 std::swap (NewLD1, NewLD2); 8782 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); 8783 return Result; 8784 } 8785 8786 return SDValue(); 8787 } 8788 8789 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for 8790 /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. 8791 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { 8792 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) 8793 SDValue Op0 = N->getOperand(0); 8794 SDValue Op1 = N->getOperand(1); 8795 if (Op0.getOpcode() == ISD::BITCAST) 8796 Op0 = Op0.getOperand(0); 8797 if (Op1.getOpcode() == ISD::BITCAST) 8798 Op1 = Op1.getOperand(0); 8799 if (Op0.getOpcode() == ARMISD::VMOVRRD && 8800 Op0.getNode() == Op1.getNode() && 8801 Op0.getResNo() == 0 && Op1.getResNo() == 1) 8802 return DAG.getNode(ISD::BITCAST, SDLoc(N), 8803 N->getValueType(0), Op0.getOperand(0)); 8804 return SDValue(); 8805 } 8806 8807 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node 8808 /// are normal, non-volatile loads. If so, it is profitable to bitcast an 8809 /// i64 vector to have f64 elements, since the value can then be loaded 8810 /// directly into a VFP register. 
8811 static bool hasNormalLoadOperand(SDNode *N) { 8812 unsigned NumElts = N->getValueType(0).getVectorNumElements(); 8813 for (unsigned i = 0; i < NumElts; ++i) { 8814 SDNode *Elt = N->getOperand(i).getNode(); 8815 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile()) 8816 return true; 8817 } 8818 return false; 8819 } 8820 8821 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for 8822 /// ISD::BUILD_VECTOR. 8823 static SDValue PerformBUILD_VECTORCombine(SDNode *N, 8824 TargetLowering::DAGCombinerInfo &DCI, 8825 const ARMSubtarget *Subtarget) { 8826 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): 8827 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value 8828 // into a pair of GPRs, which is fine when the value is used as a scalar, 8829 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. 8830 SelectionDAG &DAG = DCI.DAG; 8831 if (N->getNumOperands() == 2) { 8832 SDValue RV = PerformVMOVDRRCombine(N, DAG); 8833 if (RV.getNode()) 8834 return RV; 8835 } 8836 8837 // Load i64 elements as f64 values so that type legalization does not split 8838 // them up into i32 values. 8839 EVT VT = N->getValueType(0); 8840 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) 8841 return SDValue(); 8842 SDLoc dl(N); 8843 SmallVector<SDValue, 8> Ops; 8844 unsigned NumElts = VT.getVectorNumElements(); 8845 for (unsigned i = 0; i < NumElts; ++i) { 8846 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i)); 8847 Ops.push_back(V); 8848 // Make the DAGCombiner fold the bitcast. 8849 DCI.AddToWorklist(V.getNode()); 8850 } 8851 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); 8852 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops); 8853 return DAG.getNode(ISD::BITCAST, dl, VT, BV); 8854 } 8855 8856 /// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR. 
8857 static SDValue 8858 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 8859 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR. 8860 // At that time, we may have inserted bitcasts from integer to float. 8861 // If these bitcasts have survived DAGCombine, change the lowering of this 8862 // BUILD_VECTOR in something more vector friendly, i.e., that does not 8863 // force to use floating point types. 8864 8865 // Make sure we can change the type of the vector. 8866 // This is possible iff: 8867 // 1. The vector is only used in a bitcast to a integer type. I.e., 8868 // 1.1. Vector is used only once. 8869 // 1.2. Use is a bit convert to an integer type. 8870 // 2. The size of its operands are 32-bits (64-bits are not legal). 8871 EVT VT = N->getValueType(0); 8872 EVT EltVT = VT.getVectorElementType(); 8873 8874 // Check 1.1. and 2. 8875 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse()) 8876 return SDValue(); 8877 8878 // By construction, the input type must be float. 8879 assert(EltVT == MVT::f32 && "Unexpected type!"); 8880 8881 // Check 1.2. 8882 SDNode *Use = *N->use_begin(); 8883 if (Use->getOpcode() != ISD::BITCAST || 8884 Use->getValueType(0).isFloatingPoint()) 8885 return SDValue(); 8886 8887 // Check profitability. 8888 // Model is, if more than half of the relevant operands are bitcast from 8889 // i32, turn the build_vector into a sequence of insert_vector_elt. 8890 // Relevant operands are everything that is not statically 8891 // (i.e., at compile time) bitcasted. 8892 unsigned NumOfBitCastedElts = 0; 8893 unsigned NumElts = VT.getVectorNumElements(); 8894 unsigned NumOfRelevantElts = NumElts; 8895 for (unsigned Idx = 0; Idx < NumElts; ++Idx) { 8896 SDValue Elt = N->getOperand(Idx); 8897 if (Elt->getOpcode() == ISD::BITCAST) { 8898 // Assume only bit cast to i32 will go away. 
8899 if (Elt->getOperand(0).getValueType() == MVT::i32) 8900 ++NumOfBitCastedElts; 8901 } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt)) 8902 // Constants are statically casted, thus do not count them as 8903 // relevant operands. 8904 --NumOfRelevantElts; 8905 } 8906 8907 // Check if more than half of the elements require a non-free bitcast. 8908 if (NumOfBitCastedElts <= NumOfRelevantElts / 2) 8909 return SDValue(); 8910 8911 SelectionDAG &DAG = DCI.DAG; 8912 // Create the new vector type. 8913 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 8914 // Check if the type is legal. 8915 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8916 if (!TLI.isTypeLegal(VecVT)) 8917 return SDValue(); 8918 8919 // Combine: 8920 // ARMISD::BUILD_VECTOR E1, E2, ..., EN. 8921 // => BITCAST INSERT_VECTOR_ELT 8922 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1), 8923 // (BITCAST EN), N. 8924 SDValue Vec = DAG.getUNDEF(VecVT); 8925 SDLoc dl(N); 8926 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) { 8927 SDValue V = N->getOperand(Idx); 8928 if (V.getOpcode() == ISD::UNDEF) 8929 continue; 8930 if (V.getOpcode() == ISD::BITCAST && 8931 V->getOperand(0).getValueType() == MVT::i32) 8932 // Fold obvious case. 8933 V = V.getOperand(0); 8934 else { 8935 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); 8936 // Make the DAGCombiner fold the bitcasts. 8937 DCI.AddToWorklist(V.getNode()); 8938 } 8939 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32); 8940 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx); 8941 } 8942 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec); 8943 // Make the DAGCombiner fold the bitcasts. 8944 DCI.AddToWorklist(Vec.getNode()); 8945 return Vec; 8946 } 8947 8948 /// PerformInsertEltCombine - Target-specific dag combine xforms for 8949 /// ISD::INSERT_VECTOR_ELT. 
8950 static SDValue PerformInsertEltCombine(SDNode *N, 8951 TargetLowering::DAGCombinerInfo &DCI) { 8952 // Bitcast an i64 load inserted into a vector to f64. 8953 // Otherwise, the i64 value will be legalized to a pair of i32 values. 8954 EVT VT = N->getValueType(0); 8955 SDNode *Elt = N->getOperand(1).getNode(); 8956 if (VT.getVectorElementType() != MVT::i64 || 8957 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile()) 8958 return SDValue(); 8959 8960 SelectionDAG &DAG = DCI.DAG; 8961 SDLoc dl(N); 8962 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 8963 VT.getVectorNumElements()); 8964 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); 8965 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1)); 8966 // Make the DAGCombiner fold the bitcasts. 8967 DCI.AddToWorklist(Vec.getNode()); 8968 DCI.AddToWorklist(V.getNode()); 8969 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT, 8970 Vec, V, N->getOperand(2)); 8971 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt); 8972 } 8973 8974 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for 8975 /// ISD::VECTOR_SHUFFLE. 8976 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { 8977 // The LLVM shufflevector instruction does not require the shuffle mask 8978 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does 8979 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the 8980 // operands do not match the mask length, they are extended by concatenating 8981 // them with undef vectors. That is probably the right thing for other 8982 // targets, but for NEON it is better to concatenate two double-register 8983 // size vector operands into a single quad-register size vector. 
Do that 8984 // transformation here: 8985 // shuffle(concat(v1, undef), concat(v2, undef)) -> 8986 // shuffle(concat(v1, v2), undef) 8987 SDValue Op0 = N->getOperand(0); 8988 SDValue Op1 = N->getOperand(1); 8989 if (Op0.getOpcode() != ISD::CONCAT_VECTORS || 8990 Op1.getOpcode() != ISD::CONCAT_VECTORS || 8991 Op0.getNumOperands() != 2 || 8992 Op1.getNumOperands() != 2) 8993 return SDValue(); 8994 SDValue Concat0Op1 = Op0.getOperand(1); 8995 SDValue Concat1Op1 = Op1.getOperand(1); 8996 if (Concat0Op1.getOpcode() != ISD::UNDEF || 8997 Concat1Op1.getOpcode() != ISD::UNDEF) 8998 return SDValue(); 8999 // Skip the transformation if any of the types are illegal. 9000 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9001 EVT VT = N->getValueType(0); 9002 if (!TLI.isTypeLegal(VT) || 9003 !TLI.isTypeLegal(Concat0Op1.getValueType()) || 9004 !TLI.isTypeLegal(Concat1Op1.getValueType())) 9005 return SDValue(); 9006 9007 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, 9008 Op0.getOperand(0), Op1.getOperand(0)); 9009 // Translate the shuffle mask. 9010 SmallVector<int, 16> NewMask; 9011 unsigned NumElts = VT.getVectorNumElements(); 9012 unsigned HalfElts = NumElts/2; 9013 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 9014 for (unsigned n = 0; n < NumElts; ++n) { 9015 int MaskElt = SVN->getMaskElt(n); 9016 int NewElt = -1; 9017 if (MaskElt < (int)HalfElts) 9018 NewElt = MaskElt; 9019 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts)) 9020 NewElt = HalfElts + MaskElt - NumElts; 9021 NewMask.push_back(NewElt); 9022 } 9023 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat, 9024 DAG.getUNDEF(VT), NewMask.data()); 9025 } 9026 9027 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, 9028 /// NEON load/store intrinsics, and generic vector load/stores, to merge 9029 /// base address updates. 9030 /// For generic load/stores, the memory type is assumed to be a vector. 
9031 /// The caller is assumed to have checked legality. 9032 static SDValue CombineBaseUpdate(SDNode *N, 9033 TargetLowering::DAGCombinerInfo &DCI) { 9034 SelectionDAG &DAG = DCI.DAG; 9035 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || 9036 N->getOpcode() == ISD::INTRINSIC_W_CHAIN); 9037 const bool isStore = N->getOpcode() == ISD::STORE; 9038 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1); 9039 SDValue Addr = N->getOperand(AddrOpIdx); 9040 MemSDNode *MemN = cast<MemSDNode>(N); 9041 SDLoc dl(N); 9042 9043 // Search for a use of the address operand that is an increment. 9044 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), 9045 UE = Addr.getNode()->use_end(); UI != UE; ++UI) { 9046 SDNode *User = *UI; 9047 if (User->getOpcode() != ISD::ADD || 9048 UI.getUse().getResNo() != Addr.getResNo()) 9049 continue; 9050 9051 // Check that the add is independent of the load/store. Otherwise, folding 9052 // it would create a cycle. 9053 if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) 9054 continue; 9055 9056 // Find the new opcode for the updating load/store. 
9057 bool isLoadOp = true; 9058 bool isLaneOp = false; 9059 unsigned NewOpc = 0; 9060 unsigned NumVecs = 0; 9061 if (isIntrinsic) { 9062 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 9063 switch (IntNo) { 9064 default: llvm_unreachable("unexpected intrinsic for Neon base update"); 9065 case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; 9066 NumVecs = 1; break; 9067 case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; 9068 NumVecs = 2; break; 9069 case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; 9070 NumVecs = 3; break; 9071 case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; 9072 NumVecs = 4; break; 9073 case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; 9074 NumVecs = 2; isLaneOp = true; break; 9075 case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; 9076 NumVecs = 3; isLaneOp = true; break; 9077 case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; 9078 NumVecs = 4; isLaneOp = true; break; 9079 case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; 9080 NumVecs = 1; isLoadOp = false; break; 9081 case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; 9082 NumVecs = 2; isLoadOp = false; break; 9083 case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; 9084 NumVecs = 3; isLoadOp = false; break; 9085 case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; 9086 NumVecs = 4; isLoadOp = false; break; 9087 case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; 9088 NumVecs = 2; isLoadOp = false; isLaneOp = true; break; 9089 case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; 9090 NumVecs = 3; isLoadOp = false; isLaneOp = true; break; 9091 case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; 9092 NumVecs = 4; isLoadOp = false; isLaneOp = true; break; 9093 } 9094 } else { 9095 isLaneOp = true; 9096 switch (N->getOpcode()) { 9097 default: llvm_unreachable("unexpected opcode for Neon base update"); 9098 case ARMISD::VLD2DUP: NewOpc = 
ARMISD::VLD2DUP_UPD; NumVecs = 2; break; 9099 case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; 9100 case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; 9101 case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD; 9102 NumVecs = 1; isLaneOp = false; break; 9103 case ISD::STORE: NewOpc = ARMISD::VST1_UPD; 9104 NumVecs = 1; isLaneOp = false; isLoadOp = false; break; 9105 } 9106 } 9107 9108 // Find the size of memory referenced by the load/store. 9109 EVT VecTy; 9110 if (isLoadOp) { 9111 VecTy = N->getValueType(0); 9112 } else if (isIntrinsic) { 9113 VecTy = N->getOperand(AddrOpIdx+1).getValueType(); 9114 } else { 9115 assert(isStore && "Node has to be a load, a store, or an intrinsic!"); 9116 VecTy = N->getOperand(1).getValueType(); 9117 } 9118 9119 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; 9120 if (isLaneOp) 9121 NumBytes /= VecTy.getVectorNumElements(); 9122 9123 // If the increment is a constant, it must match the memory ref size. 9124 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); 9125 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { 9126 uint64_t IncVal = CInc->getZExtValue(); 9127 if (IncVal != NumBytes) 9128 continue; 9129 } else if (NumBytes >= 3 * 16) { 9130 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two 9131 // separate instructions that make it harder to use a non-constant update. 9132 continue; 9133 } 9134 9135 // OK, we found an ADD we can fold into the base update. 9136 // Now, create a _UPD node, taking care of not breaking alignment. 9137 9138 EVT AlignedVecTy = VecTy; 9139 unsigned Alignment = MemN->getAlignment(); 9140 9141 // If this is a less-than-standard-aligned load/store, change the type to 9142 // match the standard alignment. 9143 // The alignment is overlooked when selecting _UPD variants; and it's 9144 // easier to introduce bitcasts here than fix that. 
9145 // There are 3 ways to get to this base-update combine: 9146 // - intrinsics: they are assumed to be properly aligned (to the standard 9147 // alignment of the memory type), so we don't need to do anything. 9148 // - ARMISD::VLDx nodes: they are only generated from the aforementioned 9149 // intrinsics, so, likewise, there's nothing to do. 9150 // - generic load/store instructions: the alignment is specified as an 9151 // explicit operand, rather than implicitly as the standard alignment 9152 // of the memory type (like the intrisics). We need to change the 9153 // memory type to match the explicit alignment. That way, we don't 9154 // generate non-standard-aligned ARMISD::VLDx nodes. 9155 if (isa<LSBaseSDNode>(N)) { 9156 if (Alignment == 0) 9157 Alignment = 1; 9158 if (Alignment < VecTy.getScalarSizeInBits() / 8) { 9159 MVT EltTy = MVT::getIntegerVT(Alignment * 8); 9160 assert(NumVecs == 1 && "Unexpected multi-element generic load/store."); 9161 assert(!isLaneOp && "Unexpected generic load/store lane."); 9162 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8); 9163 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts); 9164 } 9165 // Don't set an explicit alignment on regular load/stores that we want 9166 // to transform to VLD/VST 1_UPD nodes. 9167 // This matches the behavior of regular load/stores, which only get an 9168 // explicit alignment if the MMO alignment is larger than the standard 9169 // alignment of the memory type. 9170 // Intrinsics, however, always get an explicit alignment, set to the 9171 // alignment of the MMO. 9172 Alignment = 1; 9173 } 9174 9175 // Create the new updating load/store node. 9176 // First, create an SDVTList for the new updating node's results. 9177 EVT Tys[6]; 9178 unsigned NumResultVecs = (isLoadOp ? 
NumVecs : 0); 9179 unsigned n; 9180 for (n = 0; n < NumResultVecs; ++n) 9181 Tys[n] = AlignedVecTy; 9182 Tys[n++] = MVT::i32; 9183 Tys[n] = MVT::Other; 9184 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2)); 9185 9186 // Then, gather the new node's operands. 9187 SmallVector<SDValue, 8> Ops; 9188 Ops.push_back(N->getOperand(0)); // incoming chain 9189 Ops.push_back(N->getOperand(AddrOpIdx)); 9190 Ops.push_back(Inc); 9191 9192 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) { 9193 // Try to match the intrinsic's signature 9194 Ops.push_back(StN->getValue()); 9195 } else { 9196 // Loads (and of course intrinsics) match the intrinsics' signature, 9197 // so just add all but the alignment operand. 9198 for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i) 9199 Ops.push_back(N->getOperand(i)); 9200 } 9201 9202 // For all node types, the alignment operand is always the last one. 9203 Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32)); 9204 9205 // If this is a non-standard-aligned STORE, the penultimate operand is the 9206 // stored value. Bitcast it to the aligned type. 9207 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) { 9208 SDValue &StVal = Ops[Ops.size()-2]; 9209 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal); 9210 } 9211 9212 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, 9213 Ops, AlignedVecTy, 9214 MemN->getMemOperand()); 9215 9216 // Update the uses. 9217 SmallVector<SDValue, 5> NewResults; 9218 for (unsigned i = 0; i < NumResultVecs; ++i) 9219 NewResults.push_back(SDValue(UpdN.getNode(), i)); 9220 9221 // If this is an non-standard-aligned LOAD, the first result is the loaded 9222 // value. Bitcast it to the expected result type. 
9223 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) { 9224 SDValue &LdVal = NewResults[0]; 9225 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal); 9226 } 9227 9228 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain 9229 DCI.CombineTo(N, NewResults); 9230 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); 9231 9232 break; 9233 } 9234 return SDValue(); 9235 } 9236 9237 static SDValue PerformVLDCombine(SDNode *N, 9238 TargetLowering::DAGCombinerInfo &DCI) { 9239 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 9240 return SDValue(); 9241 9242 return CombineBaseUpdate(N, DCI); 9243 } 9244 9245 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a 9246 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic 9247 /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and 9248 /// return true. 9249 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 9250 SelectionDAG &DAG = DCI.DAG; 9251 EVT VT = N->getValueType(0); 9252 // vldN-dup instructions only support 64-bit vectors for N > 1. 9253 if (!VT.is64BitVector()) 9254 return false; 9255 9256 // Check if the VDUPLANE operand is a vldN-dup intrinsic. 9257 SDNode *VLD = N->getOperand(0).getNode(); 9258 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) 9259 return false; 9260 unsigned NumVecs = 0; 9261 unsigned NewOpc = 0; 9262 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); 9263 if (IntNo == Intrinsic::arm_neon_vld2lane) { 9264 NumVecs = 2; 9265 NewOpc = ARMISD::VLD2DUP; 9266 } else if (IntNo == Intrinsic::arm_neon_vld3lane) { 9267 NumVecs = 3; 9268 NewOpc = ARMISD::VLD3DUP; 9269 } else if (IntNo == Intrinsic::arm_neon_vld4lane) { 9270 NumVecs = 4; 9271 NewOpc = ARMISD::VLD4DUP; 9272 } else { 9273 return false; 9274 } 9275 9276 // First check that all the vldN-lane uses are VDUPLANEs and that the lane 9277 // numbers match the load. 
9278 unsigned VLDLaneNo = 9279 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue(); 9280 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 9281 UI != UE; ++UI) { 9282 // Ignore uses of the chain result. 9283 if (UI.getUse().getResNo() == NumVecs) 9284 continue; 9285 SDNode *User = *UI; 9286 if (User->getOpcode() != ARMISD::VDUPLANE || 9287 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) 9288 return false; 9289 } 9290 9291 // Create the vldN-dup node. 9292 EVT Tys[5]; 9293 unsigned n; 9294 for (n = 0; n < NumVecs; ++n) 9295 Tys[n] = VT; 9296 Tys[n] = MVT::Other; 9297 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1)); 9298 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; 9299 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); 9300 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, 9301 Ops, VLDMemInt->getMemoryVT(), 9302 VLDMemInt->getMemOperand()); 9303 9304 // Update the uses. 9305 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 9306 UI != UE; ++UI) { 9307 unsigned ResNo = UI.getUse().getResNo(); 9308 // Ignore uses of the chain result. 9309 if (ResNo == NumVecs) 9310 continue; 9311 SDNode *User = *UI; 9312 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); 9313 } 9314 9315 // Now the vldN-lane intrinsic is dead except for its chain result. 9316 // Update uses of the chain. 9317 std::vector<SDValue> VLDDupResults; 9318 for (unsigned n = 0; n < NumVecs; ++n) 9319 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); 9320 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); 9321 DCI.CombineTo(VLD, VLDDupResults); 9322 9323 return true; 9324 } 9325 9326 /// PerformVDUPLANECombine - Target-specific dag combine xforms for 9327 /// ARMISD::VDUPLANE. 
9328 static SDValue PerformVDUPLANECombine(SDNode *N, 9329 TargetLowering::DAGCombinerInfo &DCI) { 9330 SDValue Op = N->getOperand(0); 9331 9332 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses 9333 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation. 9334 if (CombineVLDDUP(N, DCI)) 9335 return SDValue(N, 0); 9336 9337 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is 9338 // redundant. Ignore bit_converts for now; element sizes are checked below. 9339 while (Op.getOpcode() == ISD::BITCAST) 9340 Op = Op.getOperand(0); 9341 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) 9342 return SDValue(); 9343 9344 // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 9345 unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); 9346 // The canonical VMOV for a zero vector uses a 32-bit element size. 9347 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 9348 unsigned EltBits; 9349 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) 9350 EltSize = 8; 9351 EVT VT = N->getValueType(0); 9352 if (EltSize > VT.getVectorElementType().getSizeInBits()) 9353 return SDValue(); 9354 9355 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); 9356 } 9357 9358 static SDValue PerformLOADCombine(SDNode *N, 9359 TargetLowering::DAGCombinerInfo &DCI) { 9360 EVT VT = N->getValueType(0); 9361 9362 // If this is a legal vector load, try to combine it into a VLD1_UPD. 9363 if (ISD::isNormalLoad(N) && VT.isVector() && 9364 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) 9365 return CombineBaseUpdate(N, DCI); 9366 9367 return SDValue(); 9368 } 9369 9370 /// PerformSTORECombine - Target-specific dag combine xforms for 9371 /// ISD::STORE. 
9372 static SDValue PerformSTORECombine(SDNode *N, 9373 TargetLowering::DAGCombinerInfo &DCI) { 9374 StoreSDNode *St = cast<StoreSDNode>(N); 9375 if (St->isVolatile()) 9376 return SDValue(); 9377 9378 // Optimize trunc store (of multiple scalars) to shuffle and store. First, 9379 // pack all of the elements in one place. Next, store to memory in fewer 9380 // chunks. 9381 SDValue StVal = St->getValue(); 9382 EVT VT = StVal.getValueType(); 9383 if (St->isTruncatingStore() && VT.isVector()) { 9384 SelectionDAG &DAG = DCI.DAG; 9385 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9386 EVT StVT = St->getMemoryVT(); 9387 unsigned NumElems = VT.getVectorNumElements(); 9388 assert(StVT != VT && "Cannot truncate to the same type"); 9389 unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); 9390 unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); 9391 9392 // From, To sizes and ElemCount must be pow of two 9393 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); 9394 9395 // We are going to use the original vector elt for storing. 9396 // Accumulated smaller vector elements must be a multiple of the store size. 9397 if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); 9398 9399 unsigned SizeRatio = FromEltSz / ToEltSz; 9400 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); 9401 9402 // Create a type on which we perform the shuffle. 9403 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), 9404 NumElems*SizeRatio); 9405 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); 9406 9407 SDLoc DL(St); 9408 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); 9409 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); 9410 for (unsigned i = 0; i < NumElems; ++i) 9411 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() 9412 ? (i + 1) * SizeRatio - 1 9413 : i * SizeRatio; 9414 9415 // Can't shuffle using an illegal type. 
9416 if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); 9417 9418 SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, 9419 DAG.getUNDEF(WideVec.getValueType()), 9420 ShuffleVec.data()); 9421 // At this point all of the data is stored at the bottom of the 9422 // register. We now need to save it to mem. 9423 9424 // Find the largest store unit 9425 MVT StoreType = MVT::i8; 9426 for (MVT Tp : MVT::integer_valuetypes()) { 9427 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) 9428 StoreType = Tp; 9429 } 9430 // Didn't find a legal store type. 9431 if (!TLI.isTypeLegal(StoreType)) 9432 return SDValue(); 9433 9434 // Bitcast the original vector into a vector of store-size units 9435 EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), 9436 StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); 9437 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); 9438 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); 9439 SmallVector<SDValue, 8> Chains; 9440 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL, 9441 TLI.getPointerTy(DAG.getDataLayout())); 9442 SDValue BasePtr = St->getBasePtr(); 9443 9444 // Perform one or more big stores into memory. 
9445 unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); 9446 for (unsigned I = 0; I < E; I++) { 9447 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 9448 StoreType, ShuffWide, 9449 DAG.getIntPtrConstant(I, DL)); 9450 SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, 9451 St->getPointerInfo(), St->isVolatile(), 9452 St->isNonTemporal(), St->getAlignment()); 9453 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, 9454 Increment); 9455 Chains.push_back(Ch); 9456 } 9457 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); 9458 } 9459 9460 if (!ISD::isNormalStore(St)) 9461 return SDValue(); 9462 9463 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and 9464 // ARM stores of arguments in the same cache line. 9465 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && 9466 StVal.getNode()->hasOneUse()) { 9467 SelectionDAG &DAG = DCI.DAG; 9468 bool isBigEndian = DAG.getDataLayout().isBigEndian(); 9469 SDLoc DL(St); 9470 SDValue BasePtr = St->getBasePtr(); 9471 SDValue NewST1 = DAG.getStore(St->getChain(), DL, 9472 StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ), 9473 BasePtr, St->getPointerInfo(), St->isVolatile(), 9474 St->isNonTemporal(), St->getAlignment()); 9475 9476 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 9477 DAG.getConstant(4, DL, MVT::i32)); 9478 return DAG.getStore(NewST1.getValue(0), DL, 9479 StVal.getNode()->getOperand(isBigEndian ? 0 : 1), 9480 OffsetPtr, St->getPointerInfo(), St->isVolatile(), 9481 St->isNonTemporal(), 9482 std::min(4U, St->getAlignment() / 2)); 9483 } 9484 9485 if (StVal.getValueType() == MVT::i64 && 9486 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 9487 9488 // Bitcast an i64 store extracted from a vector to f64. 9489 // Otherwise, the i64 value will be legalized to a pair of i32 values. 
9490 SelectionDAG &DAG = DCI.DAG; 9491 SDLoc dl(StVal); 9492 SDValue IntVec = StVal.getOperand(0); 9493 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 9494 IntVec.getValueType().getVectorNumElements()); 9495 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); 9496 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, 9497 Vec, StVal.getOperand(1)); 9498 dl = SDLoc(N); 9499 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); 9500 // Make the DAGCombiner fold the bitcasts. 9501 DCI.AddToWorklist(Vec.getNode()); 9502 DCI.AddToWorklist(ExtElt.getNode()); 9503 DCI.AddToWorklist(V.getNode()); 9504 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), 9505 St->getPointerInfo(), St->isVolatile(), 9506 St->isNonTemporal(), St->getAlignment(), 9507 St->getAAInfo()); 9508 } 9509 9510 // If this is a legal vector store, try to combine it into a VST1_UPD. 9511 if (ISD::isNormalStore(N) && VT.isVector() && 9512 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) 9513 return CombineBaseUpdate(N, DCI); 9514 9515 return SDValue(); 9516 } 9517 9518 // isConstVecPow2 - Return true if each vector element is a power of 2, all 9519 // elements are the same constant, C, and Log2(C) ranges from 1 to 32. 9520 static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) 9521 { 9522 integerPart cN; 9523 integerPart c0 = 0; 9524 for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); 9525 I != E; I++) { 9526 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I)); 9527 if (!C) 9528 return false; 9529 9530 bool isExact; 9531 APFloat APF = C->getValueAPF(); 9532 if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) 9533 != APFloat::opOK || !isExact) 9534 return false; 9535 9536 c0 = (I == 0) ? 
cN : c0; 9537 if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) 9538 return false; 9539 } 9540 C = c0; 9541 return true; 9542 } 9543 9544 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) 9545 /// can replace combinations of VMUL and VCVT (floating-point to integer) 9546 /// when the VMUL has a constant operand that is a power of 2. 9547 /// 9548 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 9549 /// vmul.f32 d16, d17, d16 9550 /// vcvt.s32.f32 d16, d16 9551 /// becomes: 9552 /// vcvt.s32.f32 d16, d16, #3 9553 static SDValue PerformVCVTCombine(SDNode *N, 9554 TargetLowering::DAGCombinerInfo &DCI, 9555 const ARMSubtarget *Subtarget) { 9556 SelectionDAG &DAG = DCI.DAG; 9557 SDValue Op = N->getOperand(0); 9558 9559 if (!Subtarget->hasNEON() || !Op.getValueType().isVector() || 9560 Op.getOpcode() != ISD::FMUL) 9561 return SDValue(); 9562 9563 uint64_t C; 9564 SDValue N0 = Op->getOperand(0); 9565 SDValue ConstVec = Op->getOperand(1); 9566 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; 9567 9568 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 9569 !isConstVecPow2(ConstVec, isSigned, C)) 9570 return SDValue(); 9571 9572 MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); 9573 MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); 9574 unsigned NumLanes = Op.getValueType().getVectorNumElements(); 9575 if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 || 9576 NumLanes > 4) { 9577 // These instructions only exist converting from f32 to i32. We can handle 9578 // smaller integers by generating an extra truncate, but larger ones would 9579 // be lossy. We also can't handle more then 4 lanes, since these intructions 9580 // only support v2i32/v4i32 types. 9581 return SDValue(); 9582 } 9583 9584 SDLoc dl(N); 9585 unsigned IntrinsicOpcode = isSigned ? 
Intrinsic::arm_neon_vcvtfp2fxs : 9586 Intrinsic::arm_neon_vcvtfp2fxu; 9587 SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, 9588 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, 9589 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), 9590 N0, 9591 DAG.getConstant(Log2_64(C), dl, MVT::i32)); 9592 9593 if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) 9594 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv); 9595 9596 return FixConv; 9597 } 9598 9599 /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) 9600 /// can replace combinations of VCVT (integer to floating-point) and VDIV 9601 /// when the VDIV has a constant operand that is a power of 2. 9602 /// 9603 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 9604 /// vcvt.f32.s32 d16, d16 9605 /// vdiv.f32 d16, d17, d16 9606 /// becomes: 9607 /// vcvt.f32.s32 d16, d16, #3 9608 static SDValue PerformVDIVCombine(SDNode *N, 9609 TargetLowering::DAGCombinerInfo &DCI, 9610 const ARMSubtarget *Subtarget) { 9611 SelectionDAG &DAG = DCI.DAG; 9612 SDValue Op = N->getOperand(0); 9613 unsigned OpOpcode = Op.getNode()->getOpcode(); 9614 9615 if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() || 9616 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) 9617 return SDValue(); 9618 9619 uint64_t C; 9620 SDValue ConstVec = N->getOperand(1); 9621 bool isSigned = OpOpcode == ISD::SINT_TO_FP; 9622 9623 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 9624 !isConstVecPow2(ConstVec, isSigned, C)) 9625 return SDValue(); 9626 9627 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType(); 9628 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType(); 9629 if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { 9630 // These instructions only exist converting from i32 to f32. We can handle 9631 // smaller integers by generating an extra extend, but larger ones would 9632 // be lossy. 
9633 return SDValue(); 9634 } 9635 9636 SDLoc dl(N); 9637 SDValue ConvInput = Op.getOperand(0); 9638 unsigned NumLanes = Op.getValueType().getVectorNumElements(); 9639 if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) 9640 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, 9641 dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, 9642 ConvInput); 9643 9644 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : 9645 Intrinsic::arm_neon_vcvtfxu2fp; 9646 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, 9647 Op.getValueType(), 9648 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), 9649 ConvInput, DAG.getConstant(Log2_64(C), dl, MVT::i32)); 9650 } 9651 9652 /// Getvshiftimm - Check if this is a valid build_vector for the immediate 9653 /// operand of a vector shift operation, where all the elements of the 9654 /// build_vector must have the same constant integer value. 9655 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 9656 // Ignore bit_converts. 9657 while (Op.getOpcode() == ISD::BITCAST) 9658 Op = Op.getOperand(0); 9659 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 9660 APInt SplatBits, SplatUndef; 9661 unsigned SplatBitSize; 9662 bool HasAnyUndefs; 9663 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 9664 HasAnyUndefs, ElementBits) || 9665 SplatBitSize > ElementBits) 9666 return false; 9667 Cnt = SplatBits.getSExtValue(); 9668 return true; 9669 } 9670 9671 /// isVShiftLImm - Check if this is a valid build_vector for the immediate 9672 /// operand of a vector shift left operation. That value must be in the range: 9673 /// 0 <= Value < ElementBits for a left shift; or 9674 /// 0 <= Value <= ElementBits for a long left shift. 9675 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 9676 assert(VT.isVector() && "vector shift count is not a vector type"); 9677 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 9678 if (! 
getVShiftImm(Op, ElementBits, Cnt)) 9679 return false; 9680 return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); 9681 } 9682 9683 /// isVShiftRImm - Check if this is a valid build_vector for the immediate 9684 /// operand of a vector shift right operation. For a shift opcode, the value 9685 /// is positive, but for an intrinsic the value count must be negative. The 9686 /// absolute value must be in the range: 9687 /// 1 <= |Value| <= ElementBits for a right shift; or 9688 /// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 9689 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, 9690 int64_t &Cnt) { 9691 assert(VT.isVector() && "vector shift count is not a vector type"); 9692 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 9693 if (! getVShiftImm(Op, ElementBits, Cnt)) 9694 return false; 9695 if (isIntrinsic) 9696 Cnt = -Cnt; 9697 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); 9698 } 9699 9700 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. 9701 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { 9702 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 9703 switch (IntNo) { 9704 default: 9705 // Don't do anything for most intrinsics. 9706 break; 9707 9708 // Vector shifts: check for immediate versions and lower them. 9709 // Note: This is done during DAG combining instead of DAG legalizing because 9710 // the build_vectors for 64-bit vector element shift counts are generally 9711 // not legal, and it is hard to see their values after they get legalized to 9712 // loads from a constant pool. 
9713 case Intrinsic::arm_neon_vshifts: 9714 case Intrinsic::arm_neon_vshiftu: 9715 case Intrinsic::arm_neon_vrshifts: 9716 case Intrinsic::arm_neon_vrshiftu: 9717 case Intrinsic::arm_neon_vrshiftn: 9718 case Intrinsic::arm_neon_vqshifts: 9719 case Intrinsic::arm_neon_vqshiftu: 9720 case Intrinsic::arm_neon_vqshiftsu: 9721 case Intrinsic::arm_neon_vqshiftns: 9722 case Intrinsic::arm_neon_vqshiftnu: 9723 case Intrinsic::arm_neon_vqshiftnsu: 9724 case Intrinsic::arm_neon_vqrshiftns: 9725 case Intrinsic::arm_neon_vqrshiftnu: 9726 case Intrinsic::arm_neon_vqrshiftnsu: { 9727 EVT VT = N->getOperand(1).getValueType(); 9728 int64_t Cnt; 9729 unsigned VShiftOpc = 0; 9730 9731 switch (IntNo) { 9732 case Intrinsic::arm_neon_vshifts: 9733 case Intrinsic::arm_neon_vshiftu: 9734 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 9735 VShiftOpc = ARMISD::VSHL; 9736 break; 9737 } 9738 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 9739 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 9740 ARMISD::VSHRs : ARMISD::VSHRu); 9741 break; 9742 } 9743 return SDValue(); 9744 9745 case Intrinsic::arm_neon_vrshifts: 9746 case Intrinsic::arm_neon_vrshiftu: 9747 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 9748 break; 9749 return SDValue(); 9750 9751 case Intrinsic::arm_neon_vqshifts: 9752 case Intrinsic::arm_neon_vqshiftu: 9753 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 9754 break; 9755 return SDValue(); 9756 9757 case Intrinsic::arm_neon_vqshiftsu: 9758 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 9759 break; 9760 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 9761 9762 case Intrinsic::arm_neon_vrshiftn: 9763 case Intrinsic::arm_neon_vqshiftns: 9764 case Intrinsic::arm_neon_vqshiftnu: 9765 case Intrinsic::arm_neon_vqshiftnsu: 9766 case Intrinsic::arm_neon_vqrshiftns: 9767 case Intrinsic::arm_neon_vqrshiftnu: 9768 case Intrinsic::arm_neon_vqrshiftnsu: 9769 // Narrowing shifts require an immediate right shift. 
9770 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 9771 break; 9772 llvm_unreachable("invalid shift count for narrowing vector shift " 9773 "intrinsic"); 9774 9775 default: 9776 llvm_unreachable("unhandled vector shift"); 9777 } 9778 9779 switch (IntNo) { 9780 case Intrinsic::arm_neon_vshifts: 9781 case Intrinsic::arm_neon_vshiftu: 9782 // Opcode already set above. 9783 break; 9784 case Intrinsic::arm_neon_vrshifts: 9785 VShiftOpc = ARMISD::VRSHRs; break; 9786 case Intrinsic::arm_neon_vrshiftu: 9787 VShiftOpc = ARMISD::VRSHRu; break; 9788 case Intrinsic::arm_neon_vrshiftn: 9789 VShiftOpc = ARMISD::VRSHRN; break; 9790 case Intrinsic::arm_neon_vqshifts: 9791 VShiftOpc = ARMISD::VQSHLs; break; 9792 case Intrinsic::arm_neon_vqshiftu: 9793 VShiftOpc = ARMISD::VQSHLu; break; 9794 case Intrinsic::arm_neon_vqshiftsu: 9795 VShiftOpc = ARMISD::VQSHLsu; break; 9796 case Intrinsic::arm_neon_vqshiftns: 9797 VShiftOpc = ARMISD::VQSHRNs; break; 9798 case Intrinsic::arm_neon_vqshiftnu: 9799 VShiftOpc = ARMISD::VQSHRNu; break; 9800 case Intrinsic::arm_neon_vqshiftnsu: 9801 VShiftOpc = ARMISD::VQSHRNsu; break; 9802 case Intrinsic::arm_neon_vqrshiftns: 9803 VShiftOpc = ARMISD::VQRSHRNs; break; 9804 case Intrinsic::arm_neon_vqrshiftnu: 9805 VShiftOpc = ARMISD::VQRSHRNu; break; 9806 case Intrinsic::arm_neon_vqrshiftnsu: 9807 VShiftOpc = ARMISD::VQRSHRNsu; break; 9808 } 9809 9810 SDLoc dl(N); 9811 return DAG.getNode(VShiftOpc, dl, N->getValueType(0), 9812 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32)); 9813 } 9814 9815 case Intrinsic::arm_neon_vshiftins: { 9816 EVT VT = N->getOperand(1).getValueType(); 9817 int64_t Cnt; 9818 unsigned VShiftOpc = 0; 9819 9820 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 9821 VShiftOpc = ARMISD::VSLI; 9822 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 9823 VShiftOpc = ARMISD::VSRI; 9824 else { 9825 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 9826 } 9827 9828 SDLoc dl(N); 9829 return 
DAG.getNode(VShiftOpc, dl, N->getValueType(0), 9830 N->getOperand(1), N->getOperand(2), 9831 DAG.getConstant(Cnt, dl, MVT::i32)); 9832 } 9833 9834 case Intrinsic::arm_neon_vqrshifts: 9835 case Intrinsic::arm_neon_vqrshiftu: 9836 // No immediate versions of these to check for. 9837 break; 9838 } 9839 9840 return SDValue(); 9841 } 9842 9843 /// PerformShiftCombine - Checks for immediate versions of vector shifts and 9844 /// lowers them. As with the vector shift intrinsics, this is done during DAG 9845 /// combining instead of DAG legalizing because the build_vectors for 64-bit 9846 /// vector element shift counts are generally not legal, and it is hard to see 9847 /// their values after they get legalized to loads from a constant pool. 9848 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 9849 const ARMSubtarget *ST) { 9850 EVT VT = N->getValueType(0); 9851 if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { 9852 // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 9853 // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. 9854 SDValue N1 = N->getOperand(1); 9855 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 9856 SDValue N0 = N->getOperand(0); 9857 if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && 9858 DAG.MaskedValueIsZero(N0.getOperand(0), 9859 APInt::getHighBitsSet(32, 16))) 9860 return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1); 9861 } 9862 } 9863 9864 // Nothing to be done for scalar shifts. 
9865 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9866 if (!VT.isVector() || !TLI.isTypeLegal(VT)) 9867 return SDValue(); 9868 9869 assert(ST->hasNEON() && "unexpected vector shift"); 9870 int64_t Cnt; 9871 9872 switch (N->getOpcode()) { 9873 default: llvm_unreachable("unexpected shift opcode"); 9874 9875 case ISD::SHL: 9876 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) { 9877 SDLoc dl(N); 9878 return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0), 9879 DAG.getConstant(Cnt, dl, MVT::i32)); 9880 } 9881 break; 9882 9883 case ISD::SRA: 9884 case ISD::SRL: 9885 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 9886 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 9887 ARMISD::VSHRs : ARMISD::VSHRu); 9888 SDLoc dl(N); 9889 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), 9890 DAG.getConstant(Cnt, dl, MVT::i32)); 9891 } 9892 } 9893 return SDValue(); 9894 } 9895 9896 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 9897 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 9898 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 9899 const ARMSubtarget *ST) { 9900 SDValue N0 = N->getOperand(0); 9901 9902 // Check for sign- and zero-extensions of vector extract operations of 8- 9903 // and 16-bit vector elements. NEON supports these directly. They are 9904 // handled during DAG combining because type legalization will promote them 9905 // to 32-bit types and it is messy to recognize the operations after that. 
if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue Vec = N0.getOperand(0);
    SDValue Lane = N0.getOperand(1);
    EVT VT = N->getValueType(0);
    EVT EltVT = N0.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();

    if (VT == MVT::i32 &&
        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
        TLI.isTypeLegal(Vec.getValueType()) &&
        isa<ConstantSDNode>(Lane)) {

      unsigned Opc = 0;
      switch (N->getOpcode()) {
      default: llvm_unreachable("unexpected opcode");
      case ISD::SIGN_EXTEND:
        Opc = ARMISD::VGETLANEs;
        break;
      case ISD::ZERO_EXTEND:
      case ISD::ANY_EXTEND:
        Opc = ARMISD::VGETLANEu;
        break;
      }
      return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
    }
  }

  return SDValue();
}

/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
///
/// As read below, operands 0 and 1 of \p N are the two compared values,
/// operands 2 and 3 are the two selected values, and operand 4 is the
/// condition code. Returns an ARMISD::FMIN / ARMISD::FMAX node on success and
/// an empty SDValue when the select is left untouched.
static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
                                       const ARMSubtarget *ST) {
  // If the target supports NEON, try to use vmax/vmin instructions for f32
  // selects like "x < y ? x : y".  Unless the NoNaNsFPMath option is set,
  // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
  // a NaN; only do the transformation when it matches that behavior.

  // For now only do this when using NEON for FP operations; if using VFP, it
  // is not obvious that the benefit outweighs the cost of switching to the
  // NEON pipeline.
  if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
      N->getValueType(0) != MVT::f32)
    return SDValue();

  SDValue CondLHS = N->getOperand(0);
  SDValue CondRHS = N->getOperand(1);
  SDValue LHS = N->getOperand(2);
  SDValue RHS = N->getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();

  // Opcode stays 0 unless one of the cases below proves the rewrite is safe.
  unsigned Opcode = 0;
  // The selected values must be exactly the compared values, either in the
  // same order or swapped; anything else is not a min/max pattern.
  bool IsReversed;
  if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
    IsReversed = false; // x CC y ? x : y
  } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
    IsReversed = true ; // x CC y ? y : x
  } else {
    return SDValue();
  }

  bool IsUnordered;
  switch (CC) {
  default: break;
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE:
    // If LHS is NaN, an ordered comparison will be false and the result will
    // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
    IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
      break;
    // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
    // will return -0, so vmin can only be used for unsafe math or if one of
    // the operands is known to be nonzero.
    if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
        !DAG.getTarget().Options.UnsafeFPMath &&
        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
      break;
    // A "less than" select with swapped results picks the larger value.
    Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
    break;

  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    // If LHS is NaN, an ordered comparison will be false and the result will
    // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
    IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
      break;
    // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
    // will return +0, so vmax can only be used for unsafe math or if one of
    // the operands is known to be nonzero.
    if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
        !DAG.getTarget().Options.UnsafeFPMath &&
        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
      break;
    // A "greater than" select with swapped results picks the smaller value.
    Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
    break;
  }

  if (!Opcode)
    return SDValue();
  return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
}

/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
///
/// As read below, the CMOV operands are: 0 = value used when the condition is
/// false, 1 = value used when it is true, 2 = the ARM condition code constant,
/// 3 = an operand forwarded unchanged to the rewritten node, 4 = the compare.
/// Only CMOVs fed by ARMISD::CMPZ are considered. Returns the replacement
/// node, or an empty SDValue when neither pattern below matches.
SDValue
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
  SDValue Cmp = N->getOperand(4);
  if (Cmp.getOpcode() != ARMISD::CMPZ)
    // Only looking at EQ and NE cases.
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  SDValue LHS = Cmp.getOperand(0);
  SDValue RHS = Cmp.getOperand(1);
  SDValue FalseVal = N->getOperand(0);
  SDValue TrueVal = N->getOperand(1);
  SDValue ARMcc = N->getOperand(2);
  ARMCC::CondCodes CC =
    (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();

  // Simplify
  //   mov     r1, r0
  //   cmp     r1, x
  //   mov     r0, y
  //   moveq   r0, x
  // to
  //   cmp     r0, x
  //   movne   r0, y
  //
  //   mov     r1, r0
  //   cmp     r1, x
  //   mov     r0, x
  //   movne   r0, y
  // to
  //   cmp     r0, x
  //   movne   r0, y
  /// FIXME: Turn this into a target neutral optimization?
  SDValue Res;
  if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
    // The false value equals the compared constant: reuse the compared
    // register as the false operand and keep the existing compare.
    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
                      N->getOperand(3), Cmp);
  } else if (CC == ARMCC::EQ && TrueVal == RHS) {
    // This local ARMcc intentionally shadows the outer one; it is passed to
    // getARMCmp by reference together with ISD::SETNE and then used for the
    // new CMOV.
    SDValue ARMcc;
    SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
                      N->getOperand(3), NewCmp);
  }

  if (Res.getNode()) {
    // Known bits are computed on the ORIGINAL node N, not on Res.
    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
    // Capture demanded bits information that would be otherwise lost.
    // Only the exact i1 / i8 / i16 zero-extension masks are recognised.
    if (KnownZero == 0xfffffffe)
      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
                        DAG.getValueType(MVT::i1));
    else if (KnownZero == 0xffffff00)
      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
                        DAG.getValueType(MVT::i8));
    else if (KnownZero == 0xffff0000)
      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
                        DAG.getValueType(MVT::i16));
  }

  return Res;
}

/// PerformDAGCombine - Dispatch on the opcode of \p N to the matching
/// target-specific combine routine. Opcodes without an entry, and chained
/// intrinsics whose ID is not one of the listed NEON vld/vst intrinsics,
/// yield an empty SDValue.
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADDC:       return PerformADDCCombine(N, DCI, Subtarget);
  case ISD::ADD:        return PerformADDCombine(N, DCI, Subtarget);
  case ISD::SUB:        return PerformSUBCombine(N, DCI);
  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
  case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
  case ISD::XOR:        return PerformXORCombine(N, DCI, Subtarget);
  case ISD::AND:        return PerformANDCombine(N, DCI, Subtarget);
  case ARMISD::BFI:     return PerformBFICombine(N, DCI);
  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
  case ISD::STORE:      return PerformSTORECombine(N, DCI);
  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
  case ISD::FDIV:       return PerformVDIVCombine(N, DCI, Subtarget);
  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
  case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
  case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
  case ISD::LOAD:       return PerformLOADCombine(N, DCI);
  case ARMISD::VLD2DUP:
  case ARMISD::VLD3DUP:
  case ARMISD::VLD4DUP:
    return PerformVLDCombine(N, DCI);
  case ARMISD::BUILD_VECTOR:
    return PerformARMBUILD_VECTORCombine(N, DCI);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    // Operand 1 carries the intrinsic ID. Both the load and the store
    // intrinsics listed here are routed to PerformVLDCombine.
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    case Intrinsic::arm_neon_vld1:
    case Intrinsic::arm_neon_vld2:
    case Intrinsic::arm_neon_vld3:
    case Intrinsic::arm_neon_vld4:
    case Intrinsic::arm_neon_vld2lane:
    case Intrinsic::arm_neon_vld3lane:
    case Intrinsic::arm_neon_vld4lane:
    case Intrinsic::arm_neon_vst1:
    case Intrinsic::arm_neon_vst2:
    case Intrinsic::arm_neon_vst3:
    case Intrinsic::arm_neon_vst4:
    case Intrinsic::arm_neon_vst2lane:
    case Intrinsic::arm_neon_vst3lane:
    case Intrinsic::arm_neon_vst4lane:
      return PerformVLDCombine(N, DCI);
    default: break;
    }
    break;
  }
  return SDValue();
}

10148 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, 10149 EVT VT) const { 10150 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); 10151 } 10152 10153 bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, 10154 unsigned, 10155 unsigned, 10156 bool *Fast) const { 10157 // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus 10158 bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); 10159 10160 switch (VT.getSimpleVT().SimpleTy) { 10161 default: 10162 return false; 10163 case MVT::i8: 10164 case MVT::i16: 10165 case MVT::i32: { 10166 // Unaligned access can use (for example) LRDB, LRDH, LDR 10167 if (AllowsUnaligned) { 10168 if (Fast) 10169 *Fast = Subtarget->hasV7Ops(); 10170 return true; 10171 } 10172 return false; 10173 } 10174 case MVT::f64: 10175 case MVT::v2f64: { 10176 // For any little-endian targets with neon, we can support unaligned ld/st 10177 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. 10178 // A big-endian target may also explicitly support unaligned accesses 10179 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) { 10180 if (Fast) 10181 *Fast = true; 10182 return true; 10183 } 10184 return false; 10185 } 10186 } 10187 } 10188 10189 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, 10190 unsigned AlignCheck) { 10191 return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && 10192 (DstAlign == 0 || DstAlign % AlignCheck == 0)); 10193 } 10194 10195 EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, 10196 unsigned DstAlign, unsigned SrcAlign, 10197 bool IsMemset, bool ZeroMemset, 10198 bool MemcpyStrSrc, 10199 MachineFunction &MF) const { 10200 const Function *F = MF.getFunction(); 10201 10202 // See if we can use NEON instructions for this... 
10203 if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() && 10204 !F->hasFnAttribute(Attribute::NoImplicitFloat)) { 10205 bool Fast; 10206 if (Size >= 16 && 10207 (memOpAlign(SrcAlign, DstAlign, 16) || 10208 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) { 10209 return MVT::v2f64; 10210 } else if (Size >= 8 && 10211 (memOpAlign(SrcAlign, DstAlign, 8) || 10212 (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) && 10213 Fast))) { 10214 return MVT::f64; 10215 } 10216 } 10217 10218 // Lowering to i32/i16 if the size permits. 10219 if (Size >= 4) 10220 return MVT::i32; 10221 else if (Size >= 2) 10222 return MVT::i16; 10223 10224 // Let the target-independent logic figure it out. 10225 return MVT::Other; 10226 } 10227 10228 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 10229 if (Val.getOpcode() != ISD::LOAD) 10230 return false; 10231 10232 EVT VT1 = Val.getValueType(); 10233 if (!VT1.isSimple() || !VT1.isInteger() || 10234 !VT2.isSimple() || !VT2.isInteger()) 10235 return false; 10236 10237 switch (VT1.getSimpleVT().SimpleTy) { 10238 default: break; 10239 case MVT::i1: 10240 case MVT::i8: 10241 case MVT::i16: 10242 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits. 10243 return true; 10244 } 10245 10246 return false; 10247 } 10248 10249 bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { 10250 EVT VT = ExtVal.getValueType(); 10251 10252 if (!isTypeLegal(VT)) 10253 return false; 10254 10255 // Don't create a loadext if we can fold the extension into a wide/long 10256 // instruction. 10257 // If there's more than one user instruction, the loadext is desirable no 10258 // matter what. There can be two uses by the same instruction. 
10259 if (ExtVal->use_empty() || 10260 !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode())) 10261 return true; 10262 10263 SDNode *U = *ExtVal->use_begin(); 10264 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB || 10265 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL)) 10266 return false; 10267 10268 return true; 10269 } 10270 10271 bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { 10272 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) 10273 return false; 10274 10275 if (!isTypeLegal(EVT::getEVT(Ty1))) 10276 return false; 10277 10278 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); 10279 10280 // Assuming the caller doesn't have a zeroext or signext return parameter, 10281 // truncation all the way down to i1 is valid. 10282 return true; 10283 } 10284 10285 10286 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 10287 if (V < 0) 10288 return false; 10289 10290 unsigned Scale = 1; 10291 switch (VT.getSimpleVT().SimpleTy) { 10292 default: return false; 10293 case MVT::i1: 10294 case MVT::i8: 10295 // Scale == 1; 10296 break; 10297 case MVT::i16: 10298 // Scale == 2; 10299 Scale = 2; 10300 break; 10301 case MVT::i32: 10302 // Scale == 4; 10303 Scale = 4; 10304 break; 10305 } 10306 10307 if ((V & (Scale - 1)) != 0) 10308 return false; 10309 V /= Scale; 10310 return V == (V & ((1LL << 5) - 1)); 10311 } 10312 10313 static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 10314 const ARMSubtarget *Subtarget) { 10315 bool isNeg = false; 10316 if (V < 0) { 10317 isNeg = true; 10318 V = - V; 10319 } 10320 10321 switch (VT.getSimpleVT().SimpleTy) { 10322 default: return false; 10323 case MVT::i1: 10324 case MVT::i8: 10325 case MVT::i16: 10326 case MVT::i32: 10327 // + imm12 or - imm8 10328 if (isNeg) 10329 return V == (V & ((1LL << 8) - 1)); 10330 return V == (V & ((1LL << 12) - 1)); 10331 case MVT::f32: 10332 case MVT::f64: 10333 // Same as ARM mode. FIXME: NEON? 
10334 if (!Subtarget->hasVFP2()) 10335 return false; 10336 if ((V & 3) != 0) 10337 return false; 10338 V >>= 2; 10339 return V == (V & ((1LL << 8) - 1)); 10340 } 10341 } 10342 10343 /// isLegalAddressImmediate - Return true if the integer value can be used 10344 /// as the offset of the target addressing mode for load / store of the 10345 /// given type. 10346 static bool isLegalAddressImmediate(int64_t V, EVT VT, 10347 const ARMSubtarget *Subtarget) { 10348 if (V == 0) 10349 return true; 10350 10351 if (!VT.isSimple()) 10352 return false; 10353 10354 if (Subtarget->isThumb1Only()) 10355 return isLegalT1AddressImmediate(V, VT); 10356 else if (Subtarget->isThumb2()) 10357 return isLegalT2AddressImmediate(V, VT, Subtarget); 10358 10359 // ARM mode. 10360 if (V < 0) 10361 V = - V; 10362 switch (VT.getSimpleVT().SimpleTy) { 10363 default: return false; 10364 case MVT::i1: 10365 case MVT::i8: 10366 case MVT::i32: 10367 // +- imm12 10368 return V == (V & ((1LL << 12) - 1)); 10369 case MVT::i16: 10370 // +- imm8 10371 return V == (V & ((1LL << 8) - 1)); 10372 case MVT::f32: 10373 case MVT::f64: 10374 if (!Subtarget->hasVFP2()) // FIXME: NEON? 
10375 return false; 10376 if ((V & 3) != 0) 10377 return false; 10378 V >>= 2; 10379 return V == (V & ((1LL << 8) - 1)); 10380 } 10381 } 10382 10383 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 10384 EVT VT) const { 10385 int Scale = AM.Scale; 10386 if (Scale < 0) 10387 return false; 10388 10389 switch (VT.getSimpleVT().SimpleTy) { 10390 default: return false; 10391 case MVT::i1: 10392 case MVT::i8: 10393 case MVT::i16: 10394 case MVT::i32: 10395 if (Scale == 1) 10396 return true; 10397 // r + r << imm 10398 Scale = Scale & ~1; 10399 return Scale == 2 || Scale == 4 || Scale == 8; 10400 case MVT::i64: 10401 // r + r 10402 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 10403 return true; 10404 return false; 10405 case MVT::isVoid: 10406 // Note, we allow "void" uses (basically, uses that aren't loads or 10407 // stores), because arm allows folding a scale into many arithmetic 10408 // operations. This should be made more precise and revisited later. 10409 10410 // Allow r << imm, but the imm has to be a multiple of two. 10411 if (Scale & 1) return false; 10412 return isPowerOf2_32(Scale); 10413 } 10414 } 10415 10416 /// isLegalAddressingMode - Return true if the addressing mode represented 10417 /// by AM is legal for this target, for a load/store of the specified type. 10418 bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, 10419 const AddrMode &AM, Type *Ty, 10420 unsigned AS) const { 10421 EVT VT = getValueType(DL, Ty, true); 10422 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 10423 return false; 10424 10425 // Can never fold addr of global into load/store. 10426 if (AM.BaseGV) 10427 return false; 10428 10429 switch (AM.Scale) { 10430 case 0: // no scale reg, must be "r+i" or "r", or "i". 10431 break; 10432 case 1: 10433 if (Subtarget->isThumb1Only()) 10434 return false; 10435 // FALL THROUGH. 10436 default: 10437 // ARM doesn't support any R+R*scale+imm addr modes. 
10438 if (AM.BaseOffs) 10439 return false; 10440 10441 if (!VT.isSimple()) 10442 return false; 10443 10444 if (Subtarget->isThumb2()) 10445 return isLegalT2ScaledAddressingMode(AM, VT); 10446 10447 int Scale = AM.Scale; 10448 switch (VT.getSimpleVT().SimpleTy) { 10449 default: return false; 10450 case MVT::i1: 10451 case MVT::i8: 10452 case MVT::i32: 10453 if (Scale < 0) Scale = -Scale; 10454 if (Scale == 1) 10455 return true; 10456 // r + r << imm 10457 return isPowerOf2_32(Scale & ~1); 10458 case MVT::i16: 10459 case MVT::i64: 10460 // r + r 10461 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 10462 return true; 10463 return false; 10464 10465 case MVT::isVoid: 10466 // Note, we allow "void" uses (basically, uses that aren't loads or 10467 // stores), because arm allows folding a scale into many arithmetic 10468 // operations. This should be made more precise and revisited later. 10469 10470 // Allow r << imm, but the imm has to be a multiple of two. 10471 if (Scale & 1) return false; 10472 return isPowerOf2_32(Scale); 10473 } 10474 } 10475 return true; 10476 } 10477 10478 /// isLegalICmpImmediate - Return true if the specified immediate is legal 10479 /// icmp immediate, that is the target has icmp instructions which can compare 10480 /// a register against the immediate without having to materialize the 10481 /// immediate into a register. 10482 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 10483 // Thumb2 and ARM modes can use cmn for negative immediates. 10484 if (!Subtarget->isThumb()) 10485 return ARM_AM::getSOImmVal(std::abs(Imm)) != -1; 10486 if (Subtarget->isThumb2()) 10487 return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1; 10488 // Thumb1 doesn't have cmn, and only 8-bit immediates. 
10489 return Imm >= 0 && Imm <= 255; 10490 } 10491 10492 /// isLegalAddImmediate - Return true if the specified immediate is a legal add 10493 /// *or sub* immediate, that is the target has add or sub instructions which can 10494 /// add a register with the immediate without having to materialize the 10495 /// immediate into a register. 10496 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { 10497 // Same encoding for add/sub, just flip the sign. 10498 int64_t AbsImm = std::abs(Imm); 10499 if (!Subtarget->isThumb()) 10500 return ARM_AM::getSOImmVal(AbsImm) != -1; 10501 if (Subtarget->isThumb2()) 10502 return ARM_AM::getT2SOImmVal(AbsImm) != -1; 10503 // Thumb1 only has 8-bit unsigned immediate. 10504 return AbsImm >= 0 && AbsImm <= 255; 10505 } 10506 10507 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 10508 bool isSEXTLoad, SDValue &Base, 10509 SDValue &Offset, bool &isInc, 10510 SelectionDAG &DAG) { 10511 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 10512 return false; 10513 10514 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 10515 // AddressingMode 3 10516 Base = Ptr->getOperand(0); 10517 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 10518 int RHSC = (int)RHS->getZExtValue(); 10519 if (RHSC < 0 && RHSC > -256) { 10520 assert(Ptr->getOpcode() == ISD::ADD); 10521 isInc = false; 10522 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); 10523 return true; 10524 } 10525 } 10526 isInc = (Ptr->getOpcode() == ISD::ADD); 10527 Offset = Ptr->getOperand(1); 10528 return true; 10529 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 10530 // AddressingMode 2 10531 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 10532 int RHSC = (int)RHS->getZExtValue(); 10533 if (RHSC < 0 && RHSC > -0x1000) { 10534 assert(Ptr->getOpcode() == ISD::ADD); 10535 isInc = false; 10536 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), 
RHS->getValueType(0)); 10537 Base = Ptr->getOperand(0); 10538 return true; 10539 } 10540 } 10541 10542 if (Ptr->getOpcode() == ISD::ADD) { 10543 isInc = true; 10544 ARM_AM::ShiftOpc ShOpcVal= 10545 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); 10546 if (ShOpcVal != ARM_AM::no_shift) { 10547 Base = Ptr->getOperand(1); 10548 Offset = Ptr->getOperand(0); 10549 } else { 10550 Base = Ptr->getOperand(0); 10551 Offset = Ptr->getOperand(1); 10552 } 10553 return true; 10554 } 10555 10556 isInc = (Ptr->getOpcode() == ISD::ADD); 10557 Base = Ptr->getOperand(0); 10558 Offset = Ptr->getOperand(1); 10559 return true; 10560 } 10561 10562 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 10563 return false; 10564 } 10565 10566 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 10567 bool isSEXTLoad, SDValue &Base, 10568 SDValue &Offset, bool &isInc, 10569 SelectionDAG &DAG) { 10570 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 10571 return false; 10572 10573 Base = Ptr->getOperand(0); 10574 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 10575 int RHSC = (int)RHS->getZExtValue(); 10576 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 10577 assert(Ptr->getOpcode() == ISD::ADD); 10578 isInc = false; 10579 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); 10580 return true; 10581 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 10582 isInc = Ptr->getOpcode() == ISD::ADD; 10583 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0)); 10584 return true; 10585 } 10586 } 10587 10588 return false; 10589 } 10590 10591 /// getPreIndexedAddressParts - returns true by value, base pointer and 10592 /// offset pointer and addressing mode by reference if the node's address 10593 /// can be legally represented as pre-indexed load / store address. 
10594 bool 10595 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 10596 SDValue &Offset, 10597 ISD::MemIndexedMode &AM, 10598 SelectionDAG &DAG) const { 10599 if (Subtarget->isThumb1Only()) 10600 return false; 10601 10602 EVT VT; 10603 SDValue Ptr; 10604 bool isSEXTLoad = false; 10605 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 10606 Ptr = LD->getBasePtr(); 10607 VT = LD->getMemoryVT(); 10608 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 10609 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 10610 Ptr = ST->getBasePtr(); 10611 VT = ST->getMemoryVT(); 10612 } else 10613 return false; 10614 10615 bool isInc; 10616 bool isLegal = false; 10617 if (Subtarget->isThumb2()) 10618 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 10619 Offset, isInc, DAG); 10620 else 10621 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 10622 Offset, isInc, DAG); 10623 if (!isLegal) 10624 return false; 10625 10626 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 10627 return true; 10628 } 10629 10630 /// getPostIndexedAddressParts - returns true by value, base pointer and 10631 /// offset pointer and addressing mode by reference if this node can be 10632 /// combined with a load / store to form a post-indexed load / store. 
10633 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 10634 SDValue &Base, 10635 SDValue &Offset, 10636 ISD::MemIndexedMode &AM, 10637 SelectionDAG &DAG) const { 10638 if (Subtarget->isThumb1Only()) 10639 return false; 10640 10641 EVT VT; 10642 SDValue Ptr; 10643 bool isSEXTLoad = false; 10644 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 10645 VT = LD->getMemoryVT(); 10646 Ptr = LD->getBasePtr(); 10647 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 10648 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 10649 VT = ST->getMemoryVT(); 10650 Ptr = ST->getBasePtr(); 10651 } else 10652 return false; 10653 10654 bool isInc; 10655 bool isLegal = false; 10656 if (Subtarget->isThumb2()) 10657 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 10658 isInc, DAG); 10659 else 10660 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 10661 isInc, DAG); 10662 if (!isLegal) 10663 return false; 10664 10665 if (Ptr != Base) { 10666 // Swap base ptr and offset to catch more post-index load / store when 10667 // it's legal. In Thumb2 mode, offset must be an immediate. 10668 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 10669 !Subtarget->isThumb2()) 10670 std::swap(Base, Offset); 10671 10672 // Post-indexed load / store update the base pointer. 10673 if (Ptr != Base) 10674 return false; 10675 } 10676 10677 AM = isInc ? 
ISD::POST_INC : ISD::POST_DEC; 10678 return true; 10679 } 10680 10681 void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 10682 APInt &KnownZero, 10683 APInt &KnownOne, 10684 const SelectionDAG &DAG, 10685 unsigned Depth) const { 10686 unsigned BitWidth = KnownOne.getBitWidth(); 10687 KnownZero = KnownOne = APInt(BitWidth, 0); 10688 switch (Op.getOpcode()) { 10689 default: break; 10690 case ARMISD::ADDC: 10691 case ARMISD::ADDE: 10692 case ARMISD::SUBC: 10693 case ARMISD::SUBE: 10694 // These nodes' second result is a boolean 10695 if (Op.getResNo() == 0) 10696 break; 10697 KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); 10698 break; 10699 case ARMISD::CMOV: { 10700 // Bits are known zero/one if known on the LHS and RHS. 10701 DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); 10702 if (KnownZero == 0 && KnownOne == 0) return; 10703 10704 APInt KnownZeroRHS, KnownOneRHS; 10705 DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); 10706 KnownZero &= KnownZeroRHS; 10707 KnownOne &= KnownOneRHS; 10708 return; 10709 } 10710 case ISD::INTRINSIC_W_CHAIN: { 10711 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1)); 10712 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue()); 10713 switch (IntID) { 10714 default: return; 10715 case Intrinsic::arm_ldaex: 10716 case Intrinsic::arm_ldrex: { 10717 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); 10718 unsigned MemBits = VT.getScalarType().getSizeInBits(); 10719 KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); 10720 return; 10721 } 10722 } 10723 } 10724 } 10725 } 10726 10727 //===----------------------------------------------------------------------===// 10728 // ARM Inline Assembly Support 10729 //===----------------------------------------------------------------------===// 10730 10731 bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { 10732 // Looking for "rev" which is V6+. 
10733 if (!Subtarget->hasV6Ops()) 10734 return false; 10735 10736 InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); 10737 std::string AsmStr = IA->getAsmString(); 10738 SmallVector<StringRef, 4> AsmPieces; 10739 SplitString(AsmStr, AsmPieces, ";\n"); 10740 10741 switch (AsmPieces.size()) { 10742 default: return false; 10743 case 1: 10744 AsmStr = AsmPieces[0]; 10745 AsmPieces.clear(); 10746 SplitString(AsmStr, AsmPieces, " \t,"); 10747 10748 // rev $0, $1 10749 if (AsmPieces.size() == 3 && 10750 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && 10751 IA->getConstraintString().compare(0, 4, "=l,l") == 0) { 10752 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); 10753 if (Ty && Ty->getBitWidth() == 32) 10754 return IntrinsicLowering::LowerToByteSwap(CI); 10755 } 10756 break; 10757 } 10758 10759 return false; 10760 } 10761 10762 /// getConstraintType - Given a constraint letter, return the type of 10763 /// constraint it is for this target. 10764 ARMTargetLowering::ConstraintType 10765 ARMTargetLowering::getConstraintType(StringRef Constraint) const { 10766 if (Constraint.size() == 1) { 10767 switch (Constraint[0]) { 10768 default: break; 10769 case 'l': return C_RegisterClass; 10770 case 'w': return C_RegisterClass; 10771 case 'h': return C_RegisterClass; 10772 case 'x': return C_RegisterClass; 10773 case 't': return C_RegisterClass; 10774 case 'j': return C_Other; // Constant for movw. 10775 // An address with a single base register. Due to the way we 10776 // currently handle addresses it is the same as an 'r' memory constraint. 10777 case 'Q': return C_Memory; 10778 } 10779 } else if (Constraint.size() == 2) { 10780 switch (Constraint[0]) { 10781 default: break; 10782 // All 'U+' constraints are addresses. 10783 case 'U': return C_Memory; 10784 } 10785 } 10786 return TargetLowering::getConstraintType(Constraint); 10787 } 10788 10789 /// Examine constraint type and operand type and determine a weight value. 
10790 /// This object must already have been set up with the operand type 10791 /// and the current alternative constraint selected. 10792 TargetLowering::ConstraintWeight 10793 ARMTargetLowering::getSingleConstraintMatchWeight( 10794 AsmOperandInfo &info, const char *constraint) const { 10795 ConstraintWeight weight = CW_Invalid; 10796 Value *CallOperandVal = info.CallOperandVal; 10797 // If we don't have a value, we can't do a match, 10798 // but allow it at the lowest weight. 10799 if (!CallOperandVal) 10800 return CW_Default; 10801 Type *type = CallOperandVal->getType(); 10802 // Look at the constraint type. 10803 switch (*constraint) { 10804 default: 10805 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 10806 break; 10807 case 'l': 10808 if (type->isIntegerTy()) { 10809 if (Subtarget->isThumb()) 10810 weight = CW_SpecificReg; 10811 else 10812 weight = CW_Register; 10813 } 10814 break; 10815 case 'w': 10816 if (type->isFloatingPointTy()) 10817 weight = CW_Register; 10818 break; 10819 } 10820 return weight; 10821 } 10822 10823 typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; 10824 RCPair ARMTargetLowering::getRegForInlineAsmConstraint( 10825 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { 10826 if (Constraint.size() == 1) { 10827 // GCC ARM Constraint Letters 10828 switch (Constraint[0]) { 10829 case 'l': // Low regs or general regs. 10830 if (Subtarget->isThumb()) 10831 return RCPair(0U, &ARM::tGPRRegClass); 10832 return RCPair(0U, &ARM::GPRRegClass); 10833 case 'h': // High regs or no regs. 
10834 if (Subtarget->isThumb()) 10835 return RCPair(0U, &ARM::hGPRRegClass); 10836 break; 10837 case 'r': 10838 if (Subtarget->isThumb1Only()) 10839 return RCPair(0U, &ARM::tGPRRegClass); 10840 return RCPair(0U, &ARM::GPRRegClass); 10841 case 'w': 10842 if (VT == MVT::Other) 10843 break; 10844 if (VT == MVT::f32) 10845 return RCPair(0U, &ARM::SPRRegClass); 10846 if (VT.getSizeInBits() == 64) 10847 return RCPair(0U, &ARM::DPRRegClass); 10848 if (VT.getSizeInBits() == 128) 10849 return RCPair(0U, &ARM::QPRRegClass); 10850 break; 10851 case 'x': 10852 if (VT == MVT::Other) 10853 break; 10854 if (VT == MVT::f32) 10855 return RCPair(0U, &ARM::SPR_8RegClass); 10856 if (VT.getSizeInBits() == 64) 10857 return RCPair(0U, &ARM::DPR_8RegClass); 10858 if (VT.getSizeInBits() == 128) 10859 return RCPair(0U, &ARM::QPR_8RegClass); 10860 break; 10861 case 't': 10862 if (VT == MVT::f32) 10863 return RCPair(0U, &ARM::SPRRegClass); 10864 break; 10865 } 10866 } 10867 if (StringRef("{cc}").equals_lower(Constraint)) 10868 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass); 10869 10870 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 10871 } 10872 10873 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 10874 /// vector. If it is invalid, don't add anything to Ops. 10875 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 10876 std::string &Constraint, 10877 std::vector<SDValue>&Ops, 10878 SelectionDAG &DAG) const { 10879 SDValue Result; 10880 10881 // Currently only support length 1 constraints. 
// NOTE(review): this span opens part-way through a function body whose
// signature precedes the visible chunk (presumably
// ARMTargetLowering::LowerAsmOperandForConstraint, judging by the fallback
// call at its end). The statements are carried over unchanged.
  // Multi-character constraints are not handled by the code below.
  if (Constraint.length() != 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'j':
  case 'I': case 'J': case 'K': case 'L':
  case 'M': case 'N': case 'O':
    // Every letter handled here requires a constant operand.
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    if (!C)
      return;

    int64_t CVal64 = C->getSExtValue();
    int CVal = (int) CVal64;
    // None of these constraints allow values larger than 32 bits. Check
    // that the value fits in an int.
    if (CVal != CVal64)
      return;

    // Range-check the value per letter. In the inner switch a `break` accepts
    // the constant (it is turned into a target constant below), while a
    // `return` rejects it without appending anything to Ops.
    switch (ConstraintLetter) {
      case 'j':
        // Constant suitable for movw, must be between 0 and
        // 65535.
        if (Subtarget->hasV6T2Ops())
          if (CVal >= 0 && CVal <= 65535)
            break;
        return;
      case 'I':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between 0 and 255, for ADD
          // immediates.
          if (CVal >= 0 && CVal <= 255)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getT2SOImmVal(CVal) != -1)
            break;
        } else {
          // A constant that can be used as an immediate value in a
          // data-processing instruction.
          if (ARM_AM::getSOImmVal(CVal) != -1)
            break;
        }
        return;

      case 'J':
        if (Subtarget->isThumb()) { // FIXME thumb2
          // This must be a constant between -255 and -1, for negated ADD
          // immediates. This can be used in GCC with an "n" modifier that
          // prints the negated value, for use with SUB instructions. It is
          // not useful otherwise but is implemented for compatibility.
          if (CVal >= -255 && CVal <= -1)
            break;
        } else {
          // This must be a constant between -4095 and 4095. It is not clear
          // what this constraint is intended for. Implemented for
          // compatibility with GCC.
          if (CVal >= -4095 && CVal <= 4095)
            break;
        }
        return;

      case 'K':
        if (Subtarget->isThumb1Only()) {
          // A 32-bit value where only one byte has a nonzero value. Exclude
          // zero to match GCC. This constraint is used by GCC internally for
          // constants that can be loaded with a move/shift combination.
          // It is not useful otherwise but is implemented for compatibility.
          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
            break;
        } else {
          // A constant whose bitwise inverse can be used as an immediate
          // value in a data-processing instruction. This can be used in GCC
          // with a "B" modifier that prints the inverted value, for use with
          // BIC and MVN instructions. It is not useful otherwise but is
          // implemented for compatibility.
          if (ARM_AM::getSOImmVal(~CVal) != -1)
            break;
        }
        return;

      case 'L':
        if (Subtarget->isThumb1Only()) {
          // This must be a constant between -7 and 7,
          // for 3-operand ADD/SUB immediate instructions.
          // NOTE(review): the test below rejects 7 itself (`CVal < 7`) even
          // though the comment above reads as an inclusive range -- confirm
          // which bound is intended.
          if (CVal >= -7 && CVal < 7)
            break;
        } else if (Subtarget->isThumb2()) {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
            break;
        } else {
          // A constant whose negation can be used as an immediate value in a
          // data-processing instruction. This can be used in GCC with an "n"
          // modifier that prints the negated value, for use with SUB
          // instructions. It is not useful otherwise but is implemented for
          // compatibility.
          if (ARM_AM::getSOImmVal(-CVal) != -1)
            break;
        }
        return;

      case 'M':
        if (Subtarget->isThumb()) { // FIXME thumb2
          // This must be a multiple of 4 between 0 and 1020, for
          // ADD sp + immediate.
          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
            break;
        } else {
          // A power of two or a constant between 0 and 32. This is used in
          // GCC for the shift amount on shifted register operands, but it is
          // useful in general for any shift amounts.
          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
            break;
        }
        return;

      case 'N':
        // Only accepted on Thumb subtargets; every other subtarget rejects.
        if (Subtarget->isThumb()) { // FIXME thumb2
          // This must be a constant between 0 and 31, for shift amounts.
          if (CVal >= 0 && CVal <= 31)
            break;
        }
        return;

      case 'O':
        // Only accepted on Thumb subtargets; every other subtarget rejects.
        if (Subtarget->isThumb()) { // FIXME thumb2
          // This must be a multiple of 4 between -508 and 508, for
          // ADD/SUB sp = sp + immediate.
          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
            break;
        }
        return;
    }
    // The value passed its range check: materialize it as a target constant
    // of the operand's own value type.
    Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
    break;
  }

  // A non-null Result means the constraint was handled here; otherwise defer
  // to the generic TargetLowering implementation.
  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

/// Lower an ISD::SDIVREM / ISD::UDIVREM node to a call to the matching
/// RTLIB::[SU]DIVREM_I{8,16,32,64} runtime routine.
///
/// Every operand of \p Op is forwarded as a call argument, sign- or
/// zero-extended according to the signedness of the opcode. The call's return
/// type is a two-element struct whose members both have the node's result
/// type. Returns the first element of the pair produced by LowerCallTo.
/// Asserts that the subtarget is an AEABI target.
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
  unsigned Opcode = Op->getOpcode();
  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
         "Invalid opcode for Div/Rem lowering");
  bool isSigned = (Opcode == ISD::SDIVREM);
  EVT VT = Op->getValueType(0);
  Type *Ty = VT.getTypeForEVT(*DAG.getContext());

  // Select the libcall by result width and signedness.
  RTLIB::Libcall LC;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("Unexpected request for libcall!");
  case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
  case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
  case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
  case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
  }

  // The call is chained to the DAG entry node rather than to a prior chain.
  SDValue InChain = DAG.getEntryNode();

  // Build the argument list: one entry per operand of the node.
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
    EVT ArgVT = Op->getOperand(i).getValueType();
    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
    Entry.Node = Op->getOperand(i);
    Entry.Ty = ArgTy;
    Entry.isSExt = isSigned;
    Entry.isZExt = !isSigned;
    Args.push_back(Entry);
  }

  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  // The routine's result is modelled as the struct { Ty, Ty }.
  Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);

  SDLoc dl(Op);
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(InChain)
    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
    .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);

  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
  return CallInfo.first;
}

/// Lower DYNAMIC_STACKALLOC for Windows targets (asserted).
///
/// The requested size (operand 1) is shifted right by two, copied into R4,
/// and an ARMISD::WIN__CHKSTK node is emitted, glued to that copy. The new
/// stack pointer is then read back from SP. Returns the merged pair
/// { new SP value, output chain }.
SDValue
ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() && "unsupported target platform");
  SDLoc DL(Op);

  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);

  // Size >> 2, i.e. the size expressed in 4-byte units.
  SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
                              DAG.getConstant(2, DL, MVT::i32));

  // Place the word count in R4 and keep the glue so that the WIN__CHKSTK
  // node stays attached to the register copy.
  SDValue Flag;
  Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
  Flag = Chain.getValue(1);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);

  // Read SP after the WIN__CHKSTK node; that value is the allocation result.
  SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
  Chain = NewSP.getValue(1);

  SDValue Ops[2] = { NewSP, Chain };
  return DAG.getMergeValues(Ops, DL);
}

/// Custom-lower FP_EXTEND to f64 on subtargets whose FPU is single-precision
/// only (both conditions asserted) by calling the RTLIB::getFPEXT libcall for
/// the source/result type pair. Returns the call's result value.
SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
         "Unexpected type for custom-lowering FP_EXTEND");

  RTLIB::Libcall LC;
  LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());

  SDValue SrcVal = Op.getOperand(0);
  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
                     /*isSigned*/ false, SDLoc(Op)).first;
}

/// Custom-lower FP_ROUND from f64 on subtargets whose FPU is single-precision
/// only (both conditions asserted) by calling the RTLIB::getFPROUND libcall
/// for the source/result type pair. Returns the call's result value.
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getOperand(0).getValueType() == MVT::f64 &&
         Subtarget->isFPOnlySP() &&
         "Unexpected type for custom-lowering FP_ROUND");

  RTLIB::Libcall LC;
  LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());

  SDValue SrcVal = Op.getOperand(0);
  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
                     /*isSigned*/ false, SDLoc(Op)).first;
}

/// Offset folding into global addresses is never reported as legal; the
/// argument is ignored.
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The ARM target isn't yet aware of offsets.
  return false;
}

/// Returns true if \p v is not all-ones and its bitwise complement is a
/// shifted mask according to isShiftedMask_32.
bool ARM::isBitFieldInvertedMask(unsigned v) {
  if (v == 0xffffffff)
    return false;

  // there can be 1's on either or both "outsides", all the "inside"
  // bits must be 0's
  return isShiftedMask_32(~v);
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
///
/// Requires VFP3. f32 immediates are legal when ARM_AM::getFP32Imm can encode
/// them; f64 immediates additionally require an FPU that is not
/// single-precision only and an encoding from ARM_AM::getFP64Imm. Every other
/// type is rejected.
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  if (!Subtarget->hasVFP3())
    return false;
  if (VT == MVT::f32)
    return ARM_AM::getFP32Imm(Imm) != -1;
  if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
    return ARM_AM::getFP64Imm(Imm) != -1;
  return false;
}

/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
///
/// The exclusive load/store intrinsics (ldrex/ldaex/strex/stlex and their
/// 64-bit "d" forms) are described here as well. Fills in \p Info and returns
/// true for a recognized intrinsic; returns false otherwise.
bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Conservatively set memVT to the entire set of vectors loaded.
    // The size is expressed as a count of i64 elements (alloc size / 8).
    auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
    uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    // The alignment is taken from the last call argument, which is cast to a
    // ConstantInt.
    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
    Info.vol = false; // volatile loads with NEON intrinsics not supported
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    Info.opc = ISD::INTRINSIC_VOID;
    // Conservatively set memVT to the entire set of vectors stored.
    // Sum the sizes of the vector arguments that follow the pointer, stopping
    // at the first non-vector argument.
    auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
    unsigned NumElts = 0;
    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
      Type *ArgTy = I.getArgOperand(ArgI)->getType();
      if (!ArgTy->isVectorTy())
        break;
      NumElts += DL.getTypeAllocSize(ArgTy) / 8;
    }
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    // As for the loads, the last call argument supplies the alignment.
    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
    Info.vol = false; // volatile stores with NEON intrinsics not supported
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::arm_ldaex:
  case Intrinsic::arm_ldrex: {
    // Exclusive load: the pointer is argument 0; the memory type and the
    // alignment are derived from its pointee type. Marked volatile.
    auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
    Info.vol = true;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::arm_stlex:
  case Intrinsic::arm_strex: {
    // Exclusive store: the pointer is argument 1; the memory type and the
    // alignment are derived from its pointee type. Marked volatile.
    auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
    Info.vol = true;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::arm_stlexd:
  case Intrinsic::arm_strexd: {
    // 64-bit exclusive store: the pointer is argument 2; the memory type is
    // fixed at i64 with 8-byte alignment. Marked volatile.
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(2);
    Info.offset = 0;
    Info.align = 8;
    Info.vol = true;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::arm_ldaexd:
  case Intrinsic::arm_ldrexd: {
    // 64-bit exclusive load: the pointer is argument 0; the memory type is
    // fixed at i64 with 8-byte alignment. Marked volatile.
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = 8;
    Info.vol = true;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  default:
    break;
  }

  // Not an intrinsic described by this hook.
  return false;
}

/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
11282 bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 11283 Type *Ty) const { 11284 assert(Ty->isIntegerTy()); 11285 11286 unsigned Bits = Ty->getPrimitiveSizeInBits(); 11287 if (Bits == 0 || Bits > 32) 11288 return false; 11289 return true; 11290 } 11291 11292 bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; } 11293 11294 Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, 11295 ARM_MB::MemBOpt Domain) const { 11296 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 11297 11298 // First, if the target has no DMB, see what fallback we can use. 11299 if (!Subtarget->hasDataBarrier()) { 11300 // Some ARMv6 cpus can support data barriers with an mcr instruction. 11301 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 11302 // here. 11303 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) { 11304 Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); 11305 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0), 11306 Builder.getInt32(0), Builder.getInt32(7), 11307 Builder.getInt32(10), Builder.getInt32(5)}; 11308 return Builder.CreateCall(MCR, args); 11309 } else { 11310 // Instead of using barriers, atomic accesses on these subtargets use 11311 // libcalls. 11312 llvm_unreachable("makeDMB on a target so old that it has no barriers"); 11313 } 11314 } else { 11315 Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); 11316 // Only a full system barrier exists in the M-class architectures. 11317 Domain = Subtarget->isMClass() ? 
ARM_MB::SY : Domain; 11318 Constant *CDomain = Builder.getInt32(Domain); 11319 return Builder.CreateCall(DMB, CDomain); 11320 } 11321 } 11322 11323 // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html 11324 Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 11325 AtomicOrdering Ord, bool IsStore, 11326 bool IsLoad) const { 11327 if (!getInsertFencesForAtomic()) 11328 return nullptr; 11329 11330 switch (Ord) { 11331 case NotAtomic: 11332 case Unordered: 11333 llvm_unreachable("Invalid fence: unordered/non-atomic"); 11334 case Monotonic: 11335 case Acquire: 11336 return nullptr; // Nothing to do 11337 case SequentiallyConsistent: 11338 if (!IsStore) 11339 return nullptr; // Nothing to do 11340 /*FALLTHROUGH*/ 11341 case Release: 11342 case AcquireRelease: 11343 if (Subtarget->isSwift()) 11344 return makeDMB(Builder, ARM_MB::ISHST); 11345 // FIXME: add a comment with a link to documentation justifying this. 11346 else 11347 return makeDMB(Builder, ARM_MB::ISH); 11348 } 11349 llvm_unreachable("Unknown fence ordering in emitLeadingFence"); 11350 } 11351 11352 Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 11353 AtomicOrdering Ord, bool IsStore, 11354 bool IsLoad) const { 11355 if (!getInsertFencesForAtomic()) 11356 return nullptr; 11357 11358 switch (Ord) { 11359 case NotAtomic: 11360 case Unordered: 11361 llvm_unreachable("Invalid fence: unordered/not-atomic"); 11362 case Monotonic: 11363 case Release: 11364 return nullptr; // Nothing to do 11365 case Acquire: 11366 case AcquireRelease: 11367 case SequentiallyConsistent: 11368 return makeDMB(Builder, ARM_MB::ISH); 11369 } 11370 llvm_unreachable("Unknown fence ordering in emitTrailingFence"); 11371 } 11372 11373 // Loads and stores less than 64-bits are already atomic; ones above that 11374 // are doomed anyway, so defer to the default libcall and blame the OS when 11375 // things go wrong. 
Cortex M doesn't have ldrexd/strexd though, so don't emit 11376 // anything for those. 11377 bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { 11378 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); 11379 return (Size == 64) && !Subtarget->isMClass(); 11380 } 11381 11382 // Loads and stores less than 64-bits are already atomic; ones above that 11383 // are doomed anyway, so defer to the default libcall and blame the OS when 11384 // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit 11385 // anything for those. 11386 // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that 11387 // guarantee, see DDI0406C ARM architecture reference manual, 11388 // sections A8.8.72-74 LDRD) 11389 bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { 11390 unsigned Size = LI->getType()->getPrimitiveSizeInBits(); 11391 return (Size == 64) && !Subtarget->isMClass(); 11392 } 11393 11394 // For the real atomic operations, we have ldrex/strex up to 32 bits, 11395 // and up to 64 bits on the non-M profiles 11396 TargetLoweringBase::AtomicRMWExpansionKind 11397 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 11398 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 11399 return (Size <= (Subtarget->isMClass() ? 32U : 64U)) 11400 ? AtomicRMWExpansionKind::LLSC 11401 : AtomicRMWExpansionKind::None; 11402 } 11403 11404 // This has so far only been implemented for MachO. 11405 bool ARMTargetLowering::useLoadStackGuardNode() const { 11406 return Subtarget->isTargetMachO(); 11407 } 11408 11409 bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, 11410 unsigned &Cost) const { 11411 // If we do not have NEON, vector types are not natively supported. 11412 if (!Subtarget->hasNEON()) 11413 return false; 11414 11415 // Floating point values and vector values map to the same register file. 
11416 // Therefore, althought we could do a store extract of a vector type, this is 11417 // better to leave at float as we have more freedom in the addressing mode for 11418 // those. 11419 if (VectorTy->isFPOrFPVectorTy()) 11420 return false; 11421 11422 // If the index is unknown at compile time, this is very expensive to lower 11423 // and it is not possible to combine the store with the extract. 11424 if (!isa<ConstantInt>(Idx)) 11425 return false; 11426 11427 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type"); 11428 unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth(); 11429 // We can do a store + vector extract on any vector that fits perfectly in a D 11430 // or Q register. 11431 if (BitWidth == 64 || BitWidth == 128) { 11432 Cost = 0; 11433 return true; 11434 } 11435 return false; 11436 } 11437 11438 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, 11439 AtomicOrdering Ord) const { 11440 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 11441 Type *ValTy = cast<PointerType>(Addr->getType())->getElementType(); 11442 bool IsAcquire = isAtLeastAcquire(Ord); 11443 11444 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd 11445 // intrinsic must return {i32, i32} and we have to recombine them into a 11446 // single i64 here. 11447 if (ValTy->getPrimitiveSizeInBits() == 64) { 11448 Intrinsic::ID Int = 11449 IsAcquire ? 
Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; 11450 Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); 11451 11452 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); 11453 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); 11454 11455 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); 11456 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); 11457 if (!Subtarget->isLittle()) 11458 std::swap (Lo, Hi); 11459 Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); 11460 Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); 11461 return Builder.CreateOr( 11462 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64"); 11463 } 11464 11465 Type *Tys[] = { Addr->getType() }; 11466 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; 11467 Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); 11468 11469 return Builder.CreateTruncOrBitCast( 11470 Builder.CreateCall(Ldrex, Addr), 11471 cast<PointerType>(Addr->getType())->getElementType()); 11472 } 11473 11474 Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, 11475 Value *Addr, 11476 AtomicOrdering Ord) const { 11477 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 11478 bool IsRelease = isAtLeastRelease(Ord); 11479 11480 // Since the intrinsics must have legal type, the i64 intrinsics take two 11481 // parameters: "i32, i32". We must marshal Val into the appropriate form 11482 // before the call. 11483 if (Val->getType()->getPrimitiveSizeInBits() == 64) { 11484 Intrinsic::ID Int = 11485 IsRelease ? 
Intrinsic::arm_stlexd : Intrinsic::arm_strexd; 11486 Function *Strex = Intrinsic::getDeclaration(M, Int); 11487 Type *Int32Ty = Type::getInt32Ty(M->getContext()); 11488 11489 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); 11490 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); 11491 if (!Subtarget->isLittle()) 11492 std::swap (Lo, Hi); 11493 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); 11494 return Builder.CreateCall(Strex, {Lo, Hi, Addr}); 11495 } 11496 11497 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex; 11498 Type *Tys[] = { Addr->getType() }; 11499 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); 11500 11501 return Builder.CreateCall( 11502 Strex, {Builder.CreateZExtOrBitCast( 11503 Val, Strex->getFunctionType()->getParamType(0)), 11504 Addr}); 11505 } 11506 11507 /// \brief Lower an interleaved load into a vldN intrinsic. 11508 /// 11509 /// E.g. Lower an interleaved load (Factor = 2): 11510 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4 11511 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements 11512 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements 11513 /// 11514 /// Into: 11515 /// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4) 11516 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0 11517 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1 11518 bool ARMTargetLowering::lowerInterleavedLoad( 11519 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, 11520 ArrayRef<unsigned> Indices, unsigned Factor) const { 11521 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && 11522 "Invalid interleave factor"); 11523 assert(!Shuffles.empty() && "Empty shufflevector input"); 11524 assert(Shuffles.size() == Indices.size() && 11525 "Unmatched number of shufflevectors and indices"); 11526 11527 VectorType *VecTy = Shuffles[0]->getType(); 11528 Type *EltTy = 
VecTy->getVectorElementType(); 11529 11530 const DataLayout &DL = LI->getModule()->getDataLayout(); 11531 unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); 11532 bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; 11533 11534 // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't 11535 // support i64/f64 element). 11536 if ((VecSize != 64 && VecSize != 128) || EltIs64Bits) 11537 return false; 11538 11539 // A pointer vector can not be the return type of the ldN intrinsics. Need to 11540 // load integer vectors first and then convert to pointer vectors. 11541 if (EltTy->isPointerTy()) 11542 VecTy = 11543 VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements()); 11544 11545 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2, 11546 Intrinsic::arm_neon_vld3, 11547 Intrinsic::arm_neon_vld4}; 11548 11549 Function *VldnFunc = 11550 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], VecTy); 11551 11552 IRBuilder<> Builder(LI); 11553 SmallVector<Value *, 2> Ops; 11554 11555 Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace()); 11556 Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr)); 11557 Ops.push_back(Builder.getInt32(LI->getAlignment())); 11558 11559 CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN"); 11560 11561 // Replace uses of each shufflevector with the corresponding vector loaded 11562 // by ldN. 11563 for (unsigned i = 0; i < Shuffles.size(); i++) { 11564 ShuffleVectorInst *SV = Shuffles[i]; 11565 unsigned Index = Indices[i]; 11566 11567 Value *SubVec = Builder.CreateExtractValue(VldN, Index); 11568 11569 // Convert the integer vector to pointer vector if the element is pointer. 11570 if (EltTy->isPointerTy()) 11571 SubVec = Builder.CreateIntToPtr(SubVec, SV->getType()); 11572 11573 SV->replaceAllUsesWith(SubVec); 11574 } 11575 11576 return true; 11577 } 11578 11579 /// \brief Get a mask consisting of sequential integers starting from \p Start. 
11580 /// 11581 /// I.e. <Start, Start + 1, ..., Start + NumElts - 1> 11582 static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start, 11583 unsigned NumElts) { 11584 SmallVector<Constant *, 16> Mask; 11585 for (unsigned i = 0; i < NumElts; i++) 11586 Mask.push_back(Builder.getInt32(Start + i)); 11587 11588 return ConstantVector::get(Mask); 11589 } 11590 11591 /// \brief Lower an interleaved store into a vstN intrinsic. 11592 /// 11593 /// E.g. Lower an interleaved store (Factor = 3): 11594 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, 11595 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> 11596 /// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4 11597 /// 11598 /// Into: 11599 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> 11600 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> 11601 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> 11602 /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4) 11603 /// 11604 /// Note that the new shufflevectors will be removed and we'll only generate one 11605 /// vst3 instruction in CodeGen. 
11606 bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, 11607 ShuffleVectorInst *SVI, 11608 unsigned Factor) const { 11609 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && 11610 "Invalid interleave factor"); 11611 11612 VectorType *VecTy = SVI->getType(); 11613 assert(VecTy->getVectorNumElements() % Factor == 0 && 11614 "Invalid interleaved store"); 11615 11616 unsigned NumSubElts = VecTy->getVectorNumElements() / Factor; 11617 Type *EltTy = VecTy->getVectorElementType(); 11618 VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); 11619 11620 const DataLayout &DL = SI->getModule()->getDataLayout(); 11621 unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); 11622 bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; 11623 11624 // Skip illegal sub vector types and vector types of i64/f64 element (vstN 11625 // doesn't support i64/f64 element). 11626 if ((SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits) 11627 return false; 11628 11629 Value *Op0 = SVI->getOperand(0); 11630 Value *Op1 = SVI->getOperand(1); 11631 IRBuilder<> Builder(SI); 11632 11633 // StN intrinsics don't support pointer vectors as arguments. Convert pointer 11634 // vectors to integer vectors. 11635 if (EltTy->isPointerTy()) { 11636 Type *IntTy = DL.getIntPtrType(EltTy); 11637 11638 // Convert to the corresponding integer vector. 
11639 Type *IntVecTy = 11640 VectorType::get(IntTy, Op0->getType()->getVectorNumElements()); 11641 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy); 11642 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy); 11643 11644 SubVecTy = VectorType::get(IntTy, NumSubElts); 11645 } 11646 11647 static Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2, 11648 Intrinsic::arm_neon_vst3, 11649 Intrinsic::arm_neon_vst4}; 11650 Function *VstNFunc = Intrinsic::getDeclaration( 11651 SI->getModule(), StoreInts[Factor - 2], SubVecTy); 11652 11653 SmallVector<Value *, 6> Ops; 11654 11655 Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace()); 11656 Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr)); 11657 11658 // Split the shufflevector operands into sub vectors for the new vstN call. 11659 for (unsigned i = 0; i < Factor; i++) 11660 Ops.push_back(Builder.CreateShuffleVector( 11661 Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts))); 11662 11663 Ops.push_back(Builder.getInt32(SI->getAlignment())); 11664 Builder.CreateCall(VstNFunc, Ops); 11665 return true; 11666 } 11667 11668 enum HABaseType { 11669 HA_UNKNOWN = 0, 11670 HA_FLOAT, 11671 HA_DOUBLE, 11672 HA_VECT64, 11673 HA_VECT128 11674 }; 11675 11676 static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, 11677 uint64_t &Members) { 11678 if (const StructType *ST = dyn_cast<StructType>(Ty)) { 11679 for (unsigned i = 0; i < ST->getNumElements(); ++i) { 11680 uint64_t SubMembers = 0; 11681 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) 11682 return false; 11683 Members += SubMembers; 11684 } 11685 } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { 11686 uint64_t SubMembers = 0; 11687 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) 11688 return false; 11689 Members += SubMembers * AT->getNumElements(); 11690 } else if (Ty->isFloatTy()) { 11691 if (Base != HA_UNKNOWN && Base != HA_FLOAT) 11692 return false; 11693 Members = 1; 11694 Base = 
HA_FLOAT; 11695 } else if (Ty->isDoubleTy()) { 11696 if (Base != HA_UNKNOWN && Base != HA_DOUBLE) 11697 return false; 11698 Members = 1; 11699 Base = HA_DOUBLE; 11700 } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) { 11701 Members = 1; 11702 switch (Base) { 11703 case HA_FLOAT: 11704 case HA_DOUBLE: 11705 return false; 11706 case HA_VECT64: 11707 return VT->getBitWidth() == 64; 11708 case HA_VECT128: 11709 return VT->getBitWidth() == 128; 11710 case HA_UNKNOWN: 11711 switch (VT->getBitWidth()) { 11712 case 64: 11713 Base = HA_VECT64; 11714 return true; 11715 case 128: 11716 Base = HA_VECT128; 11717 return true; 11718 default: 11719 return false; 11720 } 11721 } 11722 } 11723 11724 return (Members > 0 && Members <= 4); 11725 } 11726 11727 /// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of 11728 /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when 11729 /// passing according to AAPCS rules. 11730 bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( 11731 Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { 11732 if (getEffectiveCallingConv(CallConv, isVarArg) != 11733 CallingConv::ARM_AAPCS_VFP) 11734 return false; 11735 11736 HABaseType Base = HA_UNKNOWN; 11737 uint64_t Members = 0; 11738 bool IsHA = isHomogeneousAggregate(Ty, Base, Members); 11739 DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump()); 11740 11741 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy(); 11742 return IsHA || IsIntArray; 11743 } 11744