//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARMISelLowering.h"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");

// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  cl::init(false));

cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

namespace {
  class ARMCCState : public CCState {
  public:
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
               LLVMContext &C, ParmContext PC)
      : CCState(CC, isVarArg, MF, TM, locs, C) {
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}

// The APCS parameter registers.
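// r0-r3 also serve as the AAPCS core argument registers, so this table is
// shared by both ABIs when counting how many GPRs an argument list consumes
// (e.g. for spilling the remaining registers in varargs prologues).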
static const uint16_t GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};

void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
                                       MVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT, Promote);
    AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);

    setOperationAction(ISD::STORE, VT, Promote);
    AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
  }

  MVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  if (ElemTy == MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
  setOperationAction(ISD::SELECT, VT, Expand);
  setOperationAction(ISD::SELECT_CC, VT, Expand);
  setOperationAction(ISD::VSELECT, VT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT, Promote);
    AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    setOperationAction(ISD::OR, VT, Promote);
    AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
    setOperationAction(ISD::XOR, VT, Promote);
    AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT, Expand);
  setOperationAction(ISD::UDIV, VT, Expand);
  setOperationAction(ISD::FDIV, VT, Expand);
  setOperationAction(ISD::SREM, VT, Expand);
  setOperationAction(ISD::UREM, VT, Expand);
  setOperationAction(ISD::FREM, VT, Expand);
}

void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPRRegClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::QPRRegClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();
  Itins = TM.getInstrItineraryData();

  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (Subtarget->isTargetIOS()) {
    // Uses VFP for Thumb libfuncs if available.
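    // The "*vfp" helpers below pass and return their operands in VFP
    // registers instead of the soft-float core registers, avoiding GPR<->VFP
    // transfers around each call. This is only usable when the runtime was
    // built with VFP support, hence the hasVFP2() guard.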
    if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
        Subtarget->hasARMOps()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32, "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64, "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
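  // Passing a null name to setLibcallName tells legalization that no helper
  // function exists, so 128-bit shifts are expanded inline rather than
  // emitted as calls to __ashlti3 and friends.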
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
    // Double-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 2
    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);

    // Double-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 3
    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
    setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);

    // Single-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 4
    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);

    // Single-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 5
    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
    setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);

    // Floating-point to integer conversions.
    // RTABI chapter 4.1.2, Table 6
    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);

    // Conversions between floating types.
    // RTABI chapter 4.1.2, Table 7
    setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
    setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
    setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);

    // Integer to floating-point conversions.
    // RTABI chapter 4.1.2, Table 8
    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);

    // Long long helper functions
    // RTABI chapter 4.2, Table 9
    setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);

    // Integer division functions
    // RTABI chapter 4.3.1
    setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);

    // Memory operations
    // RTABI chapter 4.3.4
    setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
    setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
    setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
    setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
  }

  // Use divmod compiler-rt calls for iOS 5.0 and later.
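  // A divmod helper computes the quotient and remainder in a single call, so
  // a div and a rem of the same operands can fold into one libcall.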
  if (Subtarget->getTargetTriple().isiOS() &&
      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
  else
    addRegisterClass(MVT::i32, &ARM::GPRRegClass);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, &ARM::SPRRegClass);
    if (!Subtarget->isFPOnlySP())
      addRegisterClass(MVT::f64, &ARM::DPRRegClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction((MVT::SimpleValueType)VT,
                          (MVT::SimpleValueType)InnerVT, Expand);
    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
  }

  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it. The same
    // mostly applies to v4f32, though vadd, vsub, and vmul are natively
    // supported for v4f32.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    // FIXME: Code duplication: FDIV and FREM are expanded always, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    // FIXME: Create unittest.
    // In other words, find a way to detect when "copysign" appears in a DAG
    // with vector operands.
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    // FIXME: Code duplication: SETCC has custom operation action, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    // FIXME: Create unittest for FNEG and for FABS.
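    // Because v2f64 is a legal register-class type, every operation on it
    // would otherwise default to Legal; each op the hardware cannot actually
    // perform has to be explicitly expanded back to scalar f64 operations.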
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
    setOperationAction(ISD::FMA, MVT::v2f64, Expand);

    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);

    // Mark v2f32 intrinsics.
    setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
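    // e.g. a v8i16 multiply whose operands are sign- or zero-extended from
    // v8i8 can be selected as a single VMULL.S8 / VMULL.U8 instead of two
    // extends plus a full-width multiply.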
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
    // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
    // a destination type that is wider than the source, nor does it have a
    // FP_TO_[SU]INT instruction with a narrower destination than source.
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);

    setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);

    // Custom expand long extensions to vectors.
    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);

    // NEON does not have single instruction CTPOP for vectors with element
    // types wider than 8-bits. However, custom lowering can leverage the
    // v8i8/v16i8 vcnt instruction.
    setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
    setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
    setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
    setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);

    // NEON only has FMA instructions as of VFP4.
    if (!Subtarget->hasVFP4()) {
      setOperationAction(ISD::FMA, MVT::v2f32, Expand);
      setOperationAction(ISD::FMA, MVT::v4f32, Expand);
    }

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
    setTargetDAGCombine(ISD::FDIV);

    // It is legal to extload from v4i8 to v4i16 or v4i32.
    MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
                  MVT::v4i16, MVT::v2i16,
                  MVT::v2i32};
    for (unsigned i = 0; i < 6; ++i) {
      setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
    }
  }

  // ARM and Thumb2 support UMLAL/SMLAL.
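  // The ADDC combine below recognizes a 64-bit add of the lo/hi halves of a
  // 32x32->64 multiply and merges them into ARMISD::UMLAL / ARMISD::SMLAL
  // nodes.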
  if (!Subtarget->isThumb1Only())
    setTargetDAGCombine(ISD::ADDC);

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, MVT::i1, Legal);
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  if (!Subtarget->isThumb1Only()) {
    // FIXME: We should do this for Thumb1 as well.
    setOperationAction(ISD::ADDC, MVT::i32, Custom);
    setOperationAction(ISD::ADDE, MVT::i32, Custom);
    setOperationAction(ISD::SUBC, MVT::i32, Custom);
    setOperationAction(ISD::SUBE, MVT::i32, Custom);
  }

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // These just redirect to CTTZ and CTLZ on ARM.
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
      !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
    // These are expanded into libcalls if the CPU doesn't have a HW divider.
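    // (hasDivide covers the Thumb2 SDIV/UDIV encodings; hasDivideInARMMode
    // covers the ARM-mode encodings added with the virtualization
    // extensions.)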
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }

  // FIXME: Also set divmod for SREM on EABI
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  // Register based DivRem for AEABI (RTABI 4.2)
  if (Subtarget->isTargetAEABI()) {
    setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
    setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
    setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");

    setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);

    setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  }

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  if (!Subtarget->isTargetDarwin()) {
    // Non-Darwin platforms may return values in these registers via the
    // personality function.
    setExceptionPointerRegister(ARM::R0);
    setExceptionSelectorRegister(ARM::R1);
  }

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
    // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and
    // handled normally.
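    // The 64-bit operations below have no single-copy atomic instruction;
    // they are lowered to LDREXD/STREXD retry loops by the custom inserters
    // later in this file.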
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
    // Custom lowering for 64-bit ops
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
    // On v8, we have particularly efficient implementations of atomic fences
    // if they can be combined with nearby atomic loads and stores.
    if (!Subtarget->hasV8Ops()) {
      // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
      setInsertFencesForAtomic(true);
    }
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
  } else {
    // If there's anything we can use as a barrier, go through custom lowering
    // for ATOMIC_FENCE.
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
                       Subtarget->hasAnyDataBarrier() ? Custom : Expand);

    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    // Unordered/Monotonic case.
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
  }

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
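  // (e.g. the thread-pointer and SjLj EH intrinsics are rewritten to target
  // nodes in LowerINTRINSIC_WO_CHAIN.)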
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  }

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  if (!Subtarget->hasVFP4()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  }

  // Various VFP goodness
  if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::XOR);

  if (Subtarget->hasV6Ops())
    setTargetDAGCombine(ISD::SRL);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
      !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  MaxStoresPerMemset = 8;
  MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
  MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
  MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  // Prefer likely predicted branches to selects on out-of-order cores.
  PredictableSelectIsExpensive = Subtarget->isLikeA9();

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
                                  bool isThumb2, unsigned &LdrOpc,
                                  unsigned &StrOpc) {
  static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB},
                                           {ARM::LDREXH, ARM::t2LDREXH},
                                           {ARM::LDREX, ARM::t2LDREX},
                                           {ARM::LDREXD, ARM::t2LDREXD}};
  static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB},
                                          {ARM::LDAEXH, ARM::t2LDAEXH},
                                          {ARM::LDAEX, ARM::t2LDAEX},
                                          {ARM::LDAEXD, ARM::t2LDAEXD}};
  static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB},
                                            {ARM::STREXH, ARM::t2STREXH},
                                            {ARM::STREX, ARM::t2STREX},
                                            {ARM::STREXD, ARM::t2STREXD}};
  static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB},
                                           {ARM::STLEXH, ARM::t2STLEXH},
                                           {ARM::STLEX, ARM::t2STLEX},
                                           {ARM::STLEXD, ARM::t2STLEXD}};

  const unsigned (*LoadOps)[2], (*StoreOps)[2];
  if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
    LoadOps = LoadAcqs;
  else
    LoadOps = LoadBares;

  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
    StoreOps = StoreRels;
  else
    StoreOps = StoreBares;

  assert(isPowerOf2_32(Size) && Size <= 8 &&
         "unsupported size for atomic binary op!");

  LdrOpc = LoadOps[Log2_32(Size)][isThumb2];
  StrOpc = StoreOps[Log2_32(Size)][isThumb2];
}

// FIXME: It might make sense to define the representative register class as
// the nearest super-register that has a non-null superset. For example,
// DPR_VFP2 is a super-register of SPR, and DPR is a superset of DPR_VFP2.
// Consequently, SPR's representative would be DPR_VFP2. This should work well
// if register pressure tracking were modified such that a register use would
// increment the pressure of the register class's representative and all of
// its super classes' representatives transitively. We have not implemented
// this because of the difficulty prior to coalescing of modeling operand
// register classes due to the common occurrence of cross class copies and
// subregister insertions and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(MVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
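  // (Types that need a Q register cost 2 since a Q is a pair of Ds; v4i64
  // and v8i64 map to QQ and QQQQ tuples, hence costs of 4 and 8 below.)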
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = &ARM::DPRRegClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = &ARM::DPRRegClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = &ARM::DPRRegClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = &ARM::DPRRegClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper: return "ARMISD::Wrapper";
  case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN";
  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
  case ARMISD::CALL: return "ARMISD::CALL";
  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL: return "ARMISD::tCALL";
  case ARMISD::BRCOND: return "ARMISD::BRCOND";
  case ARMISD::BR_JT: return "ARMISD::BR_JT";
  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
  case ARMISD::CMP: return "ARMISD::CMP";
  case ARMISD::CMN: return "ARMISD::CMN";
  case ARMISD::CMPZ: return "ARMISD::CMPZ";
  case ARMISD::CMPFP: return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";

  case ARMISD::CMOV: return "ARMISD::CMOV";

  case ARMISD::RBIT: return "ARMISD::RBIT";

  case ARMISD::FTOSI: return "ARMISD::FTOSI";
  case ARMISD::FTOUI: return "ARMISD::FTOUI";
  case ARMISD::SITOF: return "ARMISD::SITOF";
  case ARMISD::UITOF: return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
  case ARMISD::RRX: return "ARMISD::RRX";

  case ARMISD::ADDC: return "ARMISD::ADDC";
  case ARMISD::ADDE: return "ARMISD::ADDE";
  case ARMISD::SUBC: return "ARMISD::SUBC";
  case ARMISD::SUBE: return "ARMISD::SUBE";

  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";

  case ARMISD::VCEQ: return "ARMISD::VCEQ";
  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
  case ARMISD::VCGE: return "ARMISD::VCGE";
  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
"ARMISD::VCGEZ"; 1069 case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; 1070 case ARMISD::VCGEU: return "ARMISD::VCGEU"; 1071 case ARMISD::VCGT: return "ARMISD::VCGT"; 1072 case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; 1073 case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; 1074 case ARMISD::VCGTU: return "ARMISD::VCGTU"; 1075 case ARMISD::VTST: return "ARMISD::VTST"; 1076 1077 case ARMISD::VSHL: return "ARMISD::VSHL"; 1078 case ARMISD::VSHRs: return "ARMISD::VSHRs"; 1079 case ARMISD::VSHRu: return "ARMISD::VSHRu"; 1080 case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; 1081 case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; 1082 case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; 1083 case ARMISD::VSHRN: return "ARMISD::VSHRN"; 1084 case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; 1085 case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; 1086 case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; 1087 case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; 1088 case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; 1089 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; 1090 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; 1091 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 1092 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 1093 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 1094 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 1095 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 1096 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 1097 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 1098 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; 1099 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; 1100 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; 1101 case ARMISD::VDUP: return "ARMISD::VDUP"; 1102 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 1103 case ARMISD::VEXT: return "ARMISD::VEXT"; 1104 case ARMISD::VREV64: return "ARMISD::VREV64"; 1105 case ARMISD::VREV32: return "ARMISD::VREV32"; 1106 case ARMISD::VREV16: return "ARMISD::VREV16"; 1107 case ARMISD::VZIP: return "ARMISD::VZIP"; 1108 case ARMISD::VUZP: return "ARMISD::VUZP"; 1109 case ARMISD::VTRN: return "ARMISD::VTRN"; 1110 case ARMISD::VTBL1: return "ARMISD::VTBL1"; 1111 case ARMISD::VTBL2: return "ARMISD::VTBL2"; 1112 case ARMISD::VMULLs: return "ARMISD::VMULLs"; 1113 case ARMISD::VMULLu: return "ARMISD::VMULLu"; 1114 case ARMISD::UMLAL: return "ARMISD::UMLAL"; 1115 case ARMISD::SMLAL: return "ARMISD::SMLAL"; 1116 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; 1117 case ARMISD::FMAX: return "ARMISD::FMAX"; 1118 case ARMISD::FMIN: return "ARMISD::FMIN"; 1119 case ARMISD::VMAXNM: return "ARMISD::VMAX"; 1120 case ARMISD::VMINNM: return "ARMISD::VMIN"; 1121 case ARMISD::BFI: return "ARMISD::BFI"; 1122 case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; 1123 case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; 1124 case ARMISD::VBSL: return "ARMISD::VBSL"; 1125 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; 1126 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; 1127 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; 1128 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; 1129 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; 1130 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; 1131 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; 1132 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; 1133 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; 1134 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; 1135 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; 1136 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; 1137 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; 
  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
  }
}

EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector()) return getPointerTy();
  return VT.changeVectorElementTypeToInteger();
}

/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return &ARM::QQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::QQQQPRRegClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  return ARM::createFastISel(funcInfo, libInfo);
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
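  // Treat any instruction whose first def becomes available more than two
  // cycles out as load-like and schedule it for ILP.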
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::ILP;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE: return ARMCC::NE;
  case ISD::SETEQ: return ARMCC::EQ;
  case ISD::SETGT: return ARMCC::GT;
  case ISD::SETGE: return ARMCC::GE;
  case ISD::SETLT: return ARMCC::LT;
  case ISD::SETLE: return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO: CondCode = ARMCC::VC; break;
  case ISD::SETUO: CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    // Fallthrough
  case CallingConv::C: {
    // Use target triple & subtarget features to do actual dispatch.
    if (!Subtarget->isAAPCS_ABI())
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
RetCC_ARM_APCS : CC_ARM_APCS); 1287 else if (Subtarget->hasVFP2() && 1288 getTargetMachine().Options.FloatABIType == FloatABI::Hard && 1289 !isVarArg) 1290 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1291 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); 1292 } 1293 case CallingConv::ARM_AAPCS_VFP: 1294 if (!isVarArg) 1295 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1296 // Fallthrough 1297 case CallingConv::ARM_AAPCS: 1298 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); 1299 case CallingConv::ARM_APCS: 1300 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); 1301 case CallingConv::GHC: 1302 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); 1303 } 1304 } 1305 1306 /// LowerCallResult - Lower the result values of a call into the 1307 /// appropriate copies out of appropriate physical registers. 1308 SDValue 1309 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 1310 CallingConv::ID CallConv, bool isVarArg, 1311 const SmallVectorImpl<ISD::InputArg> &Ins, 1312 SDLoc dl, SelectionDAG &DAG, 1313 SmallVectorImpl<SDValue> &InVals, 1314 bool isThisReturn, SDValue ThisVal) const { 1315 1316 // Assign locations to each value returned by this call. 1317 SmallVector<CCValAssign, 16> RVLocs; 1318 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1319 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 1320 CCInfo.AnalyzeCallResult(Ins, 1321 CCAssignFnForNode(CallConv, /* Return*/ true, 1322 isVarArg)); 1323 1324 // Copy all of the result registers out of their specified physreg. 1325 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1326 CCValAssign VA = RVLocs[i]; 1327 1328 // Pass 'this' value directly from the argument to return value, to avoid 1329 // reg unit interference 1330 if (i == 0 && isThisReturn) { 1331 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && 1332 "unexpected return calling convention register assignment"); 1333 InVals.push_back(ThisVal); 1334 continue; 1335 } 1336 1337 SDValue Val; 1338 if (VA.needsCustom()) { 1339 // Handle f64 or half of a v2f64. 
1340 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1341 InFlag);
1342 Chain = Lo.getValue(1);
1343 InFlag = Lo.getValue(2);
1344 VA = RVLocs[++i]; // skip ahead to next loc
1345 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1346 InFlag);
1347 Chain = Hi.getValue(1);
1348 InFlag = Hi.getValue(2);
1349 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1350
1351 if (VA.getLocVT() == MVT::v2f64) {
1352 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1353 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1354 DAG.getConstant(0, MVT::i32));
1355
1356 VA = RVLocs[++i]; // skip ahead to next loc
1357 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1358 Chain = Lo.getValue(1);
1359 InFlag = Lo.getValue(2);
1360 VA = RVLocs[++i]; // skip ahead to next loc
1361 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1362 Chain = Hi.getValue(1);
1363 InFlag = Hi.getValue(2);
1364 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1365 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1366 DAG.getConstant(1, MVT::i32));
1367 }
1368 } else {
1369 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1370 InFlag);
1371 Chain = Val.getValue(1);
1372 InFlag = Val.getValue(2);
1373 }
1374
1375 switch (VA.getLocInfo()) {
1376 default: llvm_unreachable("Unknown loc info!");
1377 case CCValAssign::Full: break;
1378 case CCValAssign::BCvt:
1379 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1380 break;
1381 }
1382
1383 InVals.push_back(Val);
1384 }
1385
1386 return Chain;
1387 }
1388
1389 /// LowerMemOpCallTo - Store the argument to the stack.
1390 SDValue
1391 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
1392 SDValue StackPtr, SDValue Arg,
1393 SDLoc dl, SelectionDAG &DAG,
1394 const CCValAssign &VA,
1395 ISD::ArgFlagsTy Flags) const {
1396 unsigned LocMemOffset = VA.getLocMemOffset();
1397 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1398 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
1399 return DAG.getStore(Chain, dl, Arg, PtrOff,
1400 MachinePointerInfo::getStack(LocMemOffset),
1401 false, false, 0);
1402 }
1403
1404 void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
1405 SDValue Chain, SDValue &Arg,
1406 RegsToPassVector &RegsToPass,
1407 CCValAssign &VA, CCValAssign &NextVA,
1408 SDValue &StackPtr,
1409 SmallVectorImpl<SDValue> &MemOpChains,
1410 ISD::ArgFlagsTy Flags) const {
1411
1412 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1413 DAG.getVTList(MVT::i32, MVT::i32), Arg);
1414 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
1415
1416 if (NextVA.isRegLoc())
1417 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
1418 else {
1419 assert(NextVA.isMemLoc());
1420 if (StackPtr.getNode() == 0)
1421 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1422
1423 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
1424 dl, DAG, NextVA,
1425 Flags));
1426 }
1427 }
1428
1429 /// LowerCall - Lowering a call into a callseq_start <-
1430 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1431 /// nodes.
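/// For illustration (a sketch, with operands elided), a direct call with a
/// single i32 argument is built up roughly as:
///   callseq_start -> CopyToReg(R0, arg) -> ARMISD::CALL -> callseq_end
/// which later selection turns into "mov r0, ...; bl callee".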
1432 SDValue 1433 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 1434 SmallVectorImpl<SDValue> &InVals) const { 1435 SelectionDAG &DAG = CLI.DAG; 1436 SDLoc &dl = CLI.DL; 1437 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 1438 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 1439 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 1440 SDValue Chain = CLI.Chain; 1441 SDValue Callee = CLI.Callee; 1442 bool &isTailCall = CLI.IsTailCall; 1443 CallingConv::ID CallConv = CLI.CallConv; 1444 bool doesNotRet = CLI.DoesNotReturn; 1445 bool isVarArg = CLI.IsVarArg; 1446 1447 MachineFunction &MF = DAG.getMachineFunction(); 1448 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); 1449 bool isThisReturn = false; 1450 bool isSibCall = false; 1451 // Disable tail calls if they're not supported. 1452 if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) 1453 isTailCall = false; 1454 if (isTailCall) { 1455 // Check if it's really possible to do a tail call. 1456 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, 1457 isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), 1458 Outs, OutVals, Ins, DAG); 1459 // We don't support GuaranteedTailCallOpt for ARM, only automatically 1460 // detected sibcalls. 1461 if (isTailCall) { 1462 ++NumTailCalls; 1463 isSibCall = true; 1464 } 1465 } 1466 1467 // Analyze operands of the call, assigning locations to each operand. 1468 SmallVector<CCValAssign, 16> ArgLocs; 1469 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1470 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 1471 CCInfo.AnalyzeCallOperands(Outs, 1472 CCAssignFnForNode(CallConv, /* Return*/ false, 1473 isVarArg)); 1474 1475 // Get a count of how many bytes are to be pushed on the stack. 1476 unsigned NumBytes = CCInfo.getNextStackOffset(); 1477 1478 // For tail calls, memory operands are available in our caller's stack. 1479 if (isSibCall) 1480 NumBytes = 0; 1481 1482 // Adjust the stack pointer for the new arguments... 1483 // These operations are automatically eliminated by the prolog/epilog pass 1484 if (!isSibCall) 1485 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 1486 dl); 1487 1488 SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1489 1490 RegsToPassVector RegsToPass; 1491 SmallVector<SDValue, 8> MemOpChains; 1492 1493 // Walk the register/memloc assignments, inserting copies/loads. In the case 1494 // of tail call optimization, arguments are handled later. 1495 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1496 i != e; 1497 ++i, ++realArgIdx) { 1498 CCValAssign &VA = ArgLocs[i]; 1499 SDValue Arg = OutVals[realArgIdx]; 1500 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1501 bool isByVal = Flags.isByVal(); 1502 1503 // Promote the value if needed. 
1504 switch (VA.getLocInfo()) {
1505 default: llvm_unreachable("Unknown loc info!");
1506 case CCValAssign::Full: break;
1507 case CCValAssign::SExt:
1508 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1509 break;
1510 case CCValAssign::ZExt:
1511 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1512 break;
1513 case CCValAssign::AExt:
1514 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1515 break;
1516 case CCValAssign::BCvt:
1517 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1518 break;
1519 }
1520
1521 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1522 if (VA.needsCustom()) {
1523 if (VA.getLocVT() == MVT::v2f64) {
1524 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1525 DAG.getConstant(0, MVT::i32));
1526 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1527 DAG.getConstant(1, MVT::i32));
1528
1529 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1530 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1531
1532 VA = ArgLocs[++i]; // skip ahead to next loc
1533 if (VA.isRegLoc()) {
1534 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1535 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1536 } else {
1537 assert(VA.isMemLoc());
1538
1539 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1540 dl, DAG, VA, Flags));
1541 }
1542 } else {
1543 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1544 StackPtr, MemOpChains, Flags);
1545 }
1546 } else if (VA.isRegLoc()) {
1547 if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
1548 assert(VA.getLocVT() == MVT::i32 &&
1549 "unexpected calling convention register assignment");
1550 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1551 "unexpected use of 'returned'");
1552 isThisReturn = true;
1553 }
1554 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1555 } else if (isByVal) {
1556 assert(VA.isMemLoc());
1557 unsigned offset = 0;
1558
1559 // True if this byval aggregate will be split between registers
1560 // and memory.
1561 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1562 unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
1563
1564 if (CurByValIdx < ByValArgsCount) {
1565
1566 unsigned RegBegin, RegEnd;
1567 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1568
1569 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1570 unsigned int i, j;
1571 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1572 SDValue Const = DAG.getConstant(4*i, MVT::i32);
1573 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1574 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1575 MachinePointerInfo(),
1576 false, false, false,
1577 DAG.InferPtrAlignment(AddArg));
1578 MemOpChains.push_back(Load.getValue(1));
1579 RegsToPass.push_back(std::make_pair(j, Load));
1580 }
1581
1582 // If the parameter size exceeds the register area, the "offset" value
1583 // helps us calculate the stack slot for the remaining part properly.
1584 offset = RegEnd - RegBegin; 1585 1586 CCInfo.nextInRegsParam(); 1587 } 1588 1589 if (Flags.getByValSize() > 4*offset) { 1590 unsigned LocMemOffset = VA.getLocMemOffset(); 1591 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); 1592 SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, 1593 StkPtrOff); 1594 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); 1595 SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); 1596 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, 1597 MVT::i32); 1598 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); 1599 1600 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 1601 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; 1602 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, 1603 Ops, array_lengthof(Ops))); 1604 } 1605 } else if (!isSibCall) { 1606 assert(VA.isMemLoc()); 1607 1608 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 1609 dl, DAG, VA, Flags)); 1610 } 1611 } 1612 1613 if (!MemOpChains.empty()) 1614 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1615 &MemOpChains[0], MemOpChains.size()); 1616 1617 // Build a sequence of copy-to-reg nodes chained together with token chain 1618 // and flag operands which copy the outgoing args into the appropriate regs. 1619 SDValue InFlag; 1620 // Tail call byval lowering might overwrite argument registers so in case of 1621 // tail call optimization the copies to registers are lowered later. 1622 if (!isTailCall) 1623 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1624 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1625 RegsToPass[i].second, InFlag); 1626 InFlag = Chain.getValue(1); 1627 } 1628 1629 // For tail calls lower the arguments to the 'real' stack slot. 1630 if (isTailCall) { 1631 // Force all the incoming stack arguments to be loaded from the stack 1632 // before any new outgoing arguments are stored to the stack, because the 1633 // outgoing stack slots may alias the incoming argument stack slots, and 1634 // the alias isn't otherwise explicit. This is slightly more conservative 1635 // than necessary, because it means that each store effectively depends 1636 // on every argument instead of just those arguments it would clobber. 1637 1638 // Do not flag preceding copytoreg stuff together with the following stuff. 1639 InFlag = SDValue(); 1640 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1641 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1642 RegsToPass[i].second, InFlag); 1643 InFlag = Chain.getValue(1); 1644 } 1645 InFlag = SDValue(); 1646 } 1647 1648 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1649 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1650 // node so that legalize doesn't hack it. 1651 bool isDirect = false; 1652 bool isARMFunc = false; 1653 bool isLocalARMFunc = false; 1654 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1655 1656 if (EnableARMLongCalls) { 1657 assert (getTargetMachine().getRelocationModel() == Reloc::Static 1658 && "long-calls with non-static relocation model!"); 1659 // Handle a global address or an external symbol. If it's not one of 1660 // those, the target's already in a register, so we don't need to do 1661 // anything extra. 
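// As an illustrative sketch, the long-call lowering below loads the callee's
// address from the constant pool and calls it indirectly, e.g. (on v5T+;
// register and label names are up to the allocator and the emitter):
//   ldr rN, LCPIn   @ rN = callee address from the constant pool
//   blx rN          @ indirect call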
1662 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1663 const GlobalValue *GV = G->getGlobal(); 1664 // Create a constant pool entry for the callee address 1665 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1666 ARMConstantPoolValue *CPV = 1667 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); 1668 1669 // Get the address of the callee into a register 1670 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1671 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1672 Callee = DAG.getLoad(getPointerTy(), dl, 1673 DAG.getEntryNode(), CPAddr, 1674 MachinePointerInfo::getConstantPool(), 1675 false, false, false, 0); 1676 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { 1677 const char *Sym = S->getSymbol(); 1678 1679 // Create a constant pool entry for the callee address 1680 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1681 ARMConstantPoolValue *CPV = 1682 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1683 ARMPCLabelIndex, 0); 1684 // Get the address of the callee into a register 1685 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1686 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1687 Callee = DAG.getLoad(getPointerTy(), dl, 1688 DAG.getEntryNode(), CPAddr, 1689 MachinePointerInfo::getConstantPool(), 1690 false, false, false, 0); 1691 } 1692 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1693 const GlobalValue *GV = G->getGlobal(); 1694 isDirect = true; 1695 bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); 1696 bool isStub = (isExt && Subtarget->isTargetDarwin()) && 1697 getTargetMachine().getRelocationModel() != Reloc::Static; 1698 isARMFunc = !Subtarget->isThumb() || isStub; 1699 // ARM call to a local ARM function is predicable. 1700 isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); 1701 // tBX takes a register source operand. 1702 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1703 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1704 ARMConstantPoolValue *CPV = 1705 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); 1706 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1707 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1708 Callee = DAG.getLoad(getPointerTy(), dl, 1709 DAG.getEntryNode(), CPAddr, 1710 MachinePointerInfo::getConstantPool(), 1711 false, false, false, 0); 1712 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1713 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1714 getPointerTy(), Callee, PICLabel); 1715 } else { 1716 // On ELF targets for PIC code, direct calls should go through the PLT 1717 unsigned OpFlags = 0; 1718 if (Subtarget->isTargetELF() && 1719 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1720 OpFlags = ARMII::MO_PLT; 1721 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); 1722 } 1723 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1724 isDirect = true; 1725 bool isStub = Subtarget->isTargetDarwin() && 1726 getTargetMachine().getRelocationModel() != Reloc::Static; 1727 isARMFunc = !Subtarget->isThumb() || isStub; 1728 // tBX takes a register source operand. 
1729 const char *Sym = S->getSymbol();
1730 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1731 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
1732 ARMConstantPoolValue *CPV =
1733 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
1734 ARMPCLabelIndex, 4);
1735 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1736 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1737 Callee = DAG.getLoad(getPointerTy(), dl,
1738 DAG.getEntryNode(), CPAddr,
1739 MachinePointerInfo::getConstantPool(),
1740 false, false, false, 0);
1741 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1742 Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1743 getPointerTy(), Callee, PICLabel);
1744 } else {
1745 unsigned OpFlags = 0;
1746 // On ELF targets for PIC code, direct calls should go through the PLT
1747 if (Subtarget->isTargetELF() &&
1748 getTargetMachine().getRelocationModel() == Reloc::PIC_)
1749 OpFlags = ARMII::MO_PLT;
1750 Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
1751 }
1752 }
1753
1754 // FIXME: handle tail calls differently.
1755 unsigned CallOpc;
1756 bool HasMinSizeAttr = MF.getFunction()->getAttributes().
1757 hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
1758 if (Subtarget->isThumb()) {
1759 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
1760 CallOpc = ARMISD::CALL_NOLINK;
1761 else
1762 CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
1763 } else {
1764 if (!isDirect && !Subtarget->hasV5TOps())
1765 CallOpc = ARMISD::CALL_NOLINK;
1766 else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
1767 // Emit regular call when code size is the priority
1768 !HasMinSizeAttr)
1769 // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
1770 CallOpc = ARMISD::CALL_NOLINK;
1771 else
1772 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
1773 }
1774
1775 std::vector<SDValue> Ops;
1776 Ops.push_back(Chain);
1777 Ops.push_back(Callee);
1778
1779 // Add argument registers to the end of the list so that they are known live
1780 // into the call.
1781 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1782 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1783 RegsToPass[i].second.getValueType()));
1784
1785 // Add a register mask operand representing the call-preserved registers.
1786 if (!isTailCall) {
1787 const uint32_t *Mask;
1788 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
1789 const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
1790 if (isThisReturn) {
1791 // For 'this' returns, use the R0-preserving mask if applicable
1792 Mask = ARI->getThisReturnPreservedMask(CallConv);
1793 if (!Mask) {
1794 // Set isThisReturn to false if the calling convention is not one that
1795 // allows 'returned' to be modeled in this way, so LowerCallResult does
1796 // not try to pass 'this' straight through
1797 isThisReturn = false;
1798 Mask = ARI->getCallPreservedMask(CallConv);
1799 }
1800 } else
1801 Mask = ARI->getCallPreservedMask(CallConv);
1802
1803 assert(Mask && "Missing call preserved mask for calling convention");
1804 Ops.push_back(DAG.getRegisterMask(Mask));
1805 }
1806
1807 if (InFlag.getNode())
1808 Ops.push_back(InFlag);
1809
1810 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1811 if (isTailCall)
1812 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
1813
1814 // Returns a chain and a flag for retval copy to use.
1815 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
1816 InFlag = Chain.getValue(1);
1817
1818 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1819 DAG.getIntPtrConstant(0, true), InFlag, dl);
1820 if (!Ins.empty())
1821 InFlag = Chain.getValue(1);
1822
1823 // Handle result values, copying them out of physregs into vregs that we
1824 // return.
1825 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
1826 InVals, isThisReturn,
1827 isThisReturn ? OutVals[0] : SDValue());
1828 }
1829
1830 /// HandleByVal - Every parameter *after* a byval parameter is passed
1831 /// on the stack. Remember the next parameter register to allocate,
1832 /// and then confiscate the rest of the parameter registers to ensure
1833 /// this.
1834 void
1835 ARMTargetLowering::HandleByVal(
1836 CCState *State, unsigned &size, unsigned Align) const {
1837 unsigned reg = State->AllocateReg(GPRArgRegs, 4);
1838 assert((State->getCallOrPrologue() == Prologue ||
1839 State->getCallOrPrologue() == Call) &&
1840 "unhandled ParmContext");
1841
1842 // For in-prologue parameter handling, we also introduce a stack offset
1843 // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
1844 // This behaviour deviates from the AAPCS rules (5.5 Parameter Passing) on how
1845 // NSAA should be evaluated (NSAA means "next stacked argument address").
1846 // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
1847 // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
1848 unsigned NSAAOffset = State->getNextStackOffset();
1849 if (State->getCallOrPrologue() != Call) {
1850 for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
1851 unsigned RB, RE;
1852 State->getInRegsParamInfo(i, RB, RE);
1853 assert(NSAAOffset >= (RE-RB)*4 &&
1854 "Stack offset for byval regs no longer introduced?");
1855 NSAAOffset -= (RE-RB)*4;
1856 }
1857 }
1858 if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
1859 if (Subtarget->isAAPCS_ABI() && Align > 4) {
1860 unsigned AlignInRegs = Align / 4;
1861 unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
1862 for (unsigned i = 0; i < Waste; ++i)
1863 reg = State->AllocateReg(GPRArgRegs, 4);
1864 }
1865 if (reg != 0) {
1866 unsigned excess = 4 * (ARM::R4 - reg);
1867
1868 // Special case when NSAA != SP and the parameter size is greater than
1869 // the size of all remaining GPR regs. In that case we can't split the
1870 // parameter; we must send it to the stack. We must also set NCRN to R4,
1871 // so waste all remaining registers.
1872 if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
1873 while (State->AllocateReg(GPRArgRegs, 4))
1874 ;
1875 return;
1876 }
1877
1878 // The first register for the byval parameter is the first register that
1879 // wasn't allocated before this method call, so it would be "reg".
1880 // If the parameter is small enough to be saved in the range [reg, r4),
1881 // the end (first-past-last) register would be reg + param-size-in-regs;
1882 // otherwise the parameter is split between registers and stack,
1883 // and the end register would be r4 in that case.
1884 unsigned ByValRegBegin = reg;
1885 unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
1886 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
1887 // Note: the first register was already allocated at the beginning of this
1888 // function; allocate the remaining registers we need.
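// Illustrative example (hypothetical values, assuming NSAAOffset == 0): for
// a 20-byte byval parameter with r1 as the first free register, ByValRegBegin
// is r1 and ByValRegEnd is r4, so r1-r3 carry the first 12 bytes and the
// remaining 8 bytes are passed on the stack.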
1889 for (unsigned i = reg+1; i != ByValRegEnd; ++i)
1890 State->AllocateReg(GPRArgRegs, 4);
1891 // At a call site, a byval parameter that is split between
1892 // registers and memory needs its size truncated here. In a
1893 // function prologue, such byval parameters are reassembled in
1894 // memory, and are not truncated.
1895 if (State->getCallOrPrologue() == Call) {
1896 // Make the remaining size equal to 0 when the whole structure
1897 // can be stored in registers.
1898 if (size < excess)
1899 size = 0;
1900 else
1901 size -= excess;
1902 }
1903 }
1904 }
1905 }
1906
1907 /// MatchingStackOffset - Return true if the given stack call argument is
1908 /// already available at the same (relative) position in the caller's
1909 /// incoming argument stack.
1910 static
1911 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
1912 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
1913 const TargetInstrInfo *TII) {
1914 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
1915 int FI = INT_MAX;
1916 if (Arg.getOpcode() == ISD::CopyFromReg) {
1917 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
1918 if (!TargetRegisterInfo::isVirtualRegister(VR))
1919 return false;
1920 MachineInstr *Def = MRI->getVRegDef(VR);
1921 if (!Def)
1922 return false;
1923 if (!Flags.isByVal()) {
1924 if (!TII->isLoadFromStackSlot(Def, FI))
1925 return false;
1926 } else {
1927 return false;
1928 }
1929 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
1930 if (Flags.isByVal())
1931 // ByVal argument is passed in as a pointer but it's now being
1932 // dereferenced. e.g.
1933 // define @foo(%struct.X* %A) {
1934 // tail call @bar(%struct.X* byval %A)
1935 // }
1936 return false;
1937 SDValue Ptr = Ld->getBasePtr();
1938 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
1939 if (!FINode)
1940 return false;
1941 FI = FINode->getIndex();
1942 } else
1943 return false;
1944
1945 assert(FI != INT_MAX);
1946 if (!MFI->isFixedObjectIndex(FI))
1947 return false;
1948 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
1949 }
1950
1951 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
1952 /// for tail call optimization. Targets which want to do tail call
1953 /// optimization should implement this function.
1954 bool
1955 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
1956 CallingConv::ID CalleeCC,
1957 bool isVarArg,
1958 bool isCalleeStructRet,
1959 bool isCallerStructRet,
1960 const SmallVectorImpl<ISD::OutputArg> &Outs,
1961 const SmallVectorImpl<SDValue> &OutVals,
1962 const SmallVectorImpl<ISD::InputArg> &Ins,
1963 SelectionDAG& DAG) const {
1964 const Function *CallerF = DAG.getMachineFunction().getFunction();
1965 CallingConv::ID CallerCC = CallerF->getCallingConv();
1966 bool CCMatch = CallerCC == CalleeCC;
1967
1968 // Look for obvious safe cases to perform tail call optimization that do not
1969 // require ABI changes. This is what gcc calls sibcall.
1970
1971 // Do not sibcall optimize vararg calls unless the call site is not passing
1972 // any arguments.
1973 if (isVarArg && !Outs.empty())
1974 return false;
1975
1976 // Exception-handling functions need a special set of instructions to indicate
1977 // a return to the hardware. Tail-calling another function would probably
1978 // break this.
1979 if (CallerF->hasFnAttribute("interrupt"))
1980 return false;
1981
1982 // Also avoid sibcall optimization if either caller or callee uses struct
1983 // return semantics.
1984 if (isCalleeStructRet || isCallerStructRet)
1985 return false;
1986
1987 // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
1988 // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
1989 // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
1990 // support in the assembler and linker to be used. This would need to be
1991 // fixed to fully support tail calls in Thumb1.
1992 //
1993 // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
1994 // LR. This means if we need to reload LR, it takes an extra instruction,
1995 // which outweighs the value of the tail call; but here we don't know yet
1996 // whether LR is going to be used. Probably the right approach is to
1997 // generate the tail call here and turn it back into CALL/RET in
1998 // emitEpilogue if LR is used.
1999
2000 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
2001 // but we need to make sure there are enough registers; the only valid
2002 // registers are the 4 used for parameters. We don't currently do this
2003 // case.
2004 if (Subtarget->isThumb1Only())
2005 return false;
2006
2007 // If the calling conventions do not match, then we'd better make sure the
2008 // results are returned in the same way as what the caller expects.
2009 if (!CCMatch) {
2010 SmallVector<CCValAssign, 16> RVLocs1;
2011 ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
2012 getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
2013 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
2014
2015 SmallVector<CCValAssign, 16> RVLocs2;
2016 ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
2017 getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
2018 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
2019
2020 if (RVLocs1.size() != RVLocs2.size())
2021 return false;
2022 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
2023 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
2024 return false;
2025 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
2026 return false;
2027 if (RVLocs1[i].isRegLoc()) {
2028 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
2029 return false;
2030 } else {
2031 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
2032 return false;
2033 }
2034 }
2035 }
2036
2037 // If the caller's vararg or byval argument has been split between registers
2038 // and stack, do not perform a tail call, since part of the argument is in
2039 // the caller's local frame.
2040 const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
2041 getInfo<ARMFunctionInfo>();
2042 if (AFI_Caller->getArgRegsSaveSize())
2043 return false;
2044
2045 // If the callee takes no arguments then go on to check the results of the
2046 // call.
2047 if (!Outs.empty()) {
2048 // Check if stack adjustment is needed. For now, do not do this if any
2049 // argument is passed on the stack.
2050 SmallVector<CCValAssign, 16> ArgLocs;
2051 ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
2052 getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
2053 CCInfo.AnalyzeCallOperands(Outs,
2054 CCAssignFnForNode(CalleeCC, false, isVarArg));
2055 if (CCInfo.getNextStackOffset()) {
2056 MachineFunction &MF = DAG.getMachineFunction();
2057
2058 // Check if the arguments are already laid out in the right way as
2059 // the caller's fixed stack objects.
2060 MachineFrameInfo *MFI = MF.getFrameInfo();
2061 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2062 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
2063 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2064 i != e;
2065 ++i, ++realArgIdx) {
2066 CCValAssign &VA = ArgLocs[i];
2067 EVT RegVT = VA.getLocVT();
2068 SDValue Arg = OutVals[realArgIdx];
2069 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2070 if (VA.getLocInfo() == CCValAssign::Indirect)
2071 return false;
2072 if (VA.needsCustom()) {
2073 // f64 and vector types are split into multiple registers or
2074 // register/stack-slot combinations. The types will not match
2075 // the registers; give up on memory f64 refs until we figure
2076 // out what to do about this.
2077 if (!VA.isRegLoc())
2078 return false;
2079 if (!ArgLocs[++i].isRegLoc())
2080 return false;
2081 if (RegVT == MVT::v2f64) {
2082 if (!ArgLocs[++i].isRegLoc())
2083 return false;
2084 if (!ArgLocs[++i].isRegLoc())
2085 return false;
2086 }
2087 } else if (!VA.isRegLoc()) {
2088 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2089 MFI, MRI, TII))
2090 return false;
2091 }
2092 }
2093 }
2094 }
2095
2096 return true;
2097 }
2098
2099 bool
2100 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2101 MachineFunction &MF, bool isVarArg,
2102 const SmallVectorImpl<ISD::OutputArg> &Outs,
2103 LLVMContext &Context) const {
2104 SmallVector<CCValAssign, 16> RVLocs;
2105 CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
2106 return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
2107 isVarArg));
2108 }
2109
2110 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2111 SDLoc DL, SelectionDAG &DAG) {
2112 const MachineFunction &MF = DAG.getMachineFunction();
2113 const Function *F = MF.getFunction();
2114
2115 StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
2116
2117 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2118 // version of the "preferred return address". These offsets affect the return
2119 // instruction if this is a return from PL1 without hypervisor extensions.
2120 // IRQ/FIQ: +4 "subs pc, lr, #4"
2121 // SWI: 0 "subs pc, lr, #0"
2122 // ABORT: +4 "subs pc, lr, #4"
2123 // UNDEF: +4/+2 "subs pc, lr, #0"
2124 // UNDEF varies depending on whether the exception came from ARM or Thumb
2125 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2126
2127 int64_t LROffset;
2128 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2129 IntKind == "ABORT")
2130 LROffset = 4;
2131 else if (IntKind == "SWI" || IntKind == "UNDEF")
2132 LROffset = 0;
2133 else
2134 report_fatal_error("Unsupported interrupt attribute.
If present, value "
2135 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2136
2137 RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
2138
2139 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other,
2140 RetOps.data(), RetOps.size());
2141 }
2142
2143 SDValue
2144 ARMTargetLowering::LowerReturn(SDValue Chain,
2145 CallingConv::ID CallConv, bool isVarArg,
2146 const SmallVectorImpl<ISD::OutputArg> &Outs,
2147 const SmallVectorImpl<SDValue> &OutVals,
2148 SDLoc dl, SelectionDAG &DAG) const {
2149
2150 // CCValAssign - represents the assignment of the return value to a location.
2151 SmallVector<CCValAssign, 16> RVLocs;
2152
2153 // CCState - Info about the registers and stack slots.
2154 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2155 getTargetMachine(), RVLocs, *DAG.getContext(), Call);
2156
2157 // Analyze outgoing return values.
2158 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
2159 isVarArg));
2160
2161 SDValue Flag;
2162 SmallVector<SDValue, 4> RetOps;
2163 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2164
2165 // Copy the result values into the output registers.
2166 for (unsigned i = 0, realRVLocIdx = 0;
2167 i != RVLocs.size();
2168 ++i, ++realRVLocIdx) {
2169 CCValAssign &VA = RVLocs[i];
2170 assert(VA.isRegLoc() && "Can only return in registers!");
2171
2172 SDValue Arg = OutVals[realRVLocIdx];
2173
2174 switch (VA.getLocInfo()) {
2175 default: llvm_unreachable("Unknown loc info!");
2176 case CCValAssign::Full: break;
2177 case CCValAssign::BCvt:
2178 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2179 break;
2180 }
2181
2182 if (VA.needsCustom()) {
2183 if (VA.getLocVT() == MVT::v2f64) {
2184 // Extract the first half and return it in two registers.
2185 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2186 DAG.getConstant(0, MVT::i32));
2187 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2188 DAG.getVTList(MVT::i32, MVT::i32), Half);
2189
2190 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
2191 Flag = Chain.getValue(1);
2192 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2193 VA = RVLocs[++i]; // skip ahead to next loc
2194 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2195 HalfGPRs.getValue(1), Flag);
2196 Flag = Chain.getValue(1);
2197 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2198 VA = RVLocs[++i]; // skip ahead to next loc
2199
2200 // Extract the 2nd half and fall through to handle it as an f64 value.
2201 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2202 DAG.getConstant(1, MVT::i32));
2203 }
2204 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2205 // available.
2206 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2207 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
2208 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
2209 Flag = Chain.getValue(1);
2210 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2211 VA = RVLocs[++i]; // skip ahead to next loc
2212 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
2213 Flag);
2214 } else
2215 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2216
2217 // Guarantee that all emitted copies are glued together, so that nothing
2218 // can be scheduled between them.
2219 Flag = Chain.getValue(1);
2220 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2221 }
2222
2223 // Update chain and glue.
2224 RetOps[0] = Chain;
2225 if (Flag.getNode())
2226 RetOps.push_back(Flag);
2227
2228 // CPUs which aren't M-class use a special sequence to return from
2229 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2230 // though we use "subs pc, lr, #N").
2231 //
2232 // M-class CPUs actually use a normal return sequence with a special
2233 // (hardware-provided) value in LR, so the normal code path works.
2234 if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
2235 !Subtarget->isMClass()) {
2236 if (Subtarget->isThumb1Only())
2237 report_fatal_error("interrupt attribute is not supported in Thumb1");
2238 return LowerInterruptReturn(RetOps, dl, DAG);
2239 }
2240
2241 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
2242 RetOps.data(), RetOps.size());
2243 }
2244
2245 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2246 if (N->getNumValues() != 1)
2247 return false;
2248 if (!N->hasNUsesOfValue(1, 0))
2249 return false;
2250
2251 SDValue TCChain = Chain;
2252 SDNode *Copy = *N->use_begin();
2253 if (Copy->getOpcode() == ISD::CopyToReg) {
2254 // If the copy has a glue operand, we conservatively assume it isn't safe to
2255 // perform a tail call.
2256 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2257 return false;
2258 TCChain = Copy->getOperand(0);
2259 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2260 SDNode *VMov = Copy;
2261 // f64 returned in a pair of GPRs.
2262 SmallPtrSet<SDNode*, 2> Copies;
2263 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2264 UI != UE; ++UI) {
2265 if (UI->getOpcode() != ISD::CopyToReg)
2266 return false;
2267 Copies.insert(*UI);
2268 }
2269 if (Copies.size() > 2)
2270 return false;
2271
2272 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2273 UI != UE; ++UI) {
2274 SDValue UseChain = UI->getOperand(0);
2275 if (Copies.count(UseChain.getNode()))
2276 // Second CopyToReg
2277 Copy = *UI;
2278 else
2279 // First CopyToReg
2280 TCChain = UseChain;
2281 }
2282 } else if (Copy->getOpcode() == ISD::BITCAST) {
2283 // f32 returned in a single GPR.
2284 if (!Copy->hasOneUse())
2285 return false;
2286 Copy = *Copy->use_begin();
2287 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2288 return false;
2289 TCChain = Copy->getOperand(0);
2290 } else {
2291 return false;
2292 }
2293
2294 bool HasRet = false;
2295 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2296 UI != UE; ++UI) {
2297 if (UI->getOpcode() != ARMISD::RET_FLAG &&
2298 UI->getOpcode() != ARMISD::INTRET_FLAG)
2299 return false;
2300 HasRet = true;
2301 }
2302
2303 if (!HasRet)
2304 return false;
2305
2306 Chain = TCChain;
2307 return true;
2308 }
2309
2310 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2311 if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
2312 return false;
2313
2314 if (!CI->isTailCall())
2315 return false;
2316
2317 return !Subtarget->isThumb1Only();
2318 }
2319
2320 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2321 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2322 // one of the above-mentioned nodes. It has to be wrapped because otherwise
2323 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2324 // be used to form an addressing mode. These wrapped nodes will be selected
2325 // into MOVi.
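// For example (an illustrative sketch, with operands elided):
//   (load (ConstantPool <cst>))
// is lowered to
//   (load (ARMISD::Wrapper (TargetConstantPool <cst>)))
// so that selection sees the wrapper rather than the bare target node.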
2326 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 2327 EVT PtrVT = Op.getValueType(); 2328 // FIXME there is no actual debug info here 2329 SDLoc dl(Op); 2330 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2331 SDValue Res; 2332 if (CP->isMachineConstantPoolEntry()) 2333 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 2334 CP->getAlignment()); 2335 else 2336 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 2337 CP->getAlignment()); 2338 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 2339 } 2340 2341 unsigned ARMTargetLowering::getJumpTableEncoding() const { 2342 return MachineJumpTableInfo::EK_Inline; 2343 } 2344 2345 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 2346 SelectionDAG &DAG) const { 2347 MachineFunction &MF = DAG.getMachineFunction(); 2348 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2349 unsigned ARMPCLabelIndex = 0; 2350 SDLoc DL(Op); 2351 EVT PtrVT = getPointerTy(); 2352 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 2353 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2354 SDValue CPAddr; 2355 if (RelocM == Reloc::Static) { 2356 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 2357 } else { 2358 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 2359 ARMPCLabelIndex = AFI->createPICLabelUId(); 2360 ARMConstantPoolValue *CPV = 2361 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, 2362 ARMCP::CPBlockAddress, PCAdj); 2363 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2364 } 2365 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 2366 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 2367 MachinePointerInfo::getConstantPool(), 2368 false, false, false, 0); 2369 if (RelocM == Reloc::Static) 2370 return Result; 2371 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2372 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 2373 } 2374 2375 // Lower ISD::GlobalTLSAddress using the "general dynamic" model 2376 SDValue 2377 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 2378 SelectionDAG &DAG) const { 2379 SDLoc dl(GA); 2380 EVT PtrVT = getPointerTy(); 2381 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 2382 MachineFunction &MF = DAG.getMachineFunction(); 2383 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2384 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2385 ARMConstantPoolValue *CPV = 2386 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2387 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); 2388 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2389 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 2390 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 2391 MachinePointerInfo::getConstantPool(), 2392 false, false, false, 0); 2393 SDValue Chain = Argument.getValue(1); 2394 2395 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2396 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 2397 2398 // call __tls_get_addr. 2399 ArgListTy Args; 2400 ArgListEntry Entry; 2401 Entry.Node = Argument; 2402 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); 2403 Args.push_back(Entry); 2404 // FIXME: is there useful debug info available here? 
2405 TargetLowering::CallLoweringInfo CLI(Chain,
2406 (Type *) Type::getInt32Ty(*DAG.getContext()),
2407 false, false, false, false,
2408 0, CallingConv::C, /*isTailCall=*/false,
2409 /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
2410 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
2411 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2412 return CallResult.first;
2413 }
2414
2415 // Lower ISD::GlobalTLSAddress using the "initial exec" or
2416 // "local exec" model.
2417 SDValue
2418 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2419 SelectionDAG &DAG,
2420 TLSModel::Model model) const {
2421 const GlobalValue *GV = GA->getGlobal();
2422 SDLoc dl(GA);
2423 SDValue Offset;
2424 SDValue Chain = DAG.getEntryNode();
2425 EVT PtrVT = getPointerTy();
2426 // Get the Thread Pointer
2427 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2428
2429 if (model == TLSModel::InitialExec) {
2430 MachineFunction &MF = DAG.getMachineFunction();
2431 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2432 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2433 // Initial exec model.
2434 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2435 ARMConstantPoolValue *CPV =
2436 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2437 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2438 true);
2439 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2440 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2441 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2442 MachinePointerInfo::getConstantPool(),
2443 false, false, false, 0);
2444 Chain = Offset.getValue(1);
2445
2446 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2447 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2448
2449 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2450 MachinePointerInfo::getConstantPool(),
2451 false, false, false, 0);
2452 } else {
2453 // local exec model
2454 assert(model == TLSModel::LocalExec);
2455 ARMConstantPoolValue *CPV =
2456 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2457 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2458 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2459 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
2460 MachinePointerInfo::getConstantPool(),
2461 false, false, false, 0);
2462 }
2463
2464 // The address of the thread-local variable is the sum of the thread
2465 // pointer and the variable's offset.
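// As a rough sketch, for the local-exec model the final code looks like
// (register names rT/rO/rD are purely illustrative):
//   mrc p15, 0, rT, c13, c0, 3   @ rT = thread pointer (TPIDRURO)
//   ldr rO, LCPIn                @ rO = TPOFF offset from the constant pool
//   add rD, rT, rO               @ rD = address of the TLS variable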
2466 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 2467 } 2468 2469 SDValue 2470 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 2471 // TODO: implement the "local dynamic" model 2472 assert(Subtarget->isTargetELF() && 2473 "TLS not implemented for non-ELF targets"); 2474 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2475 2476 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); 2477 2478 switch (model) { 2479 case TLSModel::GeneralDynamic: 2480 case TLSModel::LocalDynamic: 2481 return LowerToTLSGeneralDynamicModel(GA, DAG); 2482 case TLSModel::InitialExec: 2483 case TLSModel::LocalExec: 2484 return LowerToTLSExecModels(GA, DAG, model); 2485 } 2486 llvm_unreachable("bogus TLS model"); 2487 } 2488 2489 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 2490 SelectionDAG &DAG) const { 2491 EVT PtrVT = getPointerTy(); 2492 SDLoc dl(Op); 2493 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2494 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2495 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 2496 ARMConstantPoolValue *CPV = 2497 ARMConstantPoolConstant::Create(GV, 2498 UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); 2499 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2500 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2501 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 2502 CPAddr, 2503 MachinePointerInfo::getConstantPool(), 2504 false, false, false, 0); 2505 SDValue Chain = Result.getValue(1); 2506 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 2507 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 2508 if (!UseGOTOFF) 2509 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 2510 MachinePointerInfo::getGOT(), 2511 false, false, false, 0); 2512 return Result; 2513 } 2514 2515 // If we have T2 ops, we can materialize the address directly via movt/movw 2516 // pair. This is always cheaper. 2517 if (Subtarget->useMovt()) { 2518 ++NumMovwMovt; 2519 // FIXME: Once remat is capable of dealing with instructions with register 2520 // operands, expand this into two nodes. 2521 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2522 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2523 } else { 2524 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2525 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2526 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2527 MachinePointerInfo::getConstantPool(), 2528 false, false, false, 0); 2529 } 2530 } 2531 2532 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 2533 SelectionDAG &DAG) const { 2534 EVT PtrVT = getPointerTy(); 2535 SDLoc dl(Op); 2536 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2537 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2538 2539 // FIXME: Enable this for static codegen when tool issues are fixed. Also 2540 // update ARMFastISel::ARMMaterializeGV. 2541 if (Subtarget->useMovt() && RelocM != Reloc::Static) { 2542 ++NumMovwMovt; 2543 // FIXME: Once remat is capable of dealing with instructions with register 2544 // operands, expand this into two nodes. 2545 if (RelocM == Reloc::Static) 2546 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2547 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2548 2549 unsigned Wrapper = (RelocM == Reloc::PIC_) 2550 ? 
ARMISD::WrapperPIC : ARMISD::WrapperDYN; 2551 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, 2552 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2553 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2554 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 2555 MachinePointerInfo::getGOT(), 2556 false, false, false, 0); 2557 return Result; 2558 } 2559 2560 unsigned ARMPCLabelIndex = 0; 2561 SDValue CPAddr; 2562 if (RelocM == Reloc::Static) { 2563 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2564 } else { 2565 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 2566 ARMPCLabelIndex = AFI->createPICLabelUId(); 2567 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 2568 ARMConstantPoolValue *CPV = 2569 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 2570 PCAdj); 2571 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2572 } 2573 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2574 2575 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2576 MachinePointerInfo::getConstantPool(), 2577 false, false, false, 0); 2578 SDValue Chain = Result.getValue(1); 2579 2580 if (RelocM == Reloc::PIC_) { 2581 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2582 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2583 } 2584 2585 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2586 Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), 2587 false, false, false, 0); 2588 2589 return Result; 2590 } 2591 2592 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 2593 SelectionDAG &DAG) const { 2594 assert(Subtarget->isTargetELF() && 2595 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 2596 MachineFunction &MF = DAG.getMachineFunction(); 2597 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2598 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2599 EVT PtrVT = getPointerTy(); 2600 SDLoc dl(Op); 2601 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 2602 ARMConstantPoolValue *CPV = 2603 ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", 2604 ARMPCLabelIndex, PCAdj); 2605 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2606 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2607 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2608 MachinePointerInfo::getConstantPool(), 2609 false, false, false, 0); 2610 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2611 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2612 } 2613 2614 SDValue 2615 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 2616 SDLoc dl(Op); 2617 SDValue Val = DAG.getConstant(0, MVT::i32); 2618 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, 2619 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), 2620 Op.getOperand(1), Val); 2621 } 2622 2623 SDValue 2624 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 2625 SDLoc dl(Op); 2626 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 2627 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 2628 } 2629 2630 SDValue 2631 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 2632 const ARMSubtarget *Subtarget) const { 2633 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2634 SDLoc dl(Op); 2635 switch (IntNo) { 2636 default: return SDValue(); // Don't custom lower most intrinsics. 
2637 case Intrinsic::arm_thread_pointer: {
2638 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2639 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2640 }
2641 case Intrinsic::eh_sjlj_lsda: {
2642 MachineFunction &MF = DAG.getMachineFunction();
2643 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2644 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2645 EVT PtrVT = getPointerTy();
2646 Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2647 SDValue CPAddr;
2648 unsigned PCAdj = (RelocM != Reloc::PIC_)
2649 ? 0 : (Subtarget->isThumb() ? 4 : 8);
2650 ARMConstantPoolValue *CPV =
2651 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
2652 ARMCP::CPLSDA, PCAdj);
2653 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2654 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2655 SDValue Result =
2656 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
2657 MachinePointerInfo::getConstantPool(),
2658 false, false, false, 0);
2659
2660 if (RelocM == Reloc::PIC_) {
2661 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
2662 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
2663 }
2664 return Result;
2665 }
2666 case Intrinsic::arm_neon_vmulls:
2667 case Intrinsic::arm_neon_vmullu: {
2668 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
2669 ? ARMISD::VMULLs : ARMISD::VMULLu;
2670 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
2671 Op.getOperand(1), Op.getOperand(2));
2672 }
2673 }
2674 }
2675
2676 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
2677 const ARMSubtarget *Subtarget) {
2678 // FIXME: handle "fence singlethread" more efficiently.
2679 SDLoc dl(Op);
2680 if (!Subtarget->hasDataBarrier()) {
2681 // Some ARMv6 CPUs can support data barriers with an mcr instruction.
2682 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
2683 // here.
2684 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
2685 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
2686 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
2687 DAG.getConstant(0, MVT::i32));
2688 }
2689
2690 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
2691 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
2692 unsigned Domain = ARM_MB::ISH;
2693 if (Subtarget->isMClass()) {
2694 // Only a full system barrier exists in the M-class architectures.
2695 Domain = ARM_MB::SY;
2696 } else if (Subtarget->isSwift() && Ord == Release) {
2697 // Swift happens to implement ISHST barriers in a way that's compatible with
2698 // Release semantics but weaker than ISH so we'd be fools not to use
2699 // it. Beware: other processors probably don't!
2700 Domain = ARM_MB::ISHST;
2701 }
2702
2703 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
2704 DAG.getConstant(Intrinsic::arm_dmb, MVT::i32),
2705 DAG.getConstant(Domain, MVT::i32));
2706 }
2707
2708 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
2709 const ARMSubtarget *Subtarget) {
2710 // ARM pre-v5TE and Thumb1 do not have preload instructions.
2711 if (!(Subtarget->isThumb2() ||
2712 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
2713 // Just preserve the chain.
2714 return Op.getOperand(0);
2715
2716 SDLoc dl(Op);
2717 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
2718 if (!isRead &&
2719 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
2720 // ARMv7 with MP extension has PLDW.
2721 return Op.getOperand(0); 2722 2723 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); 2724 if (Subtarget->isThumb()) { 2725 // Invert the bits. 2726 isRead = ~isRead & 1; 2727 isData = ~isData & 1; 2728 } 2729 2730 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), 2731 Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), 2732 DAG.getConstant(isData, MVT::i32)); 2733 } 2734 2735 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2736 MachineFunction &MF = DAG.getMachineFunction(); 2737 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2738 2739 // vastart just stores the address of the VarArgsFrameIndex slot into the 2740 // memory location argument. 2741 SDLoc dl(Op); 2742 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2743 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2744 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2745 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2746 MachinePointerInfo(SV), false, false, 0); 2747 } 2748 2749 SDValue 2750 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2751 SDValue &Root, SelectionDAG &DAG, 2752 SDLoc dl) const { 2753 MachineFunction &MF = DAG.getMachineFunction(); 2754 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2755 2756 const TargetRegisterClass *RC; 2757 if (AFI->isThumb1OnlyFunction()) 2758 RC = &ARM::tGPRRegClass; 2759 else 2760 RC = &ARM::GPRRegClass; 2761 2762 // Transform the arguments stored in physical registers into virtual ones. 2763 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2764 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2765 2766 SDValue ArgValue2; 2767 if (NextVA.isMemLoc()) { 2768 MachineFrameInfo *MFI = MF.getFrameInfo(); 2769 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2770 2771 // Create load node to retrieve arguments from the stack. 2772 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2773 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2774 MachinePointerInfo::getFixedStack(FI), 2775 false, false, false, 0); 2776 } else { 2777 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2778 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2779 } 2780 2781 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2782 } 2783 2784 void 2785 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, 2786 unsigned InRegsParamRecordIdx, 2787 unsigned ArgSize, 2788 unsigned &ArgRegsSize, 2789 unsigned &ArgRegsSaveSize) 2790 const { 2791 unsigned NumGPRs; 2792 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { 2793 unsigned RBegin, REnd; 2794 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); 2795 NumGPRs = REnd - RBegin; 2796 } else { 2797 unsigned int firstUnalloced; 2798 firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, 2799 sizeof(GPRArgRegs) / 2800 sizeof(GPRArgRegs[0])); 2801 NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; 2802 } 2803 2804 unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); 2805 ArgRegsSize = NumGPRs * 4; 2806 2807 // If parameter is split between stack and GPRs... 2808 if (NumGPRs && Align == 8 && 2809 (ArgRegsSize < ArgSize || 2810 InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { 2811 // Add padding for part of param recovered from GPRs, so 2812 // its last byte must be at address K*8 - 1. 
2813 // We need this because the remaining (stack) part of the parameter has
2814 // stack alignment, and we must "attach" the "GPRs head" to it without
2815 // gaps:
2816 // Stack:
2817 // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
2818 // [ [padding] [GPRs head] ] [ Tail passed via stack ....
2819 //
2820 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2821 unsigned Padding =
2822 ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) -
2823 (ArgRegsSize + AFI->getArgRegsSaveSize());
2824 ArgRegsSaveSize = ArgRegsSize + Padding;
2825 } else
2826 // We don't need to extend the regs save size for byval parameters that
2827 // are passed entirely via GPRs.
2828 ArgRegsSaveSize = ArgRegsSize;
2829 }
2830
2831 // The remaining GPRs hold either the beginning of variable-argument
2832 // data, or the beginning of an aggregate passed by value (usually
2833 // byval). Either way, we allocate stack slots adjacent to the data
2834 // provided by our caller, and store the unallocated registers there.
2835 // If this is a variadic function, the va_list pointer will begin with
2836 // these values; otherwise, this reassembles a (byval) structure that
2837 // was split between registers and memory.
2838 // Return: The frame index the registers were stored into.
2839 int
2840 ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
2841 SDLoc dl, SDValue &Chain,
2842 const Value *OrigArg,
2843 unsigned InRegsParamRecordIdx,
2844 unsigned OffsetFromOrigArg,
2845 unsigned ArgOffset,
2846 unsigned ArgSize,
2847 bool ForceMutable) const {
2848
2849 // Currently, two use-cases are possible:
2850 // Case #1. A non-varargs function where we meet the first byval parameter.
2851 // Set up the first unallocated register as the first byval register and
2852 // consume all remaining registers
2853 // (these two actions are performed by the HandleByVal method).
2854 // Then, here, we initialize the stack frame with
2855 // "store-reg" instructions.
2856 // Case #2. A varargs function that contains no byval parameters.
2857 // The same: consume all remaining unallocated registers and
2858 // initialize the stack frame.
2859
2860 MachineFunction &MF = DAG.getMachineFunction();
2861 MachineFrameInfo *MFI = MF.getFrameInfo();
2862 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2863 unsigned firstRegToSaveIndex, lastRegToSaveIndex;
2864 unsigned RBegin, REnd;
2865 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
2866 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
2867 firstRegToSaveIndex = RBegin - ARM::R0;
2868 lastRegToSaveIndex = REnd - ARM::R0;
2869 } else {
2870 firstRegToSaveIndex = CCInfo.getFirstUnallocated
2871 (GPRArgRegs, array_lengthof(GPRArgRegs));
2872 lastRegToSaveIndex = 4;
2873 }
2874
2875 unsigned ArgRegsSize, ArgRegsSaveSize;
2876 computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
2877 ArgRegsSize, ArgRegsSaveSize);
2878
2879 // Store any by-val regs to their spots on the stack so that they may be
2880 // loaded by dereferencing the result of the formal parameter pointer or
2881 // va_next. Note: once the stack area for byval/varargs registers has
2882 // been initialized, it cannot be initialized again.
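// Illustrative worked example (an assumption for exposition, not from the
// original source): with Align == 8, a byval argument carrying 12 bytes in
// r1-r3 plus a tail on the stack, and no registers saved yet
// (AFI->getArgRegsSaveSize() == 0), computeRegArea() above produces
//   ArgRegsSize     = 3 * 4 = 12
//   Padding         = ((12 + 0 + 8 - 1) & ~7) - (12 + 0) = 16 - 12 = 4
//   ArgRegsSaveSize = 12 + 4 = 16
// i.e. the save area is rounded up so the "GPRs head" abuts the 8-byte
// aligned stack tail without a gap.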
2883 if (ArgRegsSaveSize) {
2884
2885 unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
2886
2887 if (Padding) {
2888 assert(AFI->getStoredByValParamsPadding() == 0 &&
2889 "Only one parameter may be padded.");
2890 AFI->setStoredByValParamsPadding(Padding);
2891 }
2892
2893 int FrameIndex = MFI->CreateFixedObject(
2894 ArgRegsSaveSize,
2895 Padding + ArgOffset,
2896 false);
2897 SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
2898
2899 SmallVector<SDValue, 4> MemOps;
2900 for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
2901 ++firstRegToSaveIndex, ++i) {
2902 const TargetRegisterClass *RC;
2903 if (AFI->isThumb1OnlyFunction())
2904 RC = &ARM::tGPRRegClass;
2905 else
2906 RC = &ARM::GPRRegClass;
2907
2908 unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
2909 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2910 SDValue Store =
2911 DAG.getStore(Val.getValue(1), dl, Val, FIN,
2912 MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
2913 false, false, 0);
2914 MemOps.push_back(Store);
2915 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
2916 DAG.getConstant(4, getPointerTy()));
2917 }
2918
2919 AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
2920
2921 if (!MemOps.empty())
2922 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2923 &MemOps[0], MemOps.size());
2924 return FrameIndex;
2925 } else
2926 // This will point to the next argument passed via stack.
2927 return MFI->CreateFixedObject(
2928 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable);
2929 }
2930
2931 // Set up the stack frame that the va_list pointer will start from.
2932 void
2933 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
2934 SDLoc dl, SDValue &Chain,
2935 unsigned ArgOffset,
2936 bool ForceMutable) const {
2937 MachineFunction &MF = DAG.getMachineFunction();
2938 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2939
2940 // Try to store any remaining integer argument regs to their spots on
2941 // the stack so that they may be loaded by dereferencing the result
2942 // of va_next.
2943 // If there are no regs to be stored, just point the address past the
2944 // last argument passed via the stack.
2945 int FrameIndex =
2946 StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
2947 0, ArgOffset, 0, ForceMutable);
2948
2949 AFI->setVarArgsFrameIndex(FrameIndex);
2950 }
2951
2952 SDValue
2953 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
2954 CallingConv::ID CallConv, bool isVarArg,
2955 const SmallVectorImpl<ISD::InputArg>
2956 &Ins,
2957 SDLoc dl, SelectionDAG &DAG,
2958 SmallVectorImpl<SDValue> &InVals)
2959 const {
2960 MachineFunction &MF = DAG.getMachineFunction();
2961 MachineFrameInfo *MFI = MF.getFrameInfo();
2962
2963 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2964
2965 // Assign locations to all of the incoming arguments.
2966 SmallVector<CCValAssign, 16> ArgLocs;
2967 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2968 getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
2969 CCInfo.AnalyzeFormalArguments(Ins,
2970 CCAssignFnForNode(CallConv, /* Return*/ false,
2971 isVarArg));
2972
2973 SmallVector<SDValue, 16> ArgValues;
2974 int lastInsIndex = -1;
2975 SDValue ArgValue;
2976 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
2977 unsigned CurArgIdx = 0;
2978
2979 // Initially ArgRegsSaveSize is zero.
2980 // Then we increase this value each time we meet a byval parameter.
2981 // We also increase this value in case of varargs function. 2982 AFI->setArgRegsSaveSize(0); 2983 2984 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2985 CCValAssign &VA = ArgLocs[i]; 2986 std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); 2987 CurArgIdx = Ins[VA.getValNo()].OrigArgIndex; 2988 // Arguments stored in registers. 2989 if (VA.isRegLoc()) { 2990 EVT RegVT = VA.getLocVT(); 2991 2992 if (VA.needsCustom()) { 2993 // f64 and vector types are split up into multiple registers or 2994 // combinations of registers and stack slots. 2995 if (VA.getLocVT() == MVT::v2f64) { 2996 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2997 Chain, DAG, dl); 2998 VA = ArgLocs[++i]; // skip ahead to next loc 2999 SDValue ArgValue2; 3000 if (VA.isMemLoc()) { 3001 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 3002 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 3003 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 3004 MachinePointerInfo::getFixedStack(FI), 3005 false, false, false, 0); 3006 } else { 3007 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 3008 Chain, DAG, dl); 3009 } 3010 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 3011 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 3012 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 3013 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 3014 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 3015 } else 3016 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 3017 3018 } else { 3019 const TargetRegisterClass *RC; 3020 3021 if (RegVT == MVT::f32) 3022 RC = &ARM::SPRRegClass; 3023 else if (RegVT == MVT::f64) 3024 RC = &ARM::DPRRegClass; 3025 else if (RegVT == MVT::v2f64) 3026 RC = &ARM::QPRRegClass; 3027 else if (RegVT == MVT::i32) 3028 RC = AFI->isThumb1OnlyFunction() ? 3029 (const TargetRegisterClass*)&ARM::tGPRRegClass : 3030 (const TargetRegisterClass*)&ARM::GPRRegClass; 3031 else 3032 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 3033 3034 // Transform the arguments in physical registers into virtual ones. 3035 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 3036 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 3037 } 3038 3039 // If this is an 8 or 16-bit value, it is really passed promoted 3040 // to 32 bits. Insert an assert[sz]ext to capture this, then 3041 // truncate to the right size. 3042 switch (VA.getLocInfo()) { 3043 default: llvm_unreachable("Unknown loc info!"); 3044 case CCValAssign::Full: break; 3045 case CCValAssign::BCvt: 3046 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 3047 break; 3048 case CCValAssign::SExt: 3049 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 3050 DAG.getValueType(VA.getValVT())); 3051 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 3052 break; 3053 case CCValAssign::ZExt: 3054 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 3055 DAG.getValueType(VA.getValVT())); 3056 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 3057 break; 3058 } 3059 3060 InVals.push_back(ArgValue); 3061 3062 } else { // VA.isRegLoc() 3063 3064 // sanity check 3065 assert(VA.isMemLoc()); 3066 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 3067 3068 int index = ArgLocs[i].getValNo(); 3069 3070 // Some Ins[] entries become multiple ArgLoc[] entries. 3071 // Process them only once. 
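// e.g. (illustrative): an f64 argument may be split by the calling
// convention into two i32 locations, giving two ArgLoc[] entries for one
// Ins[] entry; the lastInsIndex check below ensures the stack object for
// the underlying argument is only created once.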
3072 if (index != lastInsIndex)
3073 {
3074 ISD::ArgFlagsTy Flags = Ins[index].Flags;
3075 // FIXME: For now, all byval parameter objects are marked mutable.
3076 // This can be changed with more analysis.
3077 // In the case of tail call optimization, mark all arguments mutable,
3078 // since they could be overwritten by the lowering of arguments for a
3079 // tail call.
3080 if (Flags.isByVal()) {
3081 unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
3082 int FrameIndex = StoreByValRegs(
3083 CCInfo, DAG, dl, Chain, CurOrigArg,
3084 CurByValIndex,
3085 Ins[VA.getValNo()].PartOffset,
3086 VA.getLocMemOffset(),
3087 Flags.getByValSize(),
3088 true /*force mutable frames*/);
3089 InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
3090 CCInfo.nextInRegsParam();
3091 } else {
3092 unsigned FIOffset = VA.getLocMemOffset() +
3093 AFI->getStoredByValParamsPadding();
3094 int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3095 FIOffset, true);
3096
3097 // Create load nodes to retrieve arguments from the stack.
3098 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
3099 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3100 MachinePointerInfo::getFixedStack(FI),
3101 false, false, false, 0));
3102 }
3103 lastInsIndex = index;
3104 }
3105 }
3106 }
3107
3108 // varargs
3109 if (isVarArg)
3110 VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3111 CCInfo.getNextStackOffset());
3112
3113 return Chain;
3114 }
3115
3116 /// isFloatingPointZero - Return true if this is +0.0.
3117 static bool isFloatingPointZero(SDValue Op) {
3118 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3119 return CFP->getValueAPF().isPosZero();
3120 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3121 // Maybe this has already been legalized into the constant pool?
3122 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3123 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3124 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3125 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3126 return CFP->getValueAPF().isPosZero();
3127 }
3128 }
3129 return false;
3130 }
3131
3132 /// Returns the appropriate ARM CMP (cmp) instruction and the corresponding
3133 /// condition code for the given operands.
3134 SDValue
3135 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3136 SDValue &ARMcc, SelectionDAG &DAG,
3137 SDLoc dl) const {
3138 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3139 unsigned C = RHSC->getZExtValue();
3140 if (!isLegalICmpImmediate(C)) {
3141 // Constant does not fit, try adjusting it by one?
3142 switch (CC) {
3143 default: break;
3144 case ISD::SETLT:
3145 case ISD::SETGE:
3146 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3147 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3148 RHS = DAG.getConstant(C-1, MVT::i32);
3149 }
3150 break;
3151 case ISD::SETULT:
3152 case ISD::SETUGE:
3153 if (C != 0 && isLegalICmpImmediate(C-1)) {
3154 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3155 RHS = DAG.getConstant(C-1, MVT::i32);
3156 }
3157 break;
3158 case ISD::SETLE:
3159 case ISD::SETGT:
3160 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3161 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3162 RHS = DAG.getConstant(C+1, MVT::i32);
3163 }
3164 break;
3165 case ISD::SETULE:
3166 case ISD::SETUGT:
3167 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3168 CC = (CC == ISD::SETULE) ?
ISD::SETULT : ISD::SETUGE;
3169 RHS = DAG.getConstant(C+1, MVT::i32);
3170 }
3171 break;
3172 }
3173 }
3174 }
3175
3176 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3177 ARMISD::NodeType CompareType;
3178 switch (CondCode) {
3179 default:
3180 CompareType = ARMISD::CMP;
3181 break;
3182 case ARMCC::EQ:
3183 case ARMCC::NE:
3184 // Uses only Z Flag
3185 CompareType = ARMISD::CMPZ;
3186 break;
3187 }
3188 ARMcc = DAG.getConstant(CondCode, MVT::i32);
3189 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3190 }
3191
3192 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3193 SDValue
3194 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
3195 SDLoc dl) const {
3196 SDValue Cmp;
3197 if (!isFloatingPointZero(RHS))
3198 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
3199 else
3200 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
3201 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3202 }
3203
3204 /// duplicateCmp - Glue values can have only one use, so this function
3205 /// duplicates a comparison node.
3206 SDValue
3207 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3208 unsigned Opc = Cmp.getOpcode();
3209 SDLoc DL(Cmp);
3210 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3211 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3212
3213 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3214 Cmp = Cmp.getOperand(0);
3215 Opc = Cmp.getOpcode();
3216 if (Opc == ARMISD::CMPFP)
3217 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3218 else {
3219 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3220 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
3221 }
3222 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3223 }
3224
3225 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3226 SDValue Cond = Op.getOperand(0);
3227 SDValue SelectTrue = Op.getOperand(1);
3228 SDValue SelectFalse = Op.getOperand(2);
3229 SDLoc dl(Op);
3230
3231 // Convert:
3232 //
3233 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
3234 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
3235 //
3236 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
3237 const ConstantSDNode *CMOVTrue =
3238 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
3239 const ConstantSDNode *CMOVFalse =
3240 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
3241
3242 if (CMOVTrue && CMOVFalse) {
3243 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
3244 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
3245
3246 SDValue True;
3247 SDValue False;
3248 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
3249 True = SelectTrue;
3250 False = SelectFalse;
3251 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
3252 True = SelectFalse;
3253 False = SelectTrue;
3254 }
3255
3256 if (True.getNode() && False.getNode()) {
3257 EVT VT = Op.getValueType();
3258 SDValue ARMcc = Cond.getOperand(2);
3259 SDValue CCR = Cond.getOperand(3);
3260 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
3261 assert(True.getValueType() == VT);
3262 return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
3263 }
3264 }
3265 }
3266
3267 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
3268 // undefined bits before doing a full-word comparison with zero.
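// In DAG terms, the fallback below is (illustrative sketch, not from the
// original source):
//   (select i1 %c, t, f)  -->  (select_cc (and %c, 1), 0, t, f, setne)
// i.e. only bit 0 of the boolean is trusted; the AND makes the remaining
// bits well-defined before the full-word compare against zero.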
3269 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, 3270 DAG.getConstant(1, Cond.getValueType())); 3271 3272 return DAG.getSelectCC(dl, Cond, 3273 DAG.getConstant(0, Cond.getValueType()), 3274 SelectTrue, SelectFalse, ISD::SETNE); 3275 } 3276 3277 static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) { 3278 if (CC == ISD::SETNE) 3279 return ISD::SETEQ; 3280 return ISD::getSetCCSwappedOperands(CC); 3281 } 3282 3283 static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, 3284 bool &swpCmpOps, bool &swpVselOps) { 3285 // Start by selecting the GE condition code for opcodes that return true for 3286 // 'equality' 3287 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE || 3288 CC == ISD::SETULE) 3289 CondCode = ARMCC::GE; 3290 3291 // and GT for opcodes that return false for 'equality'. 3292 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT || 3293 CC == ISD::SETULT) 3294 CondCode = ARMCC::GT; 3295 3296 // Since we are constrained to GE/GT, if the opcode contains 'less', we need 3297 // to swap the compare operands. 3298 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT || 3299 CC == ISD::SETULT) 3300 swpCmpOps = true; 3301 3302 // Both GT and GE are ordered comparisons, and return false for 'unordered'. 3303 // If we have an unordered opcode, we need to swap the operands to the VSEL 3304 // instruction (effectively negating the condition). 3305 // 3306 // This also has the effect of swapping which one of 'less' or 'greater' 3307 // returns true, so we also swap the compare operands. It also switches 3308 // whether we return true for 'equality', so we compensate by picking the 3309 // opposite condition code to our original choice. 3310 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE || 3311 CC == ISD::SETUGT) { 3312 swpCmpOps = !swpCmpOps; 3313 swpVselOps = !swpVselOps; 3314 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT; 3315 } 3316 3317 // 'ordered' is 'anything but unordered', so use the VS condition code and 3318 // swap the VSEL operands. 3319 if (CC == ISD::SETO) { 3320 CondCode = ARMCC::VS; 3321 swpVselOps = true; 3322 } 3323 3324 // 'unordered or not equal' is 'anything but equal', so use the EQ condition 3325 // code and swap the VSEL operands. 3326 if (CC == ISD::SETUNE) { 3327 CondCode = ARMCC::EQ; 3328 swpVselOps = true; 3329 } 3330 } 3331 3332 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 3333 EVT VT = Op.getValueType(); 3334 SDValue LHS = Op.getOperand(0); 3335 SDValue RHS = Op.getOperand(1); 3336 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 3337 SDValue TrueVal = Op.getOperand(2); 3338 SDValue FalseVal = Op.getOperand(3); 3339 SDLoc dl(Op); 3340 3341 if (LHS.getValueType() == MVT::i32) { 3342 // Try to generate VSEL on ARMv8. 3343 // The VSEL instruction can't use all the usual ARM condition 3344 // codes: it only has two bits to select the condition code, so it's 3345 // constrained to use only GE, GT, VS and EQ. 
3346 // 3347 // To implement all the various ISD::SETXXX opcodes, we sometimes need to 3348 // swap the operands of the previous compare instruction (effectively 3349 // inverting the compare condition, swapping 'less' and 'greater') and 3350 // sometimes need to swap the operands to the VSEL (which inverts the 3351 // condition in the sense of firing whenever the previous condition didn't) 3352 if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || 3353 TrueVal.getValueType() == MVT::f64)) { 3354 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 3355 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || 3356 CondCode == ARMCC::VC || CondCode == ARMCC::NE) { 3357 CC = getInverseCCForVSEL(CC); 3358 std::swap(TrueVal, FalseVal); 3359 } 3360 } 3361 3362 SDValue ARMcc; 3363 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3364 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3365 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, 3366 Cmp); 3367 } 3368 3369 ARMCC::CondCodes CondCode, CondCode2; 3370 FPCCToARMCC(CC, CondCode, CondCode2); 3371 3372 // Try to generate VSEL on ARMv8. 3373 if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || 3374 TrueVal.getValueType() == MVT::f64)) { 3375 // We can select VMAXNM/VMINNM from a compare followed by a select with the 3376 // same operands, as follows: 3377 // c = fcmp [ogt, olt, ugt, ult] a, b 3378 // select c, a, b 3379 // We only do this in unsafe-fp-math, because signed zeros and NaNs are 3380 // handled differently than the original code sequence. 3381 if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal && 3382 RHS == FalseVal) { 3383 if (CC == ISD::SETOGT || CC == ISD::SETUGT) 3384 return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); 3385 if (CC == ISD::SETOLT || CC == ISD::SETULT) 3386 return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); 3387 } 3388 3389 bool swpCmpOps = false; 3390 bool swpVselOps = false; 3391 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); 3392 3393 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE || 3394 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) { 3395 if (swpCmpOps) 3396 std::swap(LHS, RHS); 3397 if (swpVselOps) 3398 std::swap(TrueVal, FalseVal); 3399 } 3400 } 3401 3402 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 3403 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 3404 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3405 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 3406 ARMcc, CCR, Cmp); 3407 if (CondCode2 != ARMCC::AL) { 3408 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 3409 // FIXME: Needs another CMP because flag can have but one use. 3410 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 3411 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 3412 Result, TrueVal, ARMcc2, CCR, Cmp2); 3413 } 3414 return Result; 3415 } 3416 3417 /// canChangeToInt - Given the fp compare operand, return true if it is suitable 3418 /// to morph to an integer compare sequence. 3419 static bool canChangeToInt(SDValue Op, bool &SeenZero, 3420 const ARMSubtarget *Subtarget) { 3421 SDNode *N = Op.getNode(); 3422 if (!N->hasOneUse()) 3423 // Otherwise it requires moving the value from fp to integer registers. 3424 return false; 3425 if (!N->getNumValues()) 3426 return false; 3427 EVT VT = Op.getValueType(); 3428 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 3429 // f32 case is generally profitable. f64 case only makes sense when vcmpe + 3430 // vmrs are very slow, e.g. cortex-a8. 
3431 return false; 3432 3433 if (isFloatingPointZero(Op)) { 3434 SeenZero = true; 3435 return true; 3436 } 3437 return ISD::isNormalLoad(N); 3438 } 3439 3440 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 3441 if (isFloatingPointZero(Op)) 3442 return DAG.getConstant(0, MVT::i32); 3443 3444 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 3445 return DAG.getLoad(MVT::i32, SDLoc(Op), 3446 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 3447 Ld->isVolatile(), Ld->isNonTemporal(), 3448 Ld->isInvariant(), Ld->getAlignment()); 3449 3450 llvm_unreachable("Unknown VFP cmp argument!"); 3451 } 3452 3453 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 3454 SDValue &RetVal1, SDValue &RetVal2) { 3455 if (isFloatingPointZero(Op)) { 3456 RetVal1 = DAG.getConstant(0, MVT::i32); 3457 RetVal2 = DAG.getConstant(0, MVT::i32); 3458 return; 3459 } 3460 3461 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 3462 SDValue Ptr = Ld->getBasePtr(); 3463 RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op), 3464 Ld->getChain(), Ptr, 3465 Ld->getPointerInfo(), 3466 Ld->isVolatile(), Ld->isNonTemporal(), 3467 Ld->isInvariant(), Ld->getAlignment()); 3468 3469 EVT PtrType = Ptr.getValueType(); 3470 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 3471 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op), 3472 PtrType, Ptr, DAG.getConstant(4, PtrType)); 3473 RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op), 3474 Ld->getChain(), NewPtr, 3475 Ld->getPointerInfo().getWithOffset(4), 3476 Ld->isVolatile(), Ld->isNonTemporal(), 3477 Ld->isInvariant(), NewAlign); 3478 return; 3479 } 3480 3481 llvm_unreachable("Unknown VFP cmp argument!"); 3482 } 3483 3484 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 3485 /// f32 and even f64 comparisons to integer ones. 3486 SDValue 3487 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 3488 SDValue Chain = Op.getOperand(0); 3489 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3490 SDValue LHS = Op.getOperand(2); 3491 SDValue RHS = Op.getOperand(3); 3492 SDValue Dest = Op.getOperand(4); 3493 SDLoc dl(Op); 3494 3495 bool LHSSeenZero = false; 3496 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); 3497 bool RHSSeenZero = false; 3498 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); 3499 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { 3500 // If unsafe fp math optimization is enabled and there are no other uses of 3501 // the CMP operands, and the condition code is EQ or NE, we can optimize it 3502 // to an integer comparison. 
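// Illustrative example (an assumption for exposition, not from the original
// source): for 'br (setoeq f32 %x, 0.0)', both operands are bitcast to i32
// and masked with 0x7fffffff, so the sign bit is dropped and +0.0 == -0.0
// still compares equal; the branch then uses a plain integer CMP of the
// masked bit patterns.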
3503 if (CC == ISD::SETOEQ) 3504 CC = ISD::SETEQ; 3505 else if (CC == ISD::SETUNE) 3506 CC = ISD::SETNE; 3507 3508 SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32); 3509 SDValue ARMcc; 3510 if (LHS.getValueType() == MVT::f32) { 3511 LHS = DAG.getNode(ISD::AND, dl, MVT::i32, 3512 bitcastf32Toi32(LHS, DAG), Mask); 3513 RHS = DAG.getNode(ISD::AND, dl, MVT::i32, 3514 bitcastf32Toi32(RHS, DAG), Mask); 3515 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3516 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3517 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3518 Chain, Dest, ARMcc, CCR, Cmp); 3519 } 3520 3521 SDValue LHS1, LHS2; 3522 SDValue RHS1, RHS2; 3523 expandf64Toi32(LHS, DAG, LHS1, LHS2); 3524 expandf64Toi32(RHS, DAG, RHS1, RHS2); 3525 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); 3526 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); 3527 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 3528 ARMcc = DAG.getConstant(CondCode, MVT::i32); 3529 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3530 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 3531 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); 3532 } 3533 3534 return SDValue(); 3535 } 3536 3537 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 3538 SDValue Chain = Op.getOperand(0); 3539 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3540 SDValue LHS = Op.getOperand(2); 3541 SDValue RHS = Op.getOperand(3); 3542 SDValue Dest = Op.getOperand(4); 3543 SDLoc dl(Op); 3544 3545 if (LHS.getValueType() == MVT::i32) { 3546 SDValue ARMcc; 3547 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3548 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3549 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3550 Chain, Dest, ARMcc, CCR, Cmp); 3551 } 3552 3553 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 3554 3555 if (getTargetMachine().Options.UnsafeFPMath && 3556 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 3557 CC == ISD::SETNE || CC == ISD::SETUNE)) { 3558 SDValue Result = OptimizeVFPBrcond(Op, DAG); 3559 if (Result.getNode()) 3560 return Result; 3561 } 3562 3563 ARMCC::CondCodes CondCode, CondCode2; 3564 FPCCToARMCC(CC, CondCode, CondCode2); 3565 3566 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 3567 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 3568 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3569 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3570 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 3571 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 3572 if (CondCode2 != ARMCC::AL) { 3573 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 3574 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 3575 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 3576 } 3577 return Res; 3578 } 3579 3580 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 3581 SDValue Chain = Op.getOperand(0); 3582 SDValue Table = Op.getOperand(1); 3583 SDValue Index = Op.getOperand(2); 3584 SDLoc dl(Op); 3585 3586 EVT PTy = getPointerTy(); 3587 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 3588 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 3589 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 3590 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 3591 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 3592 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 3593 SDValue Addr = 
DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 3594 if (Subtarget->isThumb2()) { 3595 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 3596 // which does another jump to the destination. This also makes it easier 3597 // to translate it to TBB / TBH later. 3598 // FIXME: This might not work if the function is extremely large. 3599 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 3600 Addr, Op.getOperand(2), JTI, UId); 3601 } 3602 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 3603 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 3604 MachinePointerInfo::getJumpTable(), 3605 false, false, false, 0); 3606 Chain = Addr.getValue(1); 3607 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 3608 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3609 } else { 3610 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 3611 MachinePointerInfo::getJumpTable(), 3612 false, false, false, 0); 3613 Chain = Addr.getValue(1); 3614 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3615 } 3616 } 3617 3618 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3619 EVT VT = Op.getValueType(); 3620 SDLoc dl(Op); 3621 3622 if (Op.getValueType().getVectorElementType() == MVT::i32) { 3623 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) 3624 return Op; 3625 return DAG.UnrollVectorOp(Op.getNode()); 3626 } 3627 3628 assert(Op.getOperand(0).getValueType() == MVT::v4f32 && 3629 "Invalid type for custom lowering!"); 3630 if (VT != MVT::v4i16) 3631 return DAG.UnrollVectorOp(Op.getNode()); 3632 3633 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); 3634 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); 3635 } 3636 3637 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3638 EVT VT = Op.getValueType(); 3639 if (VT.isVector()) 3640 return LowerVectorFP_TO_INT(Op, DAG); 3641 3642 SDLoc dl(Op); 3643 unsigned Opc; 3644 3645 switch (Op.getOpcode()) { 3646 default: llvm_unreachable("Invalid opcode!"); 3647 case ISD::FP_TO_SINT: 3648 Opc = ARMISD::FTOSI; 3649 break; 3650 case ISD::FP_TO_UINT: 3651 Opc = ARMISD::FTOUI; 3652 break; 3653 } 3654 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 3655 return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 3656 } 3657 3658 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3659 EVT VT = Op.getValueType(); 3660 SDLoc dl(Op); 3661 3662 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { 3663 if (VT.getVectorElementType() == MVT::f32) 3664 return Op; 3665 return DAG.UnrollVectorOp(Op.getNode()); 3666 } 3667 3668 assert(Op.getOperand(0).getValueType() == MVT::v4i16 && 3669 "Invalid type for custom lowering!"); 3670 if (VT != MVT::v4f32) 3671 return DAG.UnrollVectorOp(Op.getNode()); 3672 3673 unsigned CastOpc; 3674 unsigned Opc; 3675 switch (Op.getOpcode()) { 3676 default: llvm_unreachable("Invalid opcode!"); 3677 case ISD::SINT_TO_FP: 3678 CastOpc = ISD::SIGN_EXTEND; 3679 Opc = ISD::SINT_TO_FP; 3680 break; 3681 case ISD::UINT_TO_FP: 3682 CastOpc = ISD::ZERO_EXTEND; 3683 Opc = ISD::UINT_TO_FP; 3684 break; 3685 } 3686 3687 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); 3688 return DAG.getNode(Opc, dl, VT, Op); 3689 } 3690 3691 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3692 EVT VT = Op.getValueType(); 3693 if (VT.isVector()) 3694 return LowerVectorINT_TO_FP(Op, DAG); 3695 3696 SDLoc dl(Op); 3697 unsigned Opc; 3698 3699 switch (Op.getOpcode()) { 3700 default: 
llvm_unreachable("Invalid opcode!"); 3701 case ISD::SINT_TO_FP: 3702 Opc = ARMISD::SITOF; 3703 break; 3704 case ISD::UINT_TO_FP: 3705 Opc = ARMISD::UITOF; 3706 break; 3707 } 3708 3709 Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); 3710 return DAG.getNode(Opc, dl, VT, Op); 3711 } 3712 3713 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 3714 // Implement fcopysign with a fabs and a conditional fneg. 3715 SDValue Tmp0 = Op.getOperand(0); 3716 SDValue Tmp1 = Op.getOperand(1); 3717 SDLoc dl(Op); 3718 EVT VT = Op.getValueType(); 3719 EVT SrcVT = Tmp1.getValueType(); 3720 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 3721 Tmp0.getOpcode() == ARMISD::VMOVDRR; 3722 bool UseNEON = !InGPR && Subtarget->hasNEON(); 3723 3724 if (UseNEON) { 3725 // Use VBSL to copy the sign bit. 3726 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 3727 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 3728 DAG.getTargetConstant(EncodedVal, MVT::i32)); 3729 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; 3730 if (VT == MVT::f64) 3731 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3732 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 3733 DAG.getConstant(32, MVT::i32)); 3734 else /*if (VT == MVT::f32)*/ 3735 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 3736 if (SrcVT == MVT::f32) { 3737 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 3738 if (VT == MVT::f64) 3739 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3740 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 3741 DAG.getConstant(32, MVT::i32)); 3742 } else if (VT == MVT::f32) 3743 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, 3744 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), 3745 DAG.getConstant(32, MVT::i32)); 3746 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 3747 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 3748 3749 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 3750 MVT::i32); 3751 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 3752 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 3753 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 3754 3755 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 3756 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 3757 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 3758 if (VT == MVT::f32) { 3759 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); 3760 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 3761 DAG.getConstant(0, MVT::i32)); 3762 } else { 3763 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 3764 } 3765 3766 return Res; 3767 } 3768 3769 // Bitcast operand 1 to i32. 3770 if (SrcVT == MVT::f64) 3771 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3772 &Tmp1, 1).getValue(1); 3773 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 3774 3775 // Or in the signbit with integer operations. 3776 SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); 3777 SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); 3778 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 3779 if (VT == MVT::f32) { 3780 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 3781 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 3782 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 3783 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 3784 } 3785 3786 // f64: Or the high part with signbit and then combine two parts. 
3787 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3788 &Tmp0, 1); 3789 SDValue Lo = Tmp0.getValue(0); 3790 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); 3791 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); 3792 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 3793 } 3794 3795 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 3796 MachineFunction &MF = DAG.getMachineFunction(); 3797 MachineFrameInfo *MFI = MF.getFrameInfo(); 3798 MFI->setReturnAddressIsTaken(true); 3799 3800 EVT VT = Op.getValueType(); 3801 SDLoc dl(Op); 3802 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3803 if (Depth) { 3804 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 3805 SDValue Offset = DAG.getConstant(4, MVT::i32); 3806 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 3807 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 3808 MachinePointerInfo(), false, false, false, 0); 3809 } 3810 3811 // Return LR, which contains the return address. Mark it an implicit live-in. 3812 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 3813 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 3814 } 3815 3816 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 3817 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 3818 MFI->setFrameAddressIsTaken(true); 3819 3820 EVT VT = Op.getValueType(); 3821 SDLoc dl(Op); // FIXME probably not meaningful 3822 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3823 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 3824 ? ARM::R7 : ARM::R11; 3825 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 3826 while (Depth--) 3827 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 3828 MachinePointerInfo(), 3829 false, false, false, 0); 3830 return FrameAddr; 3831 } 3832 3833 /// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec), 3834 /// and size(DestVec) > 128-bits. 3835 /// This is achieved by doing the one extension from the SrcVec, splitting the 3836 /// result, extending these parts, and then concatenating these into the 3837 /// destination. 3838 static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { 3839 SDValue Op = N->getOperand(0); 3840 EVT SrcVT = Op.getValueType(); 3841 EVT DestVT = N->getValueType(0); 3842 3843 assert(DestVT.getSizeInBits() > 128 && 3844 "Custom sext/zext expansion needs >128-bit vector."); 3845 // If this is a normal length extension, use the default expansion. 
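// Otherwise, an illustrative trace (not from the original source):
// sext v8i8 -> v8i32 (a 4x extension, 256-bit destination) becomes
//   sext v8i8 -> v8i16, split into two v4i16 halves,
//   sext each half to v4i32, then concat_vectors back into v8i32.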
3846 if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() && 3847 SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits()) 3848 return SDValue(); 3849 3850 SDLoc dl(N); 3851 unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); 3852 unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits(); 3853 unsigned NumElts = SrcVT.getVectorNumElements(); 3854 LLVMContext &Ctx = *DAG.getContext(); 3855 SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi; 3856 3857 EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), 3858 NumElts); 3859 EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), 3860 NumElts/2); 3861 EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize), 3862 NumElts/2); 3863 3864 Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op); 3865 SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, 3866 DAG.getIntPtrConstant(0)); 3867 SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, 3868 DAG.getIntPtrConstant(NumElts/2)); 3869 ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo); 3870 ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi); 3871 return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi); 3872 } 3873 3874 /// ExpandBITCAST - If the target supports VFP, this function is called to 3875 /// expand a bit convert where either the source or destination type is i64 to 3876 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 3877 /// operand type is illegal (e.g., v2f32 for a target that doesn't support 3878 /// vectors), since the legalizer won't know what to do with that. 3879 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { 3880 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3881 SDLoc dl(N); 3882 SDValue Op = N->getOperand(0); 3883 3884 // This function is only supposed to be called for i64 types, either as the 3885 // source or destination of the bit convert. 3886 EVT SrcVT = Op.getValueType(); 3887 EVT DstVT = N->getValueType(0); 3888 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 3889 "ExpandBITCAST called for non-i64 type"); 3890 3891 // Turn i64->f64 into VMOVDRR. 3892 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 3893 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3894 DAG.getConstant(0, MVT::i32)); 3895 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3896 DAG.getConstant(1, MVT::i32)); 3897 return DAG.getNode(ISD::BITCAST, dl, DstVT, 3898 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 3899 } 3900 3901 // Turn f64->i64 into VMOVRRD. 3902 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 3903 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 3904 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 3905 // Merge the pieces into a single i64 value. 3906 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 3907 } 3908 3909 return SDValue(); 3910 } 3911 3912 /// getZeroVector - Returns a vector of specified type with all zero elements. 3913 /// Zero vectors are used to represent vector negation and in those cases 3914 /// will be implemented with the NEON VNEG instruction. However, VNEG does 3915 /// not support i64 elements, so sometimes the zero vectors will need to be 3916 /// explicitly constructed. Regardless, use a canonical VMOV to create the 3917 /// zero vector. 3918 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) { 3919 assert(VT.isVector() && "Expected a vector type"); 3920 // The canonical modified immediate encoding of a zero vector is....0! 
3921 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 3922 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 3923 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); 3924 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3925 } 3926 3927 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two 3928 /// i32 values and take a 2 x i32 value to shift plus a shift amount. 3929 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, 3930 SelectionDAG &DAG) const { 3931 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3932 EVT VT = Op.getValueType(); 3933 unsigned VTBits = VT.getSizeInBits(); 3934 SDLoc dl(Op); 3935 SDValue ShOpLo = Op.getOperand(0); 3936 SDValue ShOpHi = Op.getOperand(1); 3937 SDValue ShAmt = Op.getOperand(2); 3938 SDValue ARMcc; 3939 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 3940 3941 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 3942 3943 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3944 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3945 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 3946 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3947 DAG.getConstant(VTBits, MVT::i32)); 3948 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 3949 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3950 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 3951 3952 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3953 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3954 ARMcc, DAG, dl); 3955 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 3956 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, 3957 CCR, Cmp); 3958 3959 SDValue Ops[2] = { Lo, Hi }; 3960 return DAG.getMergeValues(Ops, 2, dl); 3961 } 3962 3963 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 3964 /// i32 values and take a 2 x i32 value to shift plus a shift amount. 3965 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, 3966 SelectionDAG &DAG) const { 3967 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3968 EVT VT = Op.getValueType(); 3969 unsigned VTBits = VT.getSizeInBits(); 3970 SDLoc dl(Op); 3971 SDValue ShOpLo = Op.getOperand(0); 3972 SDValue ShOpHi = Op.getOperand(1); 3973 SDValue ShAmt = Op.getOperand(2); 3974 SDValue ARMcc; 3975 3976 assert(Op.getOpcode() == ISD::SHL_PARTS); 3977 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3978 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3979 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 3980 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3981 DAG.getConstant(VTBits, MVT::i32)); 3982 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 3983 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 3984 3985 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3986 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3987 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3988 ARMcc, DAG, dl); 3989 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 3990 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, 3991 CCR, Cmp); 3992 3993 SDValue Ops[2] = { Lo, Hi }; 3994 return DAG.getMergeValues(Ops, 2, dl); 3995 } 3996 3997 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 3998 SelectionDAG &DAG) const { 3999 // The rounding mode is in bits 23:22 of the FPSCR. 
4000 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
4001 // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3),
4002 // so that the shift and the AND get folded into a bitfield extract.
4003 SDLoc dl(Op);
4004 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
4005 DAG.getConstant(Intrinsic::arm_get_fpscr,
4006 MVT::i32));
4007 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
4008 DAG.getConstant(1U << 22, MVT::i32));
4009 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
4010 DAG.getConstant(22, MVT::i32));
4011 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
4012 DAG.getConstant(3, MVT::i32));
4013 }
4014
4015 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
4016 const ARMSubtarget *ST) {
4017 EVT VT = N->getValueType(0);
4018 SDLoc dl(N);
4019
4020 if (!ST->hasV6T2Ops())
4021 return SDValue();
4022
4023 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
4024 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
4025 }
4026
4027 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
4028 /// for each 16-bit element from operand, repeated. The basic idea is to
4029 /// leverage vcnt to get the 8-bit counts, gather and add the results.
4030 ///
4031 /// Trace for v4i16:
4032 /// input = [v0 v1 v2 v3 ] (vi 16-bit element)
4033 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
4034 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
4035 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
4036 /// [b0 b1 b2 b3 b4 b5 b6 b7]
4037 /// +[b1 b0 b3 b2 b5 b4 b7 b6]
4038 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
4039 /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
4040 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
4041 EVT VT = N->getValueType(0);
4042 SDLoc DL(N);
4043
4044 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
4045 SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
4046 SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
4047 SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
4048 SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
4049 return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
4050 }
4051
4052 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
4053 /// bit-count for each 16-bit element from the operand. We need slightly
4054 /// different sequencing for v4i16 and v8i16 to stay within NEON's available
4055 /// 64/128-bit registers.
4056 /// 4057 /// Trace for v4i16: 4058 /// input = [v0 v1 v2 v3 ] (vi 16-bit element) 4059 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi) 4060 /// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ] 4061 /// v4i16:Extracted = [k0 k1 k2 k3 ] 4062 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { 4063 EVT VT = N->getValueType(0); 4064 SDLoc DL(N); 4065 4066 SDValue BitCounts = getCTPOP16BitCounts(N, DAG); 4067 if (VT.is64BitVector()) { 4068 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts); 4069 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended, 4070 DAG.getIntPtrConstant(0)); 4071 } else { 4072 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, 4073 BitCounts, DAG.getIntPtrConstant(0)); 4074 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted); 4075 } 4076 } 4077 4078 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the 4079 /// bit-count for each 32-bit element from the operand. The idea here is 4080 /// to split the vector into 16-bit elements, leverage the 16-bit count 4081 /// routine, and then combine the results. 4082 /// 4083 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged): 4084 /// input = [v0 v1 ] (vi: 32-bit elements) 4085 /// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1]) 4086 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi) 4087 /// vrev: N0 = [k1 k0 k3 k2 ] 4088 /// [k0 k1 k2 k3 ] 4089 /// N1 =+[k1 k0 k3 k2 ] 4090 /// [k0 k2 k1 k3 ] 4091 /// N2 =+[k1 k3 k0 k2 ] 4092 /// [k0 k2 k1 k3 ] 4093 /// Extended =+[k1 k3 k0 k2 ] 4094 /// [k0 k2 ] 4095 /// Extracted=+[k1 k3 ] 4096 /// 4097 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { 4098 EVT VT = N->getValueType(0); 4099 SDLoc DL(N); 4100 4101 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; 4102 4103 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0)); 4104 SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG); 4105 SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16); 4106 SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0); 4107 SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1); 4108 4109 if (VT.is64BitVector()) { 4110 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2); 4111 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended, 4112 DAG.getIntPtrConstant(0)); 4113 } else { 4114 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2, 4115 DAG.getIntPtrConstant(0)); 4116 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted); 4117 } 4118 } 4119 4120 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, 4121 const ARMSubtarget *ST) { 4122 EVT VT = N->getValueType(0); 4123 4124 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON."); 4125 assert((VT == MVT::v2i32 || VT == MVT::v4i32 || 4126 VT == MVT::v4i16 || VT == MVT::v8i16) && 4127 "Unexpected type for custom ctpop lowering"); 4128 4129 if (VT.getVectorElementType() == MVT::i32) 4130 return lowerCTPOP32BitElements(N, DAG); 4131 else 4132 return lowerCTPOP16BitElements(N, DAG); 4133 } 4134 4135 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 4136 const ARMSubtarget *ST) { 4137 EVT VT = N->getValueType(0); 4138 SDLoc dl(N); 4139 4140 if (!VT.isVector()) 4141 return SDValue(); 4142 4143 // Lower vector shifts on NEON to use VSHL. 
4144 assert(ST->hasNEON() && "unexpected vector shift"); 4145 4146 // Left shifts translate directly to the vshiftu intrinsic. 4147 if (N->getOpcode() == ISD::SHL) 4148 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 4149 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 4150 N->getOperand(0), N->getOperand(1)); 4151 4152 assert((N->getOpcode() == ISD::SRA || 4153 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 4154 4155 // NEON uses the same intrinsics for both left and right shifts. For 4156 // right shifts, the shift amounts are negative, so negate the vector of 4157 // shift amounts. 4158 EVT ShiftVT = N->getOperand(1).getValueType(); 4159 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 4160 getZeroVector(ShiftVT, DAG, dl), 4161 N->getOperand(1)); 4162 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 4163 Intrinsic::arm_neon_vshifts : 4164 Intrinsic::arm_neon_vshiftu); 4165 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 4166 DAG.getConstant(vshiftInt, MVT::i32), 4167 N->getOperand(0), NegatedCount); 4168 } 4169 4170 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, 4171 const ARMSubtarget *ST) { 4172 EVT VT = N->getValueType(0); 4173 SDLoc dl(N); 4174 4175 // We can get here for a node like i32 = ISD::SHL i32, i64 4176 if (VT != MVT::i64) 4177 return SDValue(); 4178 4179 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 4180 "Unknown shift to lower!"); 4181 4182 // We only lower SRA, SRL of 1 here, all others use generic lowering. 4183 if (!isa<ConstantSDNode>(N->getOperand(1)) || 4184 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 4185 return SDValue(); 4186 4187 // If we are in thumb mode, we don't have RRX. 4188 if (ST->isThumb1Only()) return SDValue(); 4189 4190 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 4191 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 4192 DAG.getConstant(0, MVT::i32)); 4193 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 4194 DAG.getConstant(1, MVT::i32)); 4195 4196 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 4197 // captures the result into a carry flag. 4198 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 4199 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); 4200 4201 // The low part is an ARMISD::RRX operand, which shifts the carry in. 4202 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 4203 4204 // Merge the pieces into a single i64 value. 
4205 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 4206 } 4207 4208 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 4209 SDValue TmpOp0, TmpOp1; 4210 bool Invert = false; 4211 bool Swap = false; 4212 unsigned Opc = 0; 4213 4214 SDValue Op0 = Op.getOperand(0); 4215 SDValue Op1 = Op.getOperand(1); 4216 SDValue CC = Op.getOperand(2); 4217 EVT VT = Op.getValueType(); 4218 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4219 SDLoc dl(Op); 4220 4221 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 4222 switch (SetCCOpcode) { 4223 default: llvm_unreachable("Illegal FP comparison"); 4224 case ISD::SETUNE: 4225 case ISD::SETNE: Invert = true; // Fallthrough 4226 case ISD::SETOEQ: 4227 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 4228 case ISD::SETOLT: 4229 case ISD::SETLT: Swap = true; // Fallthrough 4230 case ISD::SETOGT: 4231 case ISD::SETGT: Opc = ARMISD::VCGT; break; 4232 case ISD::SETOLE: 4233 case ISD::SETLE: Swap = true; // Fallthrough 4234 case ISD::SETOGE: 4235 case ISD::SETGE: Opc = ARMISD::VCGE; break; 4236 case ISD::SETUGE: Swap = true; // Fallthrough 4237 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 4238 case ISD::SETUGT: Swap = true; // Fallthrough 4239 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 4240 case ISD::SETUEQ: Invert = true; // Fallthrough 4241 case ISD::SETONE: 4242 // Expand this to (OLT | OGT). 4243 TmpOp0 = Op0; 4244 TmpOp1 = Op1; 4245 Opc = ISD::OR; 4246 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 4247 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 4248 break; 4249 case ISD::SETUO: Invert = true; // Fallthrough 4250 case ISD::SETO: 4251 // Expand this to (OLT | OGE). 4252 TmpOp0 = Op0; 4253 TmpOp1 = Op1; 4254 Opc = ISD::OR; 4255 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 4256 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 4257 break; 4258 } 4259 } else { 4260 // Integer comparisons. 4261 switch (SetCCOpcode) { 4262 default: llvm_unreachable("Illegal integer comparison"); 4263 case ISD::SETNE: Invert = true; 4264 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 4265 case ISD::SETLT: Swap = true; 4266 case ISD::SETGT: Opc = ARMISD::VCGT; break; 4267 case ISD::SETLE: Swap = true; 4268 case ISD::SETGE: Opc = ARMISD::VCGE; break; 4269 case ISD::SETULT: Swap = true; 4270 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 4271 case ISD::SETULE: Swap = true; 4272 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 4273 } 4274 4275 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 4276 if (Opc == ARMISD::VCEQ) { 4277 4278 SDValue AndOp; 4279 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 4280 AndOp = Op0; 4281 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 4282 AndOp = Op1; 4283 4284 // Ignore bitconvert. 4285 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) 4286 AndOp = AndOp.getOperand(0); 4287 4288 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 4289 Opc = ARMISD::VTST; 4290 Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); 4291 Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); 4292 Invert = !Invert; 4293 } 4294 } 4295 } 4296 4297 if (Swap) 4298 std::swap(Op0, Op1); 4299 4300 // If one of the operands is a constant vector zero, attempt to fold the 4301 // comparison to a specialized compare-against-zero form. 
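// (NEON provides VCEQ/VCGE/VCGT/VCLE/VCLT forms that compare against an immediate #0, which saves materializing a zero vector.)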
4302 SDValue SingleOp; 4303 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 4304 SingleOp = Op0; 4305 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 4306 if (Opc == ARMISD::VCGE) 4307 Opc = ARMISD::VCLEZ; 4308 else if (Opc == ARMISD::VCGT) 4309 Opc = ARMISD::VCLTZ; 4310 SingleOp = Op1; 4311 } 4312 4313 SDValue Result; 4314 if (SingleOp.getNode()) { 4315 switch (Opc) { 4316 case ARMISD::VCEQ: 4317 Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; 4318 case ARMISD::VCGE: 4319 Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; 4320 case ARMISD::VCLEZ: 4321 Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; 4322 case ARMISD::VCGT: 4323 Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; 4324 case ARMISD::VCLTZ: 4325 Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; 4326 default: 4327 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 4328 } 4329 } else { 4330 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 4331 } 4332 4333 if (Invert) 4334 Result = DAG.getNOT(dl, Result, VT); 4335 4336 return Result; 4337 } 4338 4339 /// isNEONModifiedImm - Check if the specified splat value corresponds to a 4340 /// valid vector constant for a NEON instruction with a "modified immediate" 4341 /// operand (e.g., VMOV). If so, return the encoded value. 4342 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 4343 unsigned SplatBitSize, SelectionDAG &DAG, 4344 EVT &VT, bool is128Bits, NEONModImmType type) { 4345 unsigned OpCmode, Imm; 4346 4347 // SplatBitSize is set to the smallest size that splats the vector, so a 4348 // zero vector will always have SplatBitSize == 8. However, NEON modified 4349 // immediate instructions other than VMOV do not support the 8-bit encoding 4350 // of a zero vector, and the default encoding of zero is supposed to be the 4351 // 32-bit version. 4352 if (SplatBits == 0) 4353 SplatBitSize = 32; 4354 4355 switch (SplatBitSize) { 4356 case 8: 4357 if (type != VMOVModImm) 4358 return SDValue(); 4359 // Any 1-byte value is OK. Op=0, Cmode=1110. 4360 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 4361 OpCmode = 0xe; 4362 Imm = SplatBits; 4363 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 4364 break; 4365 4366 case 16: 4367 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 4368 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 4369 if ((SplatBits & ~0xff) == 0) { 4370 // Value = 0x00nn: Op=x, Cmode=100x. 4371 OpCmode = 0x8; 4372 Imm = SplatBits; 4373 break; 4374 } 4375 if ((SplatBits & ~0xff00) == 0) { 4376 // Value = 0xnn00: Op=x, Cmode=101x. 4377 OpCmode = 0xa; 4378 Imm = SplatBits >> 8; 4379 break; 4380 } 4381 return SDValue(); 4382 4383 case 32: 4384 // NEON's 32-bit VMOV supports splat values where: 4385 // * only one byte is nonzero, or 4386 // * the least significant byte is 0xff and the second byte is nonzero, or 4387 // * the least significant 2 bytes are 0xff and the third is nonzero. 4388 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 4389 if ((SplatBits & ~0xff) == 0) { 4390 // Value = 0x000000nn: Op=x, Cmode=000x. 4391 OpCmode = 0; 4392 Imm = SplatBits; 4393 break; 4394 } 4395 if ((SplatBits & ~0xff00) == 0) { 4396 // Value = 0x0000nn00: Op=x, Cmode=001x. 4397 OpCmode = 0x2; 4398 Imm = SplatBits >> 8; 4399 break; 4400 } 4401 if ((SplatBits & ~0xff0000) == 0) { 4402 // Value = 0x00nn0000: Op=x, Cmode=010x. 4403 OpCmode = 0x4; 4404 Imm = SplatBits >> 16; 4405 break; 4406 } 4407 if ((SplatBits & ~0xff000000) == 0) { 4408 // Value = 0xnn000000: Op=x, Cmode=011x.
4409 OpCmode = 0x6; 4410 Imm = SplatBits >> 24; 4411 break; 4412 } 4413 4414 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC. 4415 if (type == OtherModImm) return SDValue(); 4416 4417 if ((SplatBits & ~0xffff) == 0 && 4418 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 4419 // Value = 0x0000nnff: Op=x, Cmode=1100. 4420 OpCmode = 0xc; 4421 Imm = SplatBits >> 8; 4422 SplatBits |= 0xff; 4423 break; 4424 } 4425 4426 if ((SplatBits & ~0xffffff) == 0 && 4427 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 4428 // Value = 0x00nnffff: Op=x, Cmode=1101. 4429 OpCmode = 0xd; 4430 Imm = SplatBits >> 16; 4431 SplatBits |= 0xffff; 4432 break; 4433 } 4434 4435 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 4436 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 4437 // VMOV.I32. A (very) minor optimization would be to replicate the value 4438 // and fall through here to test for a valid 64-bit splat. But, then the 4439 // caller would also need to check and handle the change in size. 4440 return SDValue(); 4441 4442 case 64: { 4443 if (type != VMOVModImm) 4444 return SDValue(); 4445 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 4446 uint64_t BitMask = 0xff; 4447 uint64_t Val = 0; 4448 unsigned ImmMask = 1; 4449 Imm = 0; 4450 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 4451 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 4452 Val |= BitMask; 4453 Imm |= ImmMask; 4454 } else if ((SplatBits & BitMask) != 0) { 4455 return SDValue(); 4456 } 4457 BitMask <<= 8; 4458 ImmMask <<= 1; 4459 } 4460 // Op=1, Cmode=1110. 4461 OpCmode = 0x1e; 4462 SplatBits = Val; 4463 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 4464 break; 4465 } 4466 4467 default: 4468 llvm_unreachable("unexpected size for isNEONModifiedImm"); 4469 } 4470 4471 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 4472 return DAG.getTargetConstant(EncodedVal, MVT::i32); 4473 } 4474 4475 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, 4476 const ARMSubtarget *ST) const { 4477 if (!ST->hasVFP3()) 4478 return SDValue(); 4479 4480 bool IsDouble = Op.getValueType() == MVT::f64; 4481 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); 4482 4483 // Try splatting with a VMOV.f32... 4484 APFloat FPVal = CFP->getValueAPF(); 4485 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); 4486 4487 if (ImmVal != -1) { 4488 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) { 4489 // We have code in place to select a valid ConstantFP already, no need to 4490 // do any mangling. 4491 return Op; 4492 } 4493 4494 // It's a float and we are trying to use NEON operations where 4495 // possible. Lower it to a splat followed by an extract. 4496 SDLoc DL(Op); 4497 SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); 4498 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, 4499 NewVal); 4500 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, 4501 DAG.getConstant(0, MVT::i32)); 4502 } 4503 4504 // The rest of our options are NEON only; make sure that's allowed before 4505 // proceeding. 4506 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP())) 4507 return SDValue(); 4508 4509 EVT VMovVT; 4510 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue(); 4511 4512 // It wouldn't really be worth bothering for doubles except for one very 4513 // important value, which does happen to match: 0.0. So make sure we don't do 4514 // anything stupid.
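// A double can only be materialized from a 32-bit NEON splat when both of its 32-bit halves are identical, so bail out early when they differ.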
4515 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32)) 4516 return SDValue(); 4517 4518 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too). 4519 SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT, 4520 false, VMOVModImm); 4521 if (NewVal != SDValue()) { 4522 SDLoc DL(Op); 4523 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, 4524 NewVal); 4525 if (IsDouble) 4526 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); 4527 4528 // It's a float: cast and extract a vector element. 4529 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 4530 VecConstant); 4531 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 4532 DAG.getConstant(0, MVT::i32)); 4533 } 4534 4535 // Finally, try a VMVN.i32. 4536 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT, 4537 false, VMVNModImm); 4538 if (NewVal != SDValue()) { 4539 SDLoc DL(Op); 4540 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); 4541 4542 if (IsDouble) 4543 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); 4544 4545 // It's a float: cast and extract a vector element. 4546 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 4547 VecConstant); 4548 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 4549 DAG.getConstant(0, MVT::i32)); 4550 } 4551 4552 return SDValue(); 4553 } 4554 4555 // Check if a VEXT instruction can handle the shuffle mask when the 4556 // vector sources of the shuffle are the same. 4557 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { 4558 unsigned NumElts = VT.getVectorNumElements(); 4559 4560 // Assume that the first shuffle index is not UNDEF. Fail if it is. 4561 if (M[0] < 0) 4562 return false; 4563 4564 Imm = M[0]; 4565 4566 // If this is a VEXT shuffle, the immediate value is the index of the first 4567 // element. The other shuffle indices must be the successive elements after 4568 // the first one. 4569 unsigned ExpectedElt = Imm; 4570 for (unsigned i = 1; i < NumElts; ++i) { 4571 // Increment the expected index. If it wraps around, just follow it 4572 // back to index zero and keep going. 4573 ++ExpectedElt; 4574 if (ExpectedElt == NumElts) 4575 ExpectedElt = 0; 4576 4577 if (M[i] < 0) continue; // ignore UNDEF indices 4578 if (ExpectedElt != static_cast<unsigned>(M[i])) 4579 return false; 4580 } 4581 4582 return true; 4583 } 4584 4585 4586 static bool isVEXTMask(ArrayRef<int> M, EVT VT, 4587 bool &ReverseVEXT, unsigned &Imm) { 4588 unsigned NumElts = VT.getVectorNumElements(); 4589 ReverseVEXT = false; 4590 4591 // Assume that the first shuffle index is not UNDEF. Fail if it is. 4592 if (M[0] < 0) 4593 return false; 4594 4595 Imm = M[0]; 4596 4597 // If this is a VEXT shuffle, the immediate value is the index of the first 4598 // element. The other shuffle indices must be the successive elements after 4599 // the first one. 4600 unsigned ExpectedElt = Imm; 4601 for (unsigned i = 1; i < NumElts; ++i) { 4602 // Increment the expected index. If it wraps around, it may still be 4603 // a VEXT but the source vectors must be swapped. 4604 ExpectedElt += 1; 4605 if (ExpectedElt == NumElts * 2) { 4606 ExpectedElt = 0; 4607 ReverseVEXT = true; 4608 } 4609 4610 if (M[i] < 0) continue; // ignore UNDEF indices 4611 if (ExpectedElt != static_cast<unsigned>(M[i])) 4612 return false; 4613 } 4614 4615 // Adjust the index value if the source operands will be swapped.
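// For example, with NumElts == 4 the mask <6, 7, 0, 1> wraps after two elements, so the sources are swapped and Imm becomes 6 - 4 == 2, i.e. VEXT(V2, V1, #2).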
4616 if (ReverseVEXT) 4617 Imm -= NumElts; 4618 4619 return true; 4620 } 4621 4622 /// isVREVMask - Check if a vector shuffle corresponds to a VREV 4623 /// instruction with the specified blocksize. (The order of the elements 4624 /// within each block of the vector is reversed.) 4625 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { 4626 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 4627 "Only possible block sizes for VREV are: 16, 32, 64"); 4628 4629 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4630 if (EltSz == 64) 4631 return false; 4632 4633 unsigned NumElts = VT.getVectorNumElements(); 4634 unsigned BlockElts = M[0] + 1; 4635 // If the first shuffle index is UNDEF, be optimistic. 4636 if (M[0] < 0) 4637 BlockElts = BlockSize / EltSz; 4638 4639 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 4640 return false; 4641 4642 for (unsigned i = 0; i < NumElts; ++i) { 4643 if (M[i] < 0) continue; // ignore UNDEF indices 4644 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 4645 return false; 4646 } 4647 4648 return true; 4649 } 4650 4651 static bool isVTBLMask(ArrayRef<int> M, EVT VT) { 4652 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of 4653 // range, then 0 is placed into the resulting vector. So pretty much any mask 4654 // of 8 elements can work here. 4655 return VT == MVT::v8i8 && M.size() == 8; 4656 } 4657 4658 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 4659 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4660 if (EltSz == 64) 4661 return false; 4662 4663 unsigned NumElts = VT.getVectorNumElements(); 4664 WhichResult = (M[0] == 0 ? 0 : 1); 4665 for (unsigned i = 0; i < NumElts; i += 2) { 4666 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 4667 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 4668 return false; 4669 } 4670 return true; 4671 } 4672 4673 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 4674 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 4675 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 4676 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 4677 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4678 if (EltSz == 64) 4679 return false; 4680 4681 unsigned NumElts = VT.getVectorNumElements(); 4682 WhichResult = (M[0] == 0 ? 0 : 1); 4683 for (unsigned i = 0; i < NumElts; i += 2) { 4684 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 4685 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 4686 return false; 4687 } 4688 return true; 4689 } 4690 4691 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 4692 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4693 if (EltSz == 64) 4694 return false; 4695 4696 unsigned NumElts = VT.getVectorNumElements(); 4697 WhichResult = (M[0] == 0 ? 0 : 1); 4698 for (unsigned i = 0; i != NumElts; ++i) { 4699 if (M[i] < 0) continue; // ignore UNDEF indices 4700 if ((unsigned) M[i] != 2 * i + WhichResult) 4701 return false; 4702 } 4703 4704 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4705 if (VT.is64BitVector() && EltSz == 32) 4706 return false; 4707 4708 return true; 4709 } 4710 4711 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 4712 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 
4713 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>. 4714 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 4715 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4716 if (EltSz == 64) 4717 return false; 4718 4719 unsigned Half = VT.getVectorNumElements() / 2; 4720 WhichResult = (M[0] == 0 ? 0 : 1); 4721 for (unsigned j = 0; j != 2; ++j) { 4722 unsigned Idx = WhichResult; 4723 for (unsigned i = 0; i != Half; ++i) { 4724 int MIdx = M[i + j * Half]; 4725 if (MIdx >= 0 && (unsigned) MIdx != Idx) 4726 return false; 4727 Idx += 2; 4728 } 4729 } 4730 4731 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4732 if (VT.is64BitVector() && EltSz == 32) 4733 return false; 4734 4735 return true; 4736 } 4737 4738 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 4739 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4740 if (EltSz == 64) 4741 return false; 4742 4743 unsigned NumElts = VT.getVectorNumElements(); 4744 WhichResult = (M[0] == 0 ? 0 : 1); 4745 unsigned Idx = WhichResult * NumElts / 2; 4746 for (unsigned i = 0; i != NumElts; i += 2) { 4747 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 4748 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) 4749 return false; 4750 Idx += 1; 4751 } 4752 4753 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4754 if (VT.is64BitVector() && EltSz == 32) 4755 return false; 4756 4757 return true; 4758 } 4759 4760 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 4761 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 4762 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 4763 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 4764 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4765 if (EltSz == 64) 4766 return false; 4767 4768 unsigned NumElts = VT.getVectorNumElements(); 4769 WhichResult = (M[0] == 0 ? 0 : 1); 4770 unsigned Idx = WhichResult * NumElts / 2; 4771 for (unsigned i = 0; i != NumElts; i += 2) { 4772 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 4773 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) 4774 return false; 4775 Idx += 1; 4776 } 4777 4778 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4779 if (VT.is64BitVector() && EltSz == 32) 4780 return false; 4781 4782 return true; 4783 } 4784 4785 /// \return true if this is a reverse operation on a vector. 4786 static bool isReverseMask(ArrayRef<int> M, EVT VT) { 4787 unsigned NumElts = VT.getVectorNumElements(); 4788 // Make sure the mask has the right size. 4789 if (NumElts != M.size()) 4790 return false; 4791 4792 // Look for <15, ..., 3, -1, 1, 0>. 4793 for (unsigned i = 0; i != NumElts; ++i) 4794 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i)) 4795 return false; 4796 4797 return true; 4798 } 4799 4800 // If N is an integer constant that can be moved into a register in one 4801 // instruction, return an SDValue of such a constant (will become a MOV 4802 // instruction). Otherwise return null.
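// Thumb1 can materialize 0-255 with a single MOV (or the bitwise inverse of such a value with MVN); ARM mode accepts any so_imm encoding, i.e. an 8-bit value rotated right by an even number of bits, or the inverse of one.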
4803 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 4804 const ARMSubtarget *ST, SDLoc dl) { 4805 uint64_t Val; 4806 if (!isa<ConstantSDNode>(N)) 4807 return SDValue(); 4808 Val = cast<ConstantSDNode>(N)->getZExtValue(); 4809 4810 if (ST->isThumb1Only()) { 4811 if (Val <= 255 || ~Val <= 255) 4812 return DAG.getConstant(Val, MVT::i32); 4813 } else { 4814 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 4815 return DAG.getConstant(Val, MVT::i32); 4816 } 4817 return SDValue(); 4818 } 4819 4820 // If this is a case we can't handle, return null and let the default 4821 // expansion code take care of it. 4822 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 4823 const ARMSubtarget *ST) const { 4824 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 4825 SDLoc dl(Op); 4826 EVT VT = Op.getValueType(); 4827 4828 APInt SplatBits, SplatUndef; 4829 unsigned SplatBitSize; 4830 bool HasAnyUndefs; 4831 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 4832 if (SplatBitSize <= 64) { 4833 // Check if an immediate VMOV works. 4834 EVT VmovVT; 4835 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 4836 SplatUndef.getZExtValue(), SplatBitSize, 4837 DAG, VmovVT, VT.is128BitVector(), 4838 VMOVModImm); 4839 if (Val.getNode()) { 4840 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 4841 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 4842 } 4843 4844 // Try an immediate VMVN. 4845 uint64_t NegatedImm = (~SplatBits).getZExtValue(); 4846 Val = isNEONModifiedImm(NegatedImm, 4847 SplatUndef.getZExtValue(), SplatBitSize, 4848 DAG, VmovVT, VT.is128BitVector(), 4849 VMVNModImm); 4850 if (Val.getNode()) { 4851 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 4852 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 4853 } 4854 4855 // Use vmov.f32 to materialize other v2f32 and v4f32 splats. 4856 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { 4857 int ImmVal = ARM_AM::getFP32Imm(SplatBits); 4858 if (ImmVal != -1) { 4859 SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); 4860 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); 4861 } 4862 } 4863 } 4864 } 4865 4866 // Scan through the operands to see if only one value is used. 4867 // 4868 // As an optimisation, even if more than one value is used it may be more 4869 // profitable to splat with one value and then change some lanes. 4870 // 4871 // Heuristically we decide to do this if the vector has a "dominant" value, 4872 // defined as splatted to more than half of the lanes. 4873 unsigned NumElts = VT.getVectorNumElements(); 4874 bool isOnlyLowElement = true; 4875 bool usesOnlyOneValue = true; 4876 bool hasDominantValue = false; 4877 bool isConstant = true; 4878 4879 // Map of the number of times a particular SDValue appears in the 4880 // element list. 4881 DenseMap<SDValue, unsigned> ValueCounts; 4882 SDValue Value; 4883 for (unsigned i = 0; i < NumElts; ++i) { 4884 SDValue V = Op.getOperand(i); 4885 if (V.getOpcode() == ISD::UNDEF) 4886 continue; 4887 if (i > 0) 4888 isOnlyLowElement = false; 4889 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 4890 isConstant = false; 4891 4892 ValueCounts.insert(std::make_pair(V, 0)); 4893 unsigned &Count = ValueCounts[V]; 4894 4895 // Is this value dominant?
(takes up more than half of the lanes) 4896 if (++Count > (NumElts / 2)) { 4897 hasDominantValue = true; 4898 Value = V; 4899 } 4900 } 4901 if (ValueCounts.size() != 1) 4902 usesOnlyOneValue = false; 4903 if (!Value.getNode() && ValueCounts.size() > 0) 4904 Value = ValueCounts.begin()->first; 4905 4906 if (ValueCounts.size() == 0) 4907 return DAG.getUNDEF(VT); 4908 4909 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR. 4910 // Keep going if we are hitting this case. 4911 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) 4912 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 4913 4914 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4915 4916 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 4917 // i32 and try again. 4918 if (hasDominantValue && EltSize <= 32) { 4919 if (!isConstant) { 4920 SDValue N; 4921 4922 // If we are VDUPing a value that comes directly from a vector, that will 4923 // cause an unnecessary move to and from a GPR, where instead we could 4924 // just use VDUPLANE. We can only do this if the lane being extracted 4925 // is at a constant index, as the VDUP from lane instructions only have 4926 // constant-index forms. 4927 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 4928 isa<ConstantSDNode>(Value->getOperand(1))) { 4929 // We need to create a new undef vector to use for the VDUPLANE if the 4930 // size of the vector from which we get the value is different than the 4931 // size of the vector that we need to create. We will insert the element 4932 // such that the register coalescer will remove unnecessary copies. 4933 if (VT != Value->getOperand(0).getValueType()) { 4934 ConstantSDNode *constIndex; 4935 constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)); 4936 assert(constIndex && "The index is not a constant!"); 4937 unsigned index = constIndex->getAPIntValue().getLimitedValue() % 4938 VT.getVectorNumElements(); 4939 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, 4940 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), 4941 Value, DAG.getConstant(index, MVT::i32)), 4942 DAG.getConstant(index, MVT::i32)); 4943 } else 4944 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, 4945 Value->getOperand(0), Value->getOperand(1)); 4946 } else 4947 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); 4948 4949 if (!usesOnlyOneValue) { 4950 // The dominant value was splatted as 'N', but we now have to insert 4951 // all differing elements. 
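// For example, <a, a, b, a> becomes VDUP(a) followed by a single INSERT_VECTOR_ELT placing b in lane 2.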
4952 for (unsigned I = 0; I < NumElts; ++I) { 4953 if (Op.getOperand(I) == Value) 4954 continue; 4955 SmallVector<SDValue, 3> Ops; 4956 Ops.push_back(N); 4957 Ops.push_back(Op.getOperand(I)); 4958 Ops.push_back(DAG.getConstant(I, MVT::i32)); 4959 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); 4960 } 4961 } 4962 return N; 4963 } 4964 if (VT.getVectorElementType().isFloatingPoint()) { 4965 SmallVector<SDValue, 8> Ops; 4966 for (unsigned i = 0; i < NumElts; ++i) 4967 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, 4968 Op.getOperand(i))); 4969 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 4970 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); 4971 Val = LowerBUILD_VECTOR(Val, DAG, ST); 4972 if (Val.getNode()) 4973 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4974 } 4975 if (usesOnlyOneValue) { 4976 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 4977 if (isConstant && Val.getNode()) 4978 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 4979 } 4980 } 4981 4982 // If all elements are constants and the case above didn't get hit, fall back 4983 // to the default expansion, which will generate a load from the constant 4984 // pool. 4985 if (isConstant) 4986 return SDValue(); 4987 4988 // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 4989 if (NumElts >= 4) { 4990 SDValue shuffle = ReconstructShuffle(Op, DAG); 4991 if (shuffle != SDValue()) 4992 return shuffle; 4993 } 4994 4995 // Vectors with 32- or 64-bit elements can be built by directly assigning 4996 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 4997 // will be legalized. 4998 if (EltSize >= 32) { 4999 // Do the expansion with floating-point types, since that is what the VFP 5000 // registers are defined to use, and since i64 is not legal. 5001 EVT EltVT = EVT::getFloatingPointVT(EltSize); 5002 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 5003 SmallVector<SDValue, 8> Ops; 5004 for (unsigned i = 0; i < NumElts; ++i) 5005 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); 5006 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 5007 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 5008 } 5009 5010 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we 5011 // know the default expansion would otherwise fall back on something even 5012 // worse. For a vector with one or two non-undef values, that's 5013 // scalar_to_vector for the elements followed by a shuffle (provided the 5014 // shuffle is valid for the target) and materialization element by element 5015 // on the stack followed by a load for everything else. 5016 if (!isConstant && !usesOnlyOneValue) { 5017 SDValue Vec = DAG.getUNDEF(VT); 5018 for (unsigned i = 0 ; i < NumElts; ++i) { 5019 SDValue V = Op.getOperand(i); 5020 if (V.getOpcode() == ISD::UNDEF) 5021 continue; 5022 SDValue LaneIdx = DAG.getConstant(i, MVT::i32); 5023 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); 5024 } 5025 return Vec; 5026 } 5027 5028 return SDValue(); 5029 } 5030 5031 // Gather data to see if the operation can be modelled as a 5032 // shuffle in combination with VEXTs. 
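// For instance, a v4i16 built from lanes 1..4 of a single v8i16 source becomes a VEXT of the source's two halves followed by an ordinary vector_shuffle.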
5033 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, 5034 SelectionDAG &DAG) const { 5035 SDLoc dl(Op); 5036 EVT VT = Op.getValueType(); 5037 unsigned NumElts = VT.getVectorNumElements(); 5038 5039 SmallVector<SDValue, 2> SourceVecs; 5040 SmallVector<unsigned, 2> MinElts; 5041 SmallVector<unsigned, 2> MaxElts; 5042 5043 for (unsigned i = 0; i < NumElts; ++i) { 5044 SDValue V = Op.getOperand(i); 5045 if (V.getOpcode() == ISD::UNDEF) 5046 continue; 5047 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { 5048 // A shuffle can only come from building a vector from various 5049 // elements of other vectors. 5050 return SDValue(); 5051 } else if (V.getOperand(0).getValueType().getVectorElementType() != 5052 VT.getVectorElementType()) { 5053 // This code doesn't know how to handle shuffles where the vector 5054 // element types do not match (this happens because type legalization 5055 // promotes the return type of EXTRACT_VECTOR_ELT). 5056 // FIXME: It might be appropriate to extend this code to handle 5057 // mismatched types. 5058 return SDValue(); 5059 } 5060 5061 // Record this extraction against the appropriate vector if possible... 5062 SDValue SourceVec = V.getOperand(0); 5063 // If the element number isn't a constant, we can't effectively 5064 // analyze what's going on. 5065 if (!isa<ConstantSDNode>(V.getOperand(1))) 5066 return SDValue(); 5067 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); 5068 bool FoundSource = false; 5069 for (unsigned j = 0; j < SourceVecs.size(); ++j) { 5070 if (SourceVecs[j] == SourceVec) { 5071 if (MinElts[j] > EltNo) 5072 MinElts[j] = EltNo; 5073 if (MaxElts[j] < EltNo) 5074 MaxElts[j] = EltNo; 5075 FoundSource = true; 5076 break; 5077 } 5078 } 5079 5080 // Or record a new source if not... 5081 if (!FoundSource) { 5082 SourceVecs.push_back(SourceVec); 5083 MinElts.push_back(EltNo); 5084 MaxElts.push_back(EltNo); 5085 } 5086 } 5087 5088 // Currently only do something sane when at most two source vectors 5089 // are involved. 5090 if (SourceVecs.size() > 2) 5091 return SDValue(); 5092 5093 SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; 5094 int VEXTOffsets[2] = {0, 0}; 5095 5096 // This loop extracts the usage patterns of the source vectors 5097 // and prepares appropriate SDValues for a shuffle if possible. 5098 for (unsigned i = 0; i < SourceVecs.size(); ++i) { 5099 if (SourceVecs[i].getValueType() == VT) { 5100 // No VEXT necessary 5101 ShuffleSrcs[i] = SourceVecs[i]; 5102 VEXTOffsets[i] = 0; 5103 continue; 5104 } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { 5105 // It probably isn't worth padding out a smaller vector just to 5106 // break it down again in a shuffle. 5107 return SDValue(); 5108 } 5109 5110 // Since only 64-bit and 128-bit vectors are legal on ARM and 5111 // we've eliminated the other cases...
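// ...any remaining source must hold exactly twice as many elements as the result; below we either take the half we need or use a VEXT to straddle both halves.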
5112 assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && 5113 "unexpected vector sizes in ReconstructShuffle"); 5114 5115 if (MaxElts[i] - MinElts[i] >= NumElts) { 5116 // Span too large for a VEXT to cope 5117 return SDValue(); 5118 } 5119 5120 if (MinElts[i] >= NumElts) { 5121 // The extraction can just take the second half 5122 VEXTOffsets[i] = NumElts; 5123 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 5124 SourceVecs[i], 5125 DAG.getIntPtrConstant(NumElts)); 5126 } else if (MaxElts[i] < NumElts) { 5127 // The extraction can just take the first half 5128 VEXTOffsets[i] = 0; 5129 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 5130 SourceVecs[i], 5131 DAG.getIntPtrConstant(0)); 5132 } else { 5133 // An actual VEXT is needed 5134 VEXTOffsets[i] = MinElts[i]; 5135 SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 5136 SourceVecs[i], 5137 DAG.getIntPtrConstant(0)); 5138 SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 5139 SourceVecs[i], 5140 DAG.getIntPtrConstant(NumElts)); 5141 ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, 5142 DAG.getConstant(VEXTOffsets[i], MVT::i32)); 5143 } 5144 } 5145 5146 SmallVector<int, 8> Mask; 5147 5148 for (unsigned i = 0; i < NumElts; ++i) { 5149 SDValue Entry = Op.getOperand(i); 5150 if (Entry.getOpcode() == ISD::UNDEF) { 5151 Mask.push_back(-1); 5152 continue; 5153 } 5154 5155 SDValue ExtractVec = Entry.getOperand(0); 5156 int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i) 5157 .getOperand(1))->getSExtValue(); 5158 if (ExtractVec == SourceVecs[0]) { 5159 Mask.push_back(ExtractElt - VEXTOffsets[0]); 5160 } else { 5161 Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); 5162 } 5163 } 5164 5165 // Final check before we try to produce nonsense... 5166 if (isShuffleMaskLegal(Mask, VT)) 5167 return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], 5168 &Mask[0]); 5169 5170 return SDValue(); 5171 } 5172 5173 /// isShuffleMaskLegal - Targets can use this to indicate that they only 5174 /// support *some* VECTOR_SHUFFLE operations, those with specific masks. 5175 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 5176 /// are assumed to be legal. 5177 bool 5178 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 5179 EVT VT) const { 5180 if (VT.getVectorNumElements() == 4 && 5181 (VT.is128BitVector() || VT.is64BitVector())) { 5182 unsigned PFIndexes[4]; 5183 for (unsigned i = 0; i != 4; ++i) { 5184 if (M[i] < 0) 5185 PFIndexes[i] = 8; 5186 else 5187 PFIndexes[i] = M[i]; 5188 } 5189 5190 // Compute the index in the perfect shuffle table. 
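// Each of the four lane indices is a base-9 digit (0-7 select an element, 8 means undef), so the whole mask packs into a single table index.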
5191 unsigned PFTableIndex = 5192 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 5193 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 5194 unsigned Cost = (PFEntry >> 30); 5195 5196 if (Cost <= 4) 5197 return true; 5198 } 5199 5200 bool ReverseVEXT; 5201 unsigned Imm, WhichResult; 5202 5203 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 5204 return (EltSize >= 32 || 5205 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 5206 isVREVMask(M, VT, 64) || 5207 isVREVMask(M, VT, 32) || 5208 isVREVMask(M, VT, 16) || 5209 isVEXTMask(M, VT, ReverseVEXT, Imm) || 5210 isVTBLMask(M, VT) || 5211 isVTRNMask(M, VT, WhichResult) || 5212 isVUZPMask(M, VT, WhichResult) || 5213 isVZIPMask(M, VT, WhichResult) || 5214 isVTRN_v_undef_Mask(M, VT, WhichResult) || 5215 isVUZP_v_undef_Mask(M, VT, WhichResult) || 5216 isVZIP_v_undef_Mask(M, VT, WhichResult) || 5217 ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT))); 5218 } 5219 5220 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 5221 /// the specified operations to build the shuffle. 5222 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 5223 SDValue RHS, SelectionDAG &DAG, 5224 SDLoc dl) { 5225 unsigned OpNum = (PFEntry >> 26) & 0x0F; 5226 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 5227 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 5228 5229 enum { 5230 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 5231 OP_VREV, 5232 OP_VDUP0, 5233 OP_VDUP1, 5234 OP_VDUP2, 5235 OP_VDUP3, 5236 OP_VEXT1, 5237 OP_VEXT2, 5238 OP_VEXT3, 5239 OP_VUZPL, // VUZP, left result 5240 OP_VUZPR, // VUZP, right result 5241 OP_VZIPL, // VZIP, left result 5242 OP_VZIPR, // VZIP, right result 5243 OP_VTRNL, // VTRN, left result 5244 OP_VTRNR // VTRN, right result 5245 }; 5246 5247 if (OpNum == OP_COPY) { 5248 if (LHSID == (1*9+2)*9+3) return LHS; 5249 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 5250 return RHS; 5251 } 5252 5253 SDValue OpLHS, OpRHS; 5254 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 5255 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 5256 EVT VT = OpLHS.getValueType(); 5257 5258 switch (OpNum) { 5259 default: llvm_unreachable("Unknown shuffle opcode!"); 5260 case OP_VREV: 5261 // VREV divides the vector in half and swaps within the half. 
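// vrev <2 x i32> or <2 x f32> -> VREV64 (the 128-bit forms use the same mapping)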
5262 if (VT.getVectorElementType() == MVT::i32 || 5263 VT.getVectorElementType() == MVT::f32) 5264 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 5265 // vrev <4 x i16> -> VREV32 5266 if (VT.getVectorElementType() == MVT::i16) 5267 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS); 5268 // vrev <4 x i8> -> VREV16 5269 assert(VT.getVectorElementType() == MVT::i8); 5270 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS); 5271 case OP_VDUP0: 5272 case OP_VDUP1: 5273 case OP_VDUP2: 5274 case OP_VDUP3: 5275 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 5276 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 5277 case OP_VEXT1: 5278 case OP_VEXT2: 5279 case OP_VEXT3: 5280 return DAG.getNode(ARMISD::VEXT, dl, VT, 5281 OpLHS, OpRHS, 5282 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 5283 case OP_VUZPL: 5284 case OP_VUZPR: 5285 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 5286 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 5287 case OP_VZIPL: 5288 case OP_VZIPR: 5289 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 5290 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 5291 case OP_VTRNL: 5292 case OP_VTRNR: 5293 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 5294 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 5295 } 5296 } 5297 5298 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, 5299 ArrayRef<int> ShuffleMask, 5300 SelectionDAG &DAG) { 5301 // Check to see if we can use the VTBL instruction. 5302 SDValue V1 = Op.getOperand(0); 5303 SDValue V2 = Op.getOperand(1); 5304 SDLoc DL(Op); 5305 5306 SmallVector<SDValue, 8> VTBLMask; 5307 for (ArrayRef<int>::iterator 5308 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) 5309 VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); 5310 5311 if (V2.getNode()->getOpcode() == ISD::UNDEF) 5312 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, 5313 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, 5314 &VTBLMask[0], 8)); 5315 5316 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, 5317 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, 5318 &VTBLMask[0], 8)); 5319 } 5320 5321 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, 5322 SelectionDAG &DAG) { 5323 SDLoc DL(Op); 5324 SDValue OpLHS = Op.getOperand(0); 5325 EVT VT = OpLHS.getValueType(); 5326 5327 assert((VT == MVT::v8i16 || VT == MVT::v16i8) && 5328 "Expect a v8i16/v16i8 type"); 5329 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS); 5330 // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now, 5331 // extract the first 8 bytes into the top double word and the last 8 bytes 5332 // into the bottom double word. The v8i16 case is similar. 5333 unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4; 5334 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS, 5335 DAG.getConstant(ExtractNum, MVT::i32)); 5336 } 5337 5338 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 5339 SDValue V1 = Op.getOperand(0); 5340 SDValue V2 = Op.getOperand(1); 5341 SDLoc dl(Op); 5342 EVT VT = Op.getValueType(); 5343 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 5344 5345 // Convert shuffles that are directly supported on NEON to target-specific 5346 // DAG nodes, instead of keeping them as shuffles and matching them again 5347 // during code selection. This is more efficient and avoids the possibility 5348 // of inconsistencies between legalization and selection. 5349 // FIXME: floating-point vectors should be canonicalized to integer vectors 5350 // of the same type so that they get CSEd properly.
5351 ArrayRef<int> ShuffleMask = SVN->getMask(); 5352 5353 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 5354 if (EltSize <= 32) { 5355 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 5356 int Lane = SVN->getSplatIndex(); 5357 // If this is an undef splat, generate it via "just" vdup, if possible. 5358 if (Lane == -1) Lane = 0; 5359 5360 // Test if V1 is a SCALAR_TO_VECTOR. 5361 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 5362 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 5363 } 5364 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR 5365 // (and probably will turn into a SCALAR_TO_VECTOR once legalization 5366 // reaches it). 5367 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && 5368 !isa<ConstantSDNode>(V1.getOperand(0))) { 5369 bool IsScalarToVector = true; 5370 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) 5371 if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { 5372 IsScalarToVector = false; 5373 break; 5374 } 5375 if (IsScalarToVector) 5376 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 5377 } 5378 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 5379 DAG.getConstant(Lane, MVT::i32)); 5380 } 5381 5382 bool ReverseVEXT; 5383 unsigned Imm; 5384 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 5385 if (ReverseVEXT) 5386 std::swap(V1, V2); 5387 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 5388 DAG.getConstant(Imm, MVT::i32)); 5389 } 5390 5391 if (isVREVMask(ShuffleMask, VT, 64)) 5392 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 5393 if (isVREVMask(ShuffleMask, VT, 32)) 5394 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 5395 if (isVREVMask(ShuffleMask, VT, 16)) 5396 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 5397 5398 if (V2->getOpcode() == ISD::UNDEF && 5399 isSingletonVEXTMask(ShuffleMask, VT, Imm)) { 5400 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1, 5401 DAG.getConstant(Imm, MVT::i32)); 5402 } 5403 5404 // Check for NEON shuffles that modify both input vectors in place. 5405 // If both results are used, i.e., if there are two shuffles with the same 5406 // source operands and with masks corresponding to both results of one of 5407 // these operations, DAG memoization will ensure that a single node is 5408 // used for both shuffles. 5409 unsigned WhichResult; 5410 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 5411 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 5412 V1, V2).getValue(WhichResult); 5413 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 5414 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 5415 V1, V2).getValue(WhichResult); 5416 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 5417 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 5418 V1, V2).getValue(WhichResult); 5419 5420 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 5421 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 5422 V1, V1).getValue(WhichResult); 5423 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 5424 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 5425 V1, V1).getValue(WhichResult); 5426 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 5427 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 5428 V1, V1).getValue(WhichResult); 5429 } 5430 5431 // If the shuffle is not directly supported and it has 4 elements, use 5432 // the PerfectShuffle-generated table to synthesize it from other shuffles.
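// (The cost of each expansion lives in the top two bits of its table entry; see GeneratePerfectShuffle above for how entries are decoded.)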
5433 unsigned NumElts = VT.getVectorNumElements(); 5434 if (NumElts == 4) { 5435 unsigned PFIndexes[4]; 5436 for (unsigned i = 0; i != 4; ++i) { 5437 if (ShuffleMask[i] < 0) 5438 PFIndexes[i] = 8; 5439 else 5440 PFIndexes[i] = ShuffleMask[i]; 5441 } 5442 5443 // Compute the index in the perfect shuffle table. 5444 unsigned PFTableIndex = 5445 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 5446 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 5447 unsigned Cost = (PFEntry >> 30); 5448 5449 if (Cost <= 4) 5450 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 5451 } 5452 5453 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 5454 if (EltSize >= 32) { 5455 // Do the expansion with floating-point types, since that is what the VFP 5456 // registers are defined to use, and since i64 is not legal. 5457 EVT EltVT = EVT::getFloatingPointVT(EltSize); 5458 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 5459 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); 5460 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); 5461 SmallVector<SDValue, 8> Ops; 5462 for (unsigned i = 0; i < NumElts; ++i) { 5463 if (ShuffleMask[i] < 0) 5464 Ops.push_back(DAG.getUNDEF(EltVT)); 5465 else 5466 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 5467 ShuffleMask[i] < (int)NumElts ? V1 : V2, 5468 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 5469 MVT::i32))); 5470 } 5471 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 5472 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 5473 } 5474 5475 if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) 5476 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); 5477 5478 if (VT == MVT::v8i8) { 5479 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); 5480 if (NewOp.getNode()) 5481 return NewOp; 5482 } 5483 5484 return SDValue(); 5485 } 5486 5487 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 5488 // INSERT_VECTOR_ELT is legal only for immediate indexes. 5489 SDValue Lane = Op.getOperand(2); 5490 if (!isa<ConstantSDNode>(Lane)) 5491 return SDValue(); 5492 5493 return Op; 5494 } 5495 5496 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 5497 // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 5498 SDValue Lane = Op.getOperand(1); 5499 if (!isa<ConstantSDNode>(Lane)) 5500 return SDValue(); 5501 5502 SDValue Vec = Op.getOperand(0); 5503 if (Op.getValueType() == MVT::i32 && 5504 Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { 5505 SDLoc dl(Op); 5506 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 5507 } 5508 5509 return Op; 5510 } 5511 5512 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 5513 // The only time a CONCAT_VECTORS operation can have legal types is when 5514 // two 64-bit vectors are concatenated to a 128-bit vector. 
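// The halves are inserted as f64 elements of a v2f64 and the result is bitcast back, since f64 is the natural 64-bit element type for the VFP/NEON register file.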
5515 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 5516 "unexpected CONCAT_VECTORS"); 5517 SDLoc dl(Op); 5518 SDValue Val = DAG.getUNDEF(MVT::v2f64); 5519 SDValue Op0 = Op.getOperand(0); 5520 SDValue Op1 = Op.getOperand(1); 5521 if (Op0.getOpcode() != ISD::UNDEF) 5522 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 5523 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), 5524 DAG.getIntPtrConstant(0)); 5525 if (Op1.getOpcode() != ISD::UNDEF) 5526 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 5527 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), 5528 DAG.getIntPtrConstant(1)); 5529 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); 5530 } 5531 5532 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each 5533 /// element has been zero/sign-extended, depending on the isSigned parameter, 5534 /// from an integer type half its size. 5535 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, 5536 bool isSigned) { 5537 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 5538 EVT VT = N->getValueType(0); 5539 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { 5540 SDNode *BVN = N->getOperand(0).getNode(); 5541 if (BVN->getValueType(0) != MVT::v4i32 || 5542 BVN->getOpcode() != ISD::BUILD_VECTOR) 5543 return false; 5544 unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 5545 unsigned HiElt = 1 - LoElt; 5546 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); 5547 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); 5548 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); 5549 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); 5550 if (!Lo0 || !Hi0 || !Lo1 || !Hi1) 5551 return false; 5552 if (isSigned) { 5553 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && 5554 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) 5555 return true; 5556 } else { 5557 if (Hi0->isNullValue() && Hi1->isNullValue()) 5558 return true; 5559 } 5560 return false; 5561 } 5562 5563 if (N->getOpcode() != ISD::BUILD_VECTOR) 5564 return false; 5565 5566 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 5567 SDNode *Elt = N->getOperand(i).getNode(); 5568 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 5569 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 5570 unsigned HalfSize = EltSize / 2; 5571 if (isSigned) { 5572 if (!isIntN(HalfSize, C->getSExtValue())) 5573 return false; 5574 } else { 5575 if (!isUIntN(HalfSize, C->getZExtValue())) 5576 return false; 5577 } 5578 continue; 5579 } 5580 return false; 5581 } 5582 5583 return true; 5584 } 5585 5586 /// isSignExtended - Check if a node is a vector value that is sign-extended 5587 /// or a constant BUILD_VECTOR with sign-extended elements. 5588 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { 5589 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) 5590 return true; 5591 if (isExtendedBUILD_VECTOR(N, DAG, true)) 5592 return true; 5593 return false; 5594 } 5595 5596 /// isZeroExtended - Check if a node is a vector value that is zero-extended 5597 /// or a constant BUILD_VECTOR with zero-extended elements. 
5598 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { 5599 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) 5600 return true; 5601 if (isExtendedBUILD_VECTOR(N, DAG, false)) 5602 return true; 5603 return false; 5604 } 5605 5606 static EVT getExtensionTo64Bits(const EVT &OrigVT) { 5607 if (OrigVT.getSizeInBits() >= 64) 5608 return OrigVT; 5609 5610 assert(OrigVT.isSimple() && "Expecting a simple value type"); 5611 5612 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; 5613 switch (OrigSimpleTy) { 5614 default: llvm_unreachable("Unexpected Vector Type"); 5615 case MVT::v2i8: 5616 case MVT::v2i16: 5617 return MVT::v2i32; 5618 case MVT::v4i8: 5619 return MVT::v4i16; 5620 } 5621 } 5622 5623 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total 5624 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. 5625 /// We insert the required extension here to get the vector to fill a D register. 5626 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, 5627 const EVT &OrigTy, 5628 const EVT &ExtTy, 5629 unsigned ExtOpcode) { 5630 // The vector originally had a size of OrigTy. It was then extended to ExtTy. 5631 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than 5632 // 64-bits we need to insert a new extension so that it will be 64-bits. 5633 assert(ExtTy.is128BitVector() && "Unexpected extension size"); 5634 if (OrigTy.getSizeInBits() >= 64) 5635 return N; 5636 5637 // Must extend size to at least 64 bits to be used as an operand for VMULL. 5638 EVT NewVT = getExtensionTo64Bits(OrigTy); 5639 5640 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N); 5641 } 5642 5643 /// SkipLoadExtensionForVMULL - return a load of the original vector size that 5644 /// does not do any sign/zero extension. If the original vector is less 5645 /// than 64 bits, an appropriate extension will be added after the load to 5646 /// reach a total size of 64 bits. We have to add the extension separately 5647 /// because ARM does not have a sign/zero extending load for vectors. 5648 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { 5649 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); 5650 5651 // The load already has the right type. 5652 if (ExtendedTy == LD->getMemoryVT()) 5653 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(), 5654 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 5655 LD->isNonTemporal(), LD->isInvariant(), 5656 LD->getAlignment()); 5657 5658 // We need to create a zextload/sextload. We cannot just create a load 5659 // followed by a zext/sext node because LowerMUL is also run during normal 5660 // operation legalization where we can't create illegal types. 5661 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, 5662 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), 5663 LD->getMemoryVT(), LD->isVolatile(), 5664 LD->isNonTemporal(), LD->getAlignment()); 5665 } 5666 5667 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, 5668 /// extending load, or BUILD_VECTOR with extended elements, return the 5669 /// unextended value. The unextended vector should be 64 bits so that it can 5670 /// be used as an operand to a VMULL instruction. If the original vector size 5671 /// before extension is less than 64 bits we add an extension to resize 5672 /// the vector to 64 bits.
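/// For example, (v8i16 (zext (v8i8 X))) simply returns X, which already fills a D register.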
5673 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { 5674 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 5675 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG, 5676 N->getOperand(0)->getValueType(0), 5677 N->getValueType(0), 5678 N->getOpcode()); 5679 5680 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) 5681 return SkipLoadExtensionForVMULL(LD, DAG); 5682 5683 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will 5684 // have been legalized as a BITCAST from v4i32. 5685 if (N->getOpcode() == ISD::BITCAST) { 5686 SDNode *BVN = N->getOperand(0).getNode(); 5687 assert(BVN->getOpcode() == ISD::BUILD_VECTOR && 5688 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); 5689 unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 5690 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32, 5691 BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); 5692 } 5693 // Construct a new BUILD_VECTOR with elements truncated to half the size. 5694 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); 5695 EVT VT = N->getValueType(0); 5696 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; 5697 unsigned NumElts = VT.getVectorNumElements(); 5698 MVT TruncVT = MVT::getIntegerVT(EltSize); 5699 SmallVector<SDValue, 8> Ops; 5700 for (unsigned i = 0; i != NumElts; ++i) { 5701 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); 5702 const APInt &CInt = C->getAPIntValue(); 5703 // Element types smaller than 32 bits are not legal, so use i32 elements. 5704 // The values are implicitly truncated so sext vs. zext doesn't matter. 5705 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); 5706 } 5707 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), 5708 MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); 5709 } 5710 5711 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { 5712 unsigned Opcode = N->getOpcode(); 5713 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 5714 SDNode *N0 = N->getOperand(0).getNode(); 5715 SDNode *N1 = N->getOperand(1).getNode(); 5716 return N0->hasOneUse() && N1->hasOneUse() && 5717 isSignExtended(N0, DAG) && isSignExtended(N1, DAG); 5718 } 5719 return false; 5720 } 5721 5722 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { 5723 unsigned Opcode = N->getOpcode(); 5724 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 5725 SDNode *N0 = N->getOperand(0).getNode(); 5726 SDNode *N1 = N->getOperand(1).getNode(); 5727 return N0->hasOneUse() && N1->hasOneUse() && 5728 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); 5729 } 5730 return false; 5731 } 5732 5733 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 5734 // Multiplications are only custom-lowered for 128-bit vectors so that 5735 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 
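// VMULL multiplies two 64-bit D-register vectors and produces a 128-bit Q-register result with elements twice as wide, e.g. v4i16 x v4i16 -> v4i32.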
5736 EVT VT = Op.getValueType(); 5737 assert(VT.is128BitVector() && VT.isInteger() && 5738 "unexpected type for custom-lowering ISD::MUL"); 5739 SDNode *N0 = Op.getOperand(0).getNode(); 5740 SDNode *N1 = Op.getOperand(1).getNode(); 5741 unsigned NewOpc = 0; 5742 bool isMLA = false; 5743 bool isN0SExt = isSignExtended(N0, DAG); 5744 bool isN1SExt = isSignExtended(N1, DAG); 5745 if (isN0SExt && isN1SExt) 5746 NewOpc = ARMISD::VMULLs; 5747 else { 5748 bool isN0ZExt = isZeroExtended(N0, DAG); 5749 bool isN1ZExt = isZeroExtended(N1, DAG); 5750 if (isN0ZExt && isN1ZExt) 5751 NewOpc = ARMISD::VMULLu; 5752 else if (isN1SExt || isN1ZExt) { 5753 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these 5754 // into (s/zext A * s/zext C) + (s/zext B * s/zext C) 5755 if (isN1SExt && isAddSubSExt(N0, DAG)) { 5756 NewOpc = ARMISD::VMULLs; 5757 isMLA = true; 5758 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { 5759 NewOpc = ARMISD::VMULLu; 5760 isMLA = true; 5761 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { 5762 std::swap(N0, N1); 5763 NewOpc = ARMISD::VMULLu; 5764 isMLA = true; 5765 } 5766 } 5767 5768 if (!NewOpc) { 5769 if (VT == MVT::v2i64) 5770 // Fall through to expand this. It is not legal. 5771 return SDValue(); 5772 else 5773 // Other vector multiplications are legal. 5774 return Op; 5775 } 5776 } 5777 5778 // Legalize to a VMULL instruction. 5779 SDLoc DL(Op); 5780 SDValue Op0; 5781 SDValue Op1 = SkipExtensionForVMULL(N1, DAG); 5782 if (!isMLA) { 5783 Op0 = SkipExtensionForVMULL(N0, DAG); 5784 assert(Op0.getValueType().is64BitVector() && 5785 Op1.getValueType().is64BitVector() && 5786 "unexpected types for extended operands to VMULL"); 5787 return DAG.getNode(NewOpc, DL, VT, Op0, Op1); 5788 } 5789 5790 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during 5791 // isel lowering to take advantage of no-stall back to back vmul + vmla. 5792 // vmull q0, d4, d6 5793 // vmlal q0, d5, d6 5794 // is faster than 5795 // vaddl q0, d4, d5 5796 // vmovl q1, d6 5797 // vmul q0, q0, q1 5798 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG); 5799 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG); 5800 EVT Op1VT = Op1.getValueType(); 5801 return DAG.getNode(N0->getOpcode(), DL, VT, 5802 DAG.getNode(NewOpc, DL, VT, 5803 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), 5804 DAG.getNode(NewOpc, DL, VT, 5805 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); 5806 } 5807 5808 static SDValue 5809 LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { 5810 // Convert to float 5811 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); 5812 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); 5813 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); 5814 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); 5815 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); 5816 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); 5817 // Get reciprocal estimate. 5818 // float4 recip = vrecpeq_f32(yf); 5819 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5820 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); 5821 // Because char has a smaller range than uchar, we can actually get away 5822 // without any newton steps. This requires that we use a weird bias 5823 // of 0xb000, however (again, this has been exhaustively tested). 
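// (Adding the bias to the raw float bits nudges the scaled quotient just far enough that the truncating conversion below lands on the exact quotient for every input in range.)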
  //     float4 result = as_float4(as_int4(xf*recip) + 0xb000);
  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
  Y = DAG.getConstant(0xb000, MVT::i32);
  Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
  // Convert back to short.
  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
  return X;
}

static SDValue
LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
  SDValue N2;
  // Convert to float.
  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);

  // Use reciprocal estimate and one refinement step.
  // float4 recip = vrecpeq_f32(yf);
  // recip *= vrecpsq_f32(yf, recip);
  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
                   N1, N2);
  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
  // Because short has a smaller range than ushort, we can actually get away
  // with only a single Newton step. This requires that we use a weird bias
  // of 0x89, however (again, this has been exhaustively tested).
  //     float4 result = as_float4(as_int4(xf*recip) + 0x89);
  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
  N1 = DAG.getConstant(0x89, MVT::i32);
  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
  // Convert back to integer and return.
  //     return vmovn_s32(vcvt_s32_f32(result));
  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
  return N0;
}

static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
         "unexpected type for custom-lowering ISD::SDIV");

  SDLoc dl(Op);
  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);
  SDValue N2, N3;

  if (VT == MVT::v8i8) {
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);

    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
                     DAG.getIntPtrConstant(4));
    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
                     DAG.getIntPtrConstant(4));
    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
                     DAG.getIntPtrConstant(0));
    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
                     DAG.getIntPtrConstant(0));

    N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
    N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16

    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
    N0 = LowerCONCAT_VECTORS(N0, DAG);

    N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
    return N0;
  }
  return LowerSDIV_v4i16(N0, N1, dl, DAG);
}

static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
         "unexpected type for custom-lowering ISD::UDIV");

  SDLoc dl(Op);
  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);
  SDValue N2, N3;

  if (VT == MVT::v8i8) {
    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
    N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);

    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
                     DAG.getIntPtrConstant(4));
    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
                     DAG.getIntPtrConstant(4));
    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
                     DAG.getIntPtrConstant(0));
    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
                     DAG.getIntPtrConstant(0));

    // The operands were zero-extended from i8, so they fit comfortably in
    // the non-negative half of the signed 16-bit range and the signed
    // division helper can be reused here.
    N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
    N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16

    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
    N0 = LowerCONCAT_VECTORS(N0, DAG);

    N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
                     DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
                     N0);
    return N0;
  }

  // v4i16 udiv. Convert to float.
  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
  SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);

  // Use reciprocal estimate and two refinement steps.
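  // Each refinement is one Newton-Raphson iteration: VRECPS computes
  // (2 - yf * recip), and multiplying the running estimate by that factor
  // roughly doubles its number of correct bits. Unsigned 16-bit quotients
  // need one more bit of precision than the signed v4i16 case above, hence
  // the second step here.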
  //     float4 recip = vrecpeq_f32(yf);
  //     recip *= vrecpsq_f32(yf, recip);
  //     recip *= vrecpsq_f32(yf, recip);
  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
                   BN1, N2);
  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
                   BN1, N2);
  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
  // Simply multiplying by the reciprocal estimate can leave us a few ulps
  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
  // and that it will never cause us to return an answer too large).
  //     float4 result = as_float4(as_int4(xf*recip) + 2);
  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
  N1 = DAG.getConstant(2, MVT::i32);
  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
  // Convert back to integer and return.
  //     return vmovn_u32(vcvt_s32_f32(result));
  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
  return N0;
}

static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getNode()->getValueType(0);
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);

  unsigned Opc;
  bool ExtraOp = false;
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Invalid code");
  case ISD::ADDC: Opc = ARMISD::ADDC; break;
  case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
  case ISD::SUBC: Opc = ARMISD::SUBC; break;
  case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
  }

  if (!ExtraOp)
    return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
                       Op.getOperand(1));
  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
                     Op.getOperand(1), Op.getOperand(2));
}

static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
  // Monotonic load/store is legal for all targets.
  if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
    return Op;

  // Acquire/release load/store is not legal for targets without a
  // dmb or equivalent available.
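  // Returning an empty SDValue tells the legalizer to fall back to its
  // default expansion for the node instead of treating it as legal.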
6011 return SDValue(); 6012 } 6013 6014 static void 6015 ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results, 6016 SelectionDAG &DAG) { 6017 SDLoc dl(Node); 6018 assert (Node->getValueType(0) == MVT::i64 && 6019 "Only know how to expand i64 atomics"); 6020 AtomicSDNode *AN = cast<AtomicSDNode>(Node); 6021 6022 SmallVector<SDValue, 6> Ops; 6023 Ops.push_back(Node->getOperand(0)); // Chain 6024 Ops.push_back(Node->getOperand(1)); // Ptr 6025 for(unsigned i=2; i<Node->getNumOperands(); i++) { 6026 // Low part 6027 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 6028 Node->getOperand(i), DAG.getIntPtrConstant(0))); 6029 // High part 6030 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 6031 Node->getOperand(i), DAG.getIntPtrConstant(1))); 6032 } 6033 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 6034 SDValue Result = 6035 DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(), 6036 cast<MemSDNode>(Node)->getMemOperand(), AN->getOrdering(), 6037 AN->getSynchScope()); 6038 SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; 6039 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); 6040 Results.push_back(Result.getValue(2)); 6041 } 6042 6043 static void ReplaceREADCYCLECOUNTER(SDNode *N, 6044 SmallVectorImpl<SDValue> &Results, 6045 SelectionDAG &DAG, 6046 const ARMSubtarget *Subtarget) { 6047 SDLoc DL(N); 6048 SDValue Cycles32, OutChain; 6049 6050 if (Subtarget->hasPerfMon()) { 6051 // Under Power Management extensions, the cycle-count is: 6052 // mrc p15, #0, <Rt>, c9, c13, #0 6053 SDValue Ops[] = { N->getOperand(0), // Chain 6054 DAG.getConstant(Intrinsic::arm_mrc, MVT::i32), 6055 DAG.getConstant(15, MVT::i32), 6056 DAG.getConstant(0, MVT::i32), 6057 DAG.getConstant(9, MVT::i32), 6058 DAG.getConstant(13, MVT::i32), 6059 DAG.getConstant(0, MVT::i32) 6060 }; 6061 6062 Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, 6063 DAG.getVTList(MVT::i32, MVT::Other), &Ops[0], 6064 array_lengthof(Ops)); 6065 OutChain = Cycles32.getValue(1); 6066 } else { 6067 // Intrinsic is defined to return 0 on unsupported platforms. Technically 6068 // there are older ARM CPUs that have implementation-specific ways of 6069 // obtaining this information (FIXME!). 6070 Cycles32 = DAG.getConstant(0, MVT::i32); 6071 OutChain = DAG.getEntryNode(); 6072 } 6073 6074 6075 SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, 6076 Cycles32, DAG.getConstant(0, MVT::i32)); 6077 Results.push_back(Cycles64); 6078 Results.push_back(OutChain); 6079 } 6080 6081 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 6082 switch (Op.getOpcode()) { 6083 default: llvm_unreachable("Don't know how to custom lower this!"); 6084 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 6085 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 6086 case ISD::GlobalAddress: 6087 return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : 6088 LowerGlobalAddressELF(Op, DAG); 6089 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 6090 case ISD::SELECT: return LowerSELECT(Op, DAG); 6091 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 6092 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 6093 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 6094 case ISD::VASTART: return LowerVASTART(Op, DAG); 6095 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); 6096 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); 6097 case ISD::SINT_TO_FP: 6098 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 6099 case ISD::FP_TO_SINT: 6100 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 6101 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 6102 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 6103 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 6104 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 6105 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 6106 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 6107 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 6108 Subtarget); 6109 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); 6110 case ISD::SHL: 6111 case ISD::SRL: 6112 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 6113 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 6114 case ISD::SRL_PARTS: 6115 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 6116 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 6117 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); 6118 case ISD::SETCC: return LowerVSETCC(Op, DAG); 6119 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); 6120 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); 6121 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 6122 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 6123 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 6124 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 6125 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 6126 case ISD::MUL: return LowerMUL(Op, DAG); 6127 case ISD::SDIV: return LowerSDIV(Op, DAG); 6128 case ISD::UDIV: return LowerUDIV(Op, DAG); 6129 case ISD::ADDC: 6130 case ISD::ADDE: 6131 case ISD::SUBC: 6132 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); 6133 case ISD::ATOMIC_LOAD: 6134 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); 6135 case ISD::SDIVREM: 6136 case ISD::UDIVREM: return LowerDivRem(Op, DAG); 6137 } 6138 } 6139 6140 /// ReplaceNodeResults - Replace the results of node with an illegal result 6141 /// type with new values built out of custom code. 
6142 void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 6143 SmallVectorImpl<SDValue>&Results, 6144 SelectionDAG &DAG) const { 6145 SDValue Res; 6146 switch (N->getOpcode()) { 6147 default: 6148 llvm_unreachable("Don't know how to custom expand this!"); 6149 case ISD::BITCAST: 6150 Res = ExpandBITCAST(N, DAG); 6151 break; 6152 case ISD::SIGN_EXTEND: 6153 case ISD::ZERO_EXTEND: 6154 Res = ExpandVectorExtension(N, DAG); 6155 break; 6156 case ISD::SRL: 6157 case ISD::SRA: 6158 Res = Expand64BitShift(N, DAG, Subtarget); 6159 break; 6160 case ISD::READCYCLECOUNTER: 6161 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); 6162 return; 6163 case ISD::ATOMIC_STORE: 6164 case ISD::ATOMIC_LOAD: 6165 case ISD::ATOMIC_LOAD_ADD: 6166 case ISD::ATOMIC_LOAD_AND: 6167 case ISD::ATOMIC_LOAD_NAND: 6168 case ISD::ATOMIC_LOAD_OR: 6169 case ISD::ATOMIC_LOAD_SUB: 6170 case ISD::ATOMIC_LOAD_XOR: 6171 case ISD::ATOMIC_SWAP: 6172 case ISD::ATOMIC_CMP_SWAP: 6173 case ISD::ATOMIC_LOAD_MIN: 6174 case ISD::ATOMIC_LOAD_UMIN: 6175 case ISD::ATOMIC_LOAD_MAX: 6176 case ISD::ATOMIC_LOAD_UMAX: 6177 ReplaceATOMIC_OP_64(N, Results, DAG); 6178 return; 6179 } 6180 if (Res.getNode()) 6181 Results.push_back(Res); 6182 } 6183 6184 //===----------------------------------------------------------------------===// 6185 // ARM Scheduler Hooks 6186 //===----------------------------------------------------------------------===// 6187 6188 MachineBasicBlock * 6189 ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 6190 MachineBasicBlock *BB, 6191 unsigned Size) const { 6192 unsigned dest = MI->getOperand(0).getReg(); 6193 unsigned ptr = MI->getOperand(1).getReg(); 6194 unsigned oldval = MI->getOperand(2).getReg(); 6195 unsigned newval = MI->getOperand(3).getReg(); 6196 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6197 AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); 6198 DebugLoc dl = MI->getDebugLoc(); 6199 bool isThumb2 = Subtarget->isThumb2(); 6200 6201 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6202 unsigned scratch = MRI.createVirtualRegister(isThumb2 ? 6203 (const TargetRegisterClass*)&ARM::rGPRRegClass : 6204 (const TargetRegisterClass*)&ARM::GPRRegClass); 6205 6206 if (isThumb2) { 6207 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 6208 MRI.constrainRegClass(oldval, &ARM::rGPRRegClass); 6209 MRI.constrainRegClass(newval, &ARM::rGPRRegClass); 6210 } 6211 6212 unsigned ldrOpc, strOpc; 6213 getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); 6214 6215 MachineFunction *MF = BB->getParent(); 6216 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6217 MachineFunction::iterator It = BB; 6218 ++It; // insert the new blocks after the current block 6219 6220 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 6221 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 6222 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6223 MF->insert(It, loop1MBB); 6224 MF->insert(It, loop2MBB); 6225 MF->insert(It, exitMBB); 6226 6227 // Transfer the remainder of BB and its successor edges to exitMBB. 6228 exitMBB->splice(exitMBB->begin(), BB, 6229 llvm::next(MachineBasicBlock::iterator(MI)), 6230 BB->end()); 6231 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6232 6233 // thisMBB: 6234 // ... 
6235 // fallthrough --> loop1MBB 6236 BB->addSuccessor(loop1MBB); 6237 6238 // loop1MBB: 6239 // ldrex dest, [ptr] 6240 // cmp dest, oldval 6241 // bne exitMBB 6242 BB = loop1MBB; 6243 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 6244 if (ldrOpc == ARM::t2LDREX) 6245 MIB.addImm(0); 6246 AddDefaultPred(MIB); 6247 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 6248 .addReg(dest).addReg(oldval)); 6249 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6250 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6251 BB->addSuccessor(loop2MBB); 6252 BB->addSuccessor(exitMBB); 6253 6254 // loop2MBB: 6255 // strex scratch, newval, [ptr] 6256 // cmp scratch, #0 6257 // bne loop1MBB 6258 BB = loop2MBB; 6259 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); 6260 if (strOpc == ARM::t2STREX) 6261 MIB.addImm(0); 6262 AddDefaultPred(MIB); 6263 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6264 .addReg(scratch).addImm(0)); 6265 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6266 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6267 BB->addSuccessor(loop1MBB); 6268 BB->addSuccessor(exitMBB); 6269 6270 // exitMBB: 6271 // ... 6272 BB = exitMBB; 6273 6274 MI->eraseFromParent(); // The instruction is gone now. 6275 6276 return BB; 6277 } 6278 6279 MachineBasicBlock * 6280 ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 6281 unsigned Size, unsigned BinOpcode) const { 6282 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 6283 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6284 6285 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6286 MachineFunction *MF = BB->getParent(); 6287 MachineFunction::iterator It = BB; 6288 ++It; 6289 6290 unsigned dest = MI->getOperand(0).getReg(); 6291 unsigned ptr = MI->getOperand(1).getReg(); 6292 unsigned incr = MI->getOperand(2).getReg(); 6293 AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); 6294 DebugLoc dl = MI->getDebugLoc(); 6295 bool isThumb2 = Subtarget->isThumb2(); 6296 6297 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6298 if (isThumb2) { 6299 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 6300 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 6301 MRI.constrainRegClass(incr, &ARM::rGPRRegClass); 6302 } 6303 6304 unsigned ldrOpc, strOpc; 6305 getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); 6306 6307 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6308 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6309 MF->insert(It, loopMBB); 6310 MF->insert(It, exitMBB); 6311 6312 // Transfer the remainder of BB and its successor edges to exitMBB. 6313 exitMBB->splice(exitMBB->begin(), BB, 6314 llvm::next(MachineBasicBlock::iterator(MI)), 6315 BB->end()); 6316 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6317 6318 const TargetRegisterClass *TRC = isThumb2 ? 6319 (const TargetRegisterClass*)&ARM::rGPRRegClass : 6320 (const TargetRegisterClass*)&ARM::GPRRegClass; 6321 unsigned scratch = MRI.createVirtualRegister(TRC); 6322 unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); 6323 6324 // thisMBB: 6325 // ... 
6326 // fallthrough --> loopMBB 6327 BB->addSuccessor(loopMBB); 6328 6329 // loopMBB: 6330 // ldrex dest, ptr 6331 // <binop> scratch2, dest, incr 6332 // strex scratch, scratch2, ptr 6333 // cmp scratch, #0 6334 // bne- loopMBB 6335 // fallthrough --> exitMBB 6336 BB = loopMBB; 6337 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 6338 if (ldrOpc == ARM::t2LDREX) 6339 MIB.addImm(0); 6340 AddDefaultPred(MIB); 6341 if (BinOpcode) { 6342 // operand order needs to go the other way for NAND 6343 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 6344 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 6345 addReg(incr).addReg(dest)).addReg(0); 6346 else 6347 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 6348 addReg(dest).addReg(incr)).addReg(0); 6349 } 6350 6351 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 6352 if (strOpc == ARM::t2STREX) 6353 MIB.addImm(0); 6354 AddDefaultPred(MIB); 6355 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6356 .addReg(scratch).addImm(0)); 6357 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6358 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6359 6360 BB->addSuccessor(loopMBB); 6361 BB->addSuccessor(exitMBB); 6362 6363 // exitMBB: 6364 // ... 6365 BB = exitMBB; 6366 6367 MI->eraseFromParent(); // The instruction is gone now. 6368 6369 return BB; 6370 } 6371 6372 MachineBasicBlock * 6373 ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, 6374 MachineBasicBlock *BB, 6375 unsigned Size, 6376 bool signExtend, 6377 ARMCC::CondCodes Cond) const { 6378 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6379 6380 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6381 MachineFunction *MF = BB->getParent(); 6382 MachineFunction::iterator It = BB; 6383 ++It; 6384 6385 unsigned dest = MI->getOperand(0).getReg(); 6386 unsigned ptr = MI->getOperand(1).getReg(); 6387 unsigned incr = MI->getOperand(2).getReg(); 6388 unsigned oldval = dest; 6389 AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); 6390 DebugLoc dl = MI->getDebugLoc(); 6391 bool isThumb2 = Subtarget->isThumb2(); 6392 6393 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6394 if (isThumb2) { 6395 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 6396 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 6397 MRI.constrainRegClass(incr, &ARM::rGPRRegClass); 6398 } 6399 6400 unsigned ldrOpc, strOpc, extendOpc; 6401 getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); 6402 switch (Size) { 6403 default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!"); 6404 case 1: 6405 extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; 6406 break; 6407 case 2: 6408 extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; 6409 break; 6410 case 4: 6411 extendOpc = 0; 6412 break; 6413 } 6414 6415 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6416 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6417 MF->insert(It, loopMBB); 6418 MF->insert(It, exitMBB); 6419 6420 // Transfer the remainder of BB and its successor edges to exitMBB. 6421 exitMBB->splice(exitMBB->begin(), BB, 6422 llvm::next(MachineBasicBlock::iterator(MI)), 6423 BB->end()); 6424 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6425 6426 const TargetRegisterClass *TRC = isThumb2 ? 
6427 (const TargetRegisterClass*)&ARM::rGPRRegClass : 6428 (const TargetRegisterClass*)&ARM::GPRRegClass; 6429 unsigned scratch = MRI.createVirtualRegister(TRC); 6430 unsigned scratch2 = MRI.createVirtualRegister(TRC); 6431 6432 // thisMBB: 6433 // ... 6434 // fallthrough --> loopMBB 6435 BB->addSuccessor(loopMBB); 6436 6437 // loopMBB: 6438 // ldrex dest, ptr 6439 // (sign extend dest, if required) 6440 // cmp dest, incr 6441 // cmov.cond scratch2, incr, dest 6442 // strex scratch, scratch2, ptr 6443 // cmp scratch, #0 6444 // bne- loopMBB 6445 // fallthrough --> exitMBB 6446 BB = loopMBB; 6447 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 6448 if (ldrOpc == ARM::t2LDREX) 6449 MIB.addImm(0); 6450 AddDefaultPred(MIB); 6451 6452 // Sign extend the value, if necessary. 6453 if (signExtend && extendOpc) { 6454 oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass 6455 : &ARM::GPRnopcRegClass); 6456 if (!isThumb2) 6457 MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass); 6458 AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) 6459 .addReg(dest) 6460 .addImm(0)); 6461 } 6462 6463 // Build compare and cmov instructions. 6464 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 6465 .addReg(oldval).addReg(incr)); 6466 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) 6467 .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR); 6468 6469 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 6470 if (strOpc == ARM::t2STREX) 6471 MIB.addImm(0); 6472 AddDefaultPred(MIB); 6473 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6474 .addReg(scratch).addImm(0)); 6475 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6476 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6477 6478 BB->addSuccessor(loopMBB); 6479 BB->addSuccessor(exitMBB); 6480 6481 // exitMBB: 6482 // ... 6483 BB = exitMBB; 6484 6485 MI->eraseFromParent(); // The instruction is gone now. 6486 6487 return BB; 6488 } 6489 6490 MachineBasicBlock * 6491 ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, 6492 unsigned Op1, unsigned Op2, 6493 bool NeedsCarry, bool IsCmpxchg, 6494 bool IsMinMax, ARMCC::CondCodes CC) const { 6495 // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0. 6496 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6497 6498 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6499 MachineFunction *MF = BB->getParent(); 6500 MachineFunction::iterator It = BB; 6501 ++It; 6502 6503 bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64); 6504 unsigned offset = (isStore ? -2 : 0); 6505 unsigned destlo = MI->getOperand(0).getReg(); 6506 unsigned desthi = MI->getOperand(1).getReg(); 6507 unsigned ptr = MI->getOperand(offset+2).getReg(); 6508 unsigned vallo = MI->getOperand(offset+3).getReg(); 6509 unsigned valhi = MI->getOperand(offset+4).getReg(); 6510 unsigned OrdIdx = offset + (IsCmpxchg ? 
7 : 5); 6511 AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(OrdIdx).getImm()); 6512 DebugLoc dl = MI->getDebugLoc(); 6513 bool isThumb2 = Subtarget->isThumb2(); 6514 6515 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6516 if (isThumb2) { 6517 MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); 6518 MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); 6519 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 6520 MRI.constrainRegClass(vallo, &ARM::rGPRRegClass); 6521 MRI.constrainRegClass(valhi, &ARM::rGPRRegClass); 6522 } 6523 6524 unsigned ldrOpc, strOpc; 6525 getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); 6526 6527 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6528 MachineBasicBlock *contBB = 0, *cont2BB = 0; 6529 if (IsCmpxchg || IsMinMax) 6530 contBB = MF->CreateMachineBasicBlock(LLVM_BB); 6531 if (IsCmpxchg) 6532 cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); 6533 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6534 6535 MF->insert(It, loopMBB); 6536 if (IsCmpxchg || IsMinMax) MF->insert(It, contBB); 6537 if (IsCmpxchg) MF->insert(It, cont2BB); 6538 MF->insert(It, exitMBB); 6539 6540 // Transfer the remainder of BB and its successor edges to exitMBB. 6541 exitMBB->splice(exitMBB->begin(), BB, 6542 llvm::next(MachineBasicBlock::iterator(MI)), 6543 BB->end()); 6544 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6545 6546 const TargetRegisterClass *TRC = isThumb2 ? 6547 (const TargetRegisterClass*)&ARM::tGPRRegClass : 6548 (const TargetRegisterClass*)&ARM::GPRRegClass; 6549 unsigned storesuccess = MRI.createVirtualRegister(TRC); 6550 6551 // thisMBB: 6552 // ... 6553 // fallthrough --> loopMBB 6554 BB->addSuccessor(loopMBB); 6555 6556 // loopMBB: 6557 // ldrexd r2, r3, ptr 6558 // <binopa> r0, r2, incr 6559 // <binopb> r1, r3, incr 6560 // strexd storesuccess, r0, r1, ptr 6561 // cmp storesuccess, #0 6562 // bne- loopMBB 6563 // fallthrough --> exitMBB 6564 BB = loopMBB; 6565 6566 if (!isStore) { 6567 // Load 6568 if (isThumb2) { 6569 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) 6570 .addReg(destlo, RegState::Define) 6571 .addReg(desthi, RegState::Define) 6572 .addReg(ptr)); 6573 } else { 6574 unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6575 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) 6576 .addReg(GPRPair0, RegState::Define).addReg(ptr)); 6577 // Copy r2/r3 into dest. (This copy will normally be coalesced.) 6578 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) 6579 .addReg(GPRPair0, 0, ARM::gsub_0); 6580 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) 6581 .addReg(GPRPair0, 0, ARM::gsub_1); 6582 } 6583 } 6584 6585 unsigned StoreLo, StoreHi; 6586 if (IsCmpxchg) { 6587 // Add early exit 6588 for (unsigned i = 0; i < 2; i++) { 6589 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : 6590 ARM::CMPrr)) 6591 .addReg(i == 0 ? destlo : desthi) 6592 .addReg(i == 0 ? vallo : valhi)); 6593 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6594 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6595 BB->addSuccessor(exitMBB); 6596 BB->addSuccessor(i == 0 ? contBB : cont2BB); 6597 BB = (i == 0 ? 
contBB : cont2BB); 6598 } 6599 6600 // Copy to physregs for strexd 6601 StoreLo = MI->getOperand(5).getReg(); 6602 StoreHi = MI->getOperand(6).getReg(); 6603 } else if (Op1) { 6604 // Perform binary operation 6605 unsigned tmpRegLo = MRI.createVirtualRegister(TRC); 6606 AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo) 6607 .addReg(destlo).addReg(vallo)) 6608 .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); 6609 unsigned tmpRegHi = MRI.createVirtualRegister(TRC); 6610 AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi) 6611 .addReg(desthi).addReg(valhi)) 6612 .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); 6613 6614 StoreLo = tmpRegLo; 6615 StoreHi = tmpRegHi; 6616 } else { 6617 // Copy to physregs for strexd 6618 StoreLo = vallo; 6619 StoreHi = valhi; 6620 } 6621 if (IsMinMax) { 6622 // Compare and branch to exit block. 6623 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6624 .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR); 6625 BB->addSuccessor(exitMBB); 6626 BB->addSuccessor(contBB); 6627 BB = contBB; 6628 StoreLo = vallo; 6629 StoreHi = valhi; 6630 } 6631 6632 // Store 6633 if (isThumb2) { 6634 MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass); 6635 MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass); 6636 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) 6637 .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); 6638 } else { 6639 // Marshal a pair... 6640 unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6641 unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6642 unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6643 BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); 6644 BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) 6645 .addReg(UndefPair) 6646 .addReg(StoreLo) 6647 .addImm(ARM::gsub_0); 6648 BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair) 6649 .addReg(r1) 6650 .addReg(StoreHi) 6651 .addImm(ARM::gsub_1); 6652 6653 // ...and store it 6654 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) 6655 .addReg(StorePair).addReg(ptr)); 6656 } 6657 // Cmp+jump 6658 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6659 .addReg(storesuccess).addImm(0)); 6660 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6661 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6662 6663 BB->addSuccessor(loopMBB); 6664 BB->addSuccessor(exitMBB); 6665 6666 // exitMBB: 6667 // ... 6668 BB = exitMBB; 6669 6670 MI->eraseFromParent(); // The instruction is gone now. 
6671 6672 return BB; 6673 } 6674 6675 MachineBasicBlock * 6676 ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const { 6677 6678 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6679 6680 unsigned destlo = MI->getOperand(0).getReg(); 6681 unsigned desthi = MI->getOperand(1).getReg(); 6682 unsigned ptr = MI->getOperand(2).getReg(); 6683 AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); 6684 DebugLoc dl = MI->getDebugLoc(); 6685 bool isThumb2 = Subtarget->isThumb2(); 6686 6687 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6688 if (isThumb2) { 6689 MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); 6690 MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); 6691 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 6692 } 6693 unsigned ldrOpc, strOpc; 6694 getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); 6695 6696 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc)); 6697 6698 if (isThumb2) { 6699 MIB.addReg(destlo, RegState::Define) 6700 .addReg(desthi, RegState::Define) 6701 .addReg(ptr); 6702 6703 } else { 6704 unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6705 MIB.addReg(GPRPair0, RegState::Define).addReg(ptr); 6706 6707 // Copy GPRPair0 into dest. (This copy will normally be coalesced.) 6708 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo) 6709 .addReg(GPRPair0, 0, ARM::gsub_0); 6710 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi) 6711 .addReg(GPRPair0, 0, ARM::gsub_1); 6712 } 6713 AddDefaultPred(MIB); 6714 6715 MI->eraseFromParent(); // The instruction is gone now. 6716 6717 return BB; 6718 } 6719 6720 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and 6721 /// registers the function context. 6722 void ARMTargetLowering:: 6723 SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, 6724 MachineBasicBlock *DispatchBB, int FI) const { 6725 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6726 DebugLoc dl = MI->getDebugLoc(); 6727 MachineFunction *MF = MBB->getParent(); 6728 MachineRegisterInfo *MRI = &MF->getRegInfo(); 6729 MachineConstantPool *MCP = MF->getConstantPool(); 6730 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 6731 const Function *F = MF->getFunction(); 6732 6733 bool isThumb = Subtarget->isThumb(); 6734 bool isThumb2 = Subtarget->isThumb2(); 6735 6736 unsigned PCLabelId = AFI->createPICLabelUId(); 6737 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; 6738 ARMConstantPoolValue *CPV = 6739 ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); 6740 unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); 6741 6742 const TargetRegisterClass *TRC = isThumb ? 6743 (const TargetRegisterClass*)&ARM::tGPRRegClass : 6744 (const TargetRegisterClass*)&ARM::GPRRegClass; 6745 6746 // Grab constant pool and fixed stack memory operands. 6747 MachineMemOperand *CPMMO = 6748 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(), 6749 MachineMemOperand::MOLoad, 4, 4); 6750 6751 MachineMemOperand *FIMMOSt = 6752 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 6753 MachineMemOperand::MOStore, 4, 4); 6754 6755 // Load the address of the dispatch MBB into the jump buffer. 
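  // Note on the two Thumb paths below: the stored resume address must have
  // its low bit set so that the indirect branch taken when the SjLj longjmp
  // fires re-enters Thumb state (interworking branches use bit 0 of the
  // target address to select the instruction set). The ARM path stores the
  // PIC-adjusted address unmodified.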
6756 if (isThumb2) { 6757 // Incoming value: jbuf 6758 // ldr.n r5, LCPI1_1 6759 // orr r5, r5, #1 6760 // add r5, pc 6761 // str r5, [$jbuf, #+4] ; &jbuf[1] 6762 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6763 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) 6764 .addConstantPoolIndex(CPI) 6765 .addMemOperand(CPMMO)); 6766 // Set the low bit because of thumb mode. 6767 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6768 AddDefaultCC( 6769 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) 6770 .addReg(NewVReg1, RegState::Kill) 6771 .addImm(0x01))); 6772 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6773 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) 6774 .addReg(NewVReg2, RegState::Kill) 6775 .addImm(PCLabelId); 6776 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) 6777 .addReg(NewVReg3, RegState::Kill) 6778 .addFrameIndex(FI) 6779 .addImm(36) // &jbuf[1] :: pc 6780 .addMemOperand(FIMMOSt)); 6781 } else if (isThumb) { 6782 // Incoming value: jbuf 6783 // ldr.n r1, LCPI1_4 6784 // add r1, pc 6785 // mov r2, #1 6786 // orrs r1, r2 6787 // add r2, $jbuf, #+4 ; &jbuf[1] 6788 // str r1, [r2] 6789 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6790 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) 6791 .addConstantPoolIndex(CPI) 6792 .addMemOperand(CPMMO)); 6793 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6794 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) 6795 .addReg(NewVReg1, RegState::Kill) 6796 .addImm(PCLabelId); 6797 // Set the low bit because of thumb mode. 6798 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6799 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) 6800 .addReg(ARM::CPSR, RegState::Define) 6801 .addImm(1)); 6802 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6803 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) 6804 .addReg(ARM::CPSR, RegState::Define) 6805 .addReg(NewVReg2, RegState::Kill) 6806 .addReg(NewVReg3, RegState::Kill)); 6807 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 6808 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) 6809 .addFrameIndex(FI) 6810 .addImm(36)); // &jbuf[1] :: pc 6811 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) 6812 .addReg(NewVReg4, RegState::Kill) 6813 .addReg(NewVReg5, RegState::Kill) 6814 .addImm(0) 6815 .addMemOperand(FIMMOSt)); 6816 } else { 6817 // Incoming value: jbuf 6818 // ldr r1, LCPI1_1 6819 // add r1, pc, r1 6820 // str r1, [$jbuf, #+4] ; &jbuf[1] 6821 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6822 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) 6823 .addConstantPoolIndex(CPI) 6824 .addImm(0) 6825 .addMemOperand(CPMMO)); 6826 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6827 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) 6828 .addReg(NewVReg1, RegState::Kill) 6829 .addImm(PCLabelId)); 6830 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) 6831 .addReg(NewVReg2, RegState::Kill) 6832 .addFrameIndex(FI) 6833 .addImm(36) // &jbuf[1] :: pc 6834 .addMemOperand(FIMMOSt)); 6835 } 6836 } 6837 6838 MachineBasicBlock *ARMTargetLowering:: 6839 EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { 6840 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6841 DebugLoc dl = MI->getDebugLoc(); 6842 MachineFunction *MF = MBB->getParent(); 6843 MachineRegisterInfo *MRI = &MF->getRegInfo(); 6844 ARMFunctionInfo *AFI = 
    MF->getInfo<ARMFunctionInfo>();
  MachineFrameInfo *MFI = MF->getFrameInfo();
  int FI = MFI->getFunctionContextIndex();

  const TargetRegisterClass *TRC = Subtarget->isThumb() ?
    (const TargetRegisterClass*)&ARM::tGPRRegClass :
    (const TargetRegisterClass*)&ARM::GPRnopcRegClass;

  // Get a mapping of the call site numbers to all of the landing pads they're
  // associated with.
  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
  unsigned MaxCSNum = 0;
  MachineModuleInfo &MMI = MF->getMMI();
  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
       ++BB) {
    if (!BB->isLandingPad()) continue;

    // FIXME: We should assert that the EH_LABEL is the first MI in the landing
    // pad.
    for (MachineBasicBlock::iterator
           II = BB->begin(), IE = BB->end(); II != IE; ++II) {
      if (!II->isEHLabel()) continue;

      MCSymbol *Sym = II->getOperand(0).getMCSymbol();
      if (!MMI.hasCallSiteLandingPad(Sym)) continue;

      SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
      for (SmallVectorImpl<unsigned>::iterator
             CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
           CSI != CSE; ++CSI) {
        CallSiteNumToLPad[*CSI].push_back(BB);
        MaxCSNum = std::max(MaxCSNum, *CSI);
      }
      break;
    }
  }

  // Get an ordered list of the machine basic blocks for the jump table.
  std::vector<MachineBasicBlock*> LPadList;
  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
  LPadList.reserve(CallSiteNumToLPad.size());
  for (unsigned I = 1; I <= MaxCSNum; ++I) {
    SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
    for (SmallVectorImpl<MachineBasicBlock*>::iterator
           II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
      LPadList.push_back(*II);
      InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
    }
  }

  assert(!LPadList.empty() &&
         "No landing pad destinations for the dispatch jump table!");

  // Create the jump table and associated information.
  MachineJumpTableInfo *JTI =
    MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
  unsigned UId = AFI->createJumpTableUId();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();

  // Create the MBBs for the dispatch code.

  // Shove the dispatch's address into the return slot in the function context.
  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
  DispatchBB->setIsLandingPad();

  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
  unsigned trap_opcode;
  if (Subtarget->isThumb())
    trap_opcode = ARM::tTRAP;
  else
    trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;

  BuildMI(TrapBB, dl, TII->get(trap_opcode));
  DispatchBB->addSuccessor(TrapBB);

  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
  DispatchBB->addSuccessor(DispContBB);

  // Insert the MBBs into the function.
  MF->insert(MF->end(), DispatchBB);
  MF->insert(MF->end(), DispContBB);
  MF->insert(MF->end(), TrapBB);

  // Insert code into the entry block that creates and registers the function
  // context.
6930 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI); 6931 6932 MachineMemOperand *FIMMOLd = 6933 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 6934 MachineMemOperand::MOLoad | 6935 MachineMemOperand::MOVolatile, 4, 4); 6936 6937 MachineInstrBuilder MIB; 6938 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); 6939 6940 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); 6941 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); 6942 6943 // Add a register mask with no preserved registers. This results in all 6944 // registers being marked as clobbered. 6945 MIB.addRegMask(RI.getNoPreservedMask()); 6946 6947 unsigned NumLPads = LPadList.size(); 6948 if (Subtarget->isThumb2()) { 6949 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6950 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) 6951 .addFrameIndex(FI) 6952 .addImm(4) 6953 .addMemOperand(FIMMOLd)); 6954 6955 if (NumLPads < 256) { 6956 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) 6957 .addReg(NewVReg1) 6958 .addImm(LPadList.size())); 6959 } else { 6960 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6961 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) 6962 .addImm(NumLPads & 0xFFFF)); 6963 6964 unsigned VReg2 = VReg1; 6965 if ((NumLPads & 0xFFFF0000) != 0) { 6966 VReg2 = MRI->createVirtualRegister(TRC); 6967 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) 6968 .addReg(VReg1) 6969 .addImm(NumLPads >> 16)); 6970 } 6971 6972 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) 6973 .addReg(NewVReg1) 6974 .addReg(VReg2)); 6975 } 6976 6977 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) 6978 .addMBB(TrapBB) 6979 .addImm(ARMCC::HI) 6980 .addReg(ARM::CPSR); 6981 6982 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6983 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3) 6984 .addJumpTableIndex(MJTI) 6985 .addImm(UId)); 6986 6987 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6988 AddDefaultCC( 6989 AddDefaultPred( 6990 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) 6991 .addReg(NewVReg3, RegState::Kill) 6992 .addReg(NewVReg1) 6993 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 6994 6995 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) 6996 .addReg(NewVReg4, RegState::Kill) 6997 .addReg(NewVReg1) 6998 .addJumpTableIndex(MJTI) 6999 .addImm(UId); 7000 } else if (Subtarget->isThumb()) { 7001 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 7002 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) 7003 .addFrameIndex(FI) 7004 .addImm(1) 7005 .addMemOperand(FIMMOLd)); 7006 7007 if (NumLPads < 256) { 7008 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) 7009 .addReg(NewVReg1) 7010 .addImm(NumLPads)); 7011 } else { 7012 MachineConstantPool *ConstantPool = MF->getConstantPool(); 7013 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 7014 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 7015 7016 // MachineConstantPool wants an explicit alignment. 
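      // If the target records no preferred alignment for i32, fall back to
      // the type's allocation size so the constant pool entry is still
      // naturally aligned.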
7017 unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); 7018 if (Align == 0) 7019 Align = getDataLayout()->getTypeAllocSize(C->getType()); 7020 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 7021 7022 unsigned VReg1 = MRI->createVirtualRegister(TRC); 7023 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) 7024 .addReg(VReg1, RegState::Define) 7025 .addConstantPoolIndex(Idx)); 7026 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) 7027 .addReg(NewVReg1) 7028 .addReg(VReg1)); 7029 } 7030 7031 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) 7032 .addMBB(TrapBB) 7033 .addImm(ARMCC::HI) 7034 .addReg(ARM::CPSR); 7035 7036 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 7037 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) 7038 .addReg(ARM::CPSR, RegState::Define) 7039 .addReg(NewVReg1) 7040 .addImm(2)); 7041 7042 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 7043 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) 7044 .addJumpTableIndex(MJTI) 7045 .addImm(UId)); 7046 7047 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 7048 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) 7049 .addReg(ARM::CPSR, RegState::Define) 7050 .addReg(NewVReg2, RegState::Kill) 7051 .addReg(NewVReg3)); 7052 7053 MachineMemOperand *JTMMOLd = 7054 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 7055 MachineMemOperand::MOLoad, 4, 4); 7056 7057 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 7058 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) 7059 .addReg(NewVReg4, RegState::Kill) 7060 .addImm(0) 7061 .addMemOperand(JTMMOLd)); 7062 7063 unsigned NewVReg6 = NewVReg5; 7064 if (RelocM == Reloc::PIC_) { 7065 NewVReg6 = MRI->createVirtualRegister(TRC); 7066 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) 7067 .addReg(ARM::CPSR, RegState::Define) 7068 .addReg(NewVReg5, RegState::Kill) 7069 .addReg(NewVReg3)); 7070 } 7071 7072 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) 7073 .addReg(NewVReg6, RegState::Kill) 7074 .addJumpTableIndex(MJTI) 7075 .addImm(UId); 7076 } else { 7077 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 7078 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) 7079 .addFrameIndex(FI) 7080 .addImm(4) 7081 .addMemOperand(FIMMOLd)); 7082 7083 if (NumLPads < 256) { 7084 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) 7085 .addReg(NewVReg1) 7086 .addImm(NumLPads)); 7087 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { 7088 unsigned VReg1 = MRI->createVirtualRegister(TRC); 7089 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) 7090 .addImm(NumLPads & 0xFFFF)); 7091 7092 unsigned VReg2 = VReg1; 7093 if ((NumLPads & 0xFFFF0000) != 0) { 7094 VReg2 = MRI->createVirtualRegister(TRC); 7095 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) 7096 .addReg(VReg1) 7097 .addImm(NumLPads >> 16)); 7098 } 7099 7100 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 7101 .addReg(NewVReg1) 7102 .addReg(VReg2)); 7103 } else { 7104 MachineConstantPool *ConstantPool = MF->getConstantPool(); 7105 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 7106 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 7107 7108 // MachineConstantPool wants an explicit alignment. 
7109 unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); 7110 if (Align == 0) 7111 Align = getDataLayout()->getTypeAllocSize(C->getType()); 7112 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 7113 7114 unsigned VReg1 = MRI->createVirtualRegister(TRC); 7115 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) 7116 .addReg(VReg1, RegState::Define) 7117 .addConstantPoolIndex(Idx) 7118 .addImm(0)); 7119 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 7120 .addReg(NewVReg1) 7121 .addReg(VReg1, RegState::Kill)); 7122 } 7123 7124 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) 7125 .addMBB(TrapBB) 7126 .addImm(ARMCC::HI) 7127 .addReg(ARM::CPSR); 7128 7129 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 7130 AddDefaultCC( 7131 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) 7132 .addReg(NewVReg1) 7133 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 7134 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 7135 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) 7136 .addJumpTableIndex(MJTI) 7137 .addImm(UId)); 7138 7139 MachineMemOperand *JTMMOLd = 7140 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 7141 MachineMemOperand::MOLoad, 4, 4); 7142 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 7143 AddDefaultPred( 7144 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) 7145 .addReg(NewVReg3, RegState::Kill) 7146 .addReg(NewVReg4) 7147 .addImm(0) 7148 .addMemOperand(JTMMOLd)); 7149 7150 if (RelocM == Reloc::PIC_) { 7151 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) 7152 .addReg(NewVReg5, RegState::Kill) 7153 .addReg(NewVReg4) 7154 .addJumpTableIndex(MJTI) 7155 .addImm(UId); 7156 } else { 7157 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr)) 7158 .addReg(NewVReg5, RegState::Kill) 7159 .addJumpTableIndex(MJTI) 7160 .addImm(UId); 7161 } 7162 } 7163 7164 // Add the jump table entries as successors to the MBB. 7165 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs; 7166 for (std::vector<MachineBasicBlock*>::iterator 7167 I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { 7168 MachineBasicBlock *CurMBB = *I; 7169 if (SeenMBBs.insert(CurMBB)) 7170 DispContBB->addSuccessor(CurMBB); 7171 } 7172 7173 // N.B. the order the invoke BBs are processed in doesn't matter here. 7174 const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); 7175 SmallVector<MachineBasicBlock*, 64> MBBLPads; 7176 for (SmallPtrSet<MachineBasicBlock*, 64>::iterator 7177 I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { 7178 MachineBasicBlock *BB = *I; 7179 7180 // Remove the landing pad successor from the invoke block and replace it 7181 // with the new dispatch block. 7182 SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(), 7183 BB->succ_end()); 7184 while (!Successors.empty()) { 7185 MachineBasicBlock *SMBB = Successors.pop_back_val(); 7186 if (SMBB->isLandingPad()) { 7187 BB->removeSuccessor(SMBB); 7188 MBBLPads.push_back(SMBB); 7189 } 7190 } 7191 7192 BB->addSuccessor(DispatchBB); 7193 7194 // Find the invoke call and mark all of the callee-saved registers as 7195 // 'implicit defined' so that they're spilled. This prevents code from 7196 // moving instructions to before the EH block, where they will never be 7197 // executed. 
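    // Walk the block backwards so the first call encountered is the invoke
    // itself (the last call in the block) rather than some earlier call.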
7198 for (MachineBasicBlock::reverse_iterator 7199 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { 7200 if (!II->isCall()) continue; 7201 7202 DenseMap<unsigned, bool> DefRegs; 7203 for (MachineInstr::mop_iterator 7204 OI = II->operands_begin(), OE = II->operands_end(); 7205 OI != OE; ++OI) { 7206 if (!OI->isReg()) continue; 7207 DefRegs[OI->getReg()] = true; 7208 } 7209 7210 MachineInstrBuilder MIB(*MF, &*II); 7211 7212 for (unsigned i = 0; SavedRegs[i] != 0; ++i) { 7213 unsigned Reg = SavedRegs[i]; 7214 if (Subtarget->isThumb2() && 7215 !ARM::tGPRRegClass.contains(Reg) && 7216 !ARM::hGPRRegClass.contains(Reg)) 7217 continue; 7218 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg)) 7219 continue; 7220 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg)) 7221 continue; 7222 if (!DefRegs[Reg]) 7223 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); 7224 } 7225 7226 break; 7227 } 7228 } 7229 7230 // Mark all former landing pads as non-landing pads. The dispatch is the only 7231 // landing pad now. 7232 for (SmallVectorImpl<MachineBasicBlock*>::iterator 7233 I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I) 7234 (*I)->setIsLandingPad(false); 7235 7236 // The instruction is gone now. 7237 MI->eraseFromParent(); 7238 7239 return MBB; 7240 } 7241 7242 static 7243 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 7244 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 7245 E = MBB->succ_end(); I != E; ++I) 7246 if (*I != Succ) 7247 return *I; 7248 llvm_unreachable("Expecting a BB with two successors!"); 7249 } 7250 7251 MachineBasicBlock * 7252 ARMTargetLowering::EmitStructByval(MachineInstr *MI, 7253 MachineBasicBlock *BB) const { 7254 // This pseudo instruction has 3 operands: dst, src, size 7255 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). 7256 // Otherwise, we will generate unrolled scalar copies. 7257 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 7258 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7259 MachineFunction::iterator It = BB; 7260 ++It; 7261 7262 unsigned dest = MI->getOperand(0).getReg(); 7263 unsigned src = MI->getOperand(1).getReg(); 7264 unsigned SizeVal = MI->getOperand(2).getImm(); 7265 unsigned Align = MI->getOperand(3).getImm(); 7266 DebugLoc dl = MI->getDebugLoc(); 7267 7268 MachineFunction *MF = BB->getParent(); 7269 MachineRegisterInfo &MRI = MF->getRegInfo(); 7270 unsigned UnitSize = 0; 7271 unsigned UnitLdOpc = 0; 7272 unsigned UnitStOpc = 0; 7273 const TargetRegisterClass *TRC = 0; 7274 const TargetRegisterClass *VecTRC = 0; 7275 7276 bool IsThumb1 = Subtarget->isThumb1Only(); 7277 bool IsThumb2 = Subtarget->isThumb2(); 7278 7279 if (Align & 1) { 7280 UnitSize = 1; 7281 } else if (Align & 2) { 7282 UnitSize = 2; 7283 } else { 7284 // Check whether we can use NEON instructions. 7285 if (!MF->getFunction()->getAttributes(). 7286 hasAttribute(AttributeSet::FunctionIndex, 7287 Attribute::NoImplicitFloat) && 7288 Subtarget->hasNEON()) { 7289 if ((Align % 16 == 0) && SizeVal >= 16) 7290 UnitSize = 16; 7291 else if ((Align % 8 == 0) && SizeVal >= 8) 7292 UnitSize = 8; 7293 } 7294 // Can't use NEON instructions. 7295 if (UnitSize == 0) 7296 UnitSize = 4; 7297 } 7298 7299 // Select the correct opcode and register class for unit size load/store 7300 bool IsNeon = UnitSize >= 8; 7301 TRC = (IsThumb1 || IsThumb2) ? 
(const TargetRegisterClass *)&ARM::tGPRRegClass 7302 : (const TargetRegisterClass *)&ARM::GPRRegClass; 7303 if (IsNeon) { 7304 UnitLdOpc = UnitSize == 16 ? ARM::VLD1q32wb_fixed 7305 : UnitSize == 8 ? ARM::VLD1d32wb_fixed : 0; 7306 UnitStOpc = UnitSize == 16 ? ARM::VST1q32wb_fixed 7307 : UnitSize == 8 ? ARM::VST1d32wb_fixed : 0; 7308 VecTRC = UnitSize == 16 7309 ? (const TargetRegisterClass *)&ARM::DPairRegClass 7310 : UnitSize == 8 7311 ? (const TargetRegisterClass *)&ARM::DPRRegClass 7312 : 0; 7313 } else if (IsThumb1) { 7314 UnitLdOpc = UnitSize == 4 ? ARM::tLDRi 7315 : UnitSize == 2 ? ARM::tLDRHi 7316 : UnitSize == 1 ? ARM::tLDRBi : 0; 7317 UnitStOpc = UnitSize == 4 ? ARM::tSTRi 7318 : UnitSize == 2 ? ARM::tSTRHi 7319 : UnitSize == 1 ? ARM::tSTRBi : 0; 7320 } else if (IsThumb2) { 7321 UnitLdOpc = UnitSize == 4 7322 ? ARM::t2LDR_POST 7323 : UnitSize == 2 ? ARM::t2LDRH_POST 7324 : UnitSize == 1 ? ARM::t2LDRB_POST : 0; 7325 UnitStOpc = UnitSize == 4 7326 ? ARM::t2STR_POST 7327 : UnitSize == 2 ? ARM::t2STRH_POST 7328 : UnitSize == 1 ? ARM::t2STRB_POST : 0; 7329 } else { 7330 UnitLdOpc = UnitSize == 4 7331 ? ARM::LDR_POST_IMM 7332 : UnitSize == 2 ? ARM::LDRH_POST 7333 : UnitSize == 1 ? ARM::LDRB_POST_IMM : 0; 7334 UnitStOpc = UnitSize == 4 7335 ? ARM::STR_POST_IMM 7336 : UnitSize == 2 ? ARM::STRH_POST 7337 : UnitSize == 1 ? ARM::STRB_POST_IMM : 0; 7338 } 7339 assert(UnitLdOpc != 0 && UnitStOpc != 0 && "Should have unit opcodes"); 7340 7341 unsigned BytesLeft = SizeVal % UnitSize; 7342 unsigned LoopSize = SizeVal - BytesLeft; 7343 7344 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) { 7345 // Use LDR and STR to copy. 7346 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize) 7347 // [destOut] = STR_POST(scratch, destIn, UnitSize) 7348 unsigned srcIn = src; 7349 unsigned destIn = dest; 7350 for (unsigned i = 0; i < LoopSize; i+=UnitSize) { 7351 unsigned srcOut = MRI.createVirtualRegister(TRC); 7352 unsigned destOut = MRI.createVirtualRegister(TRC); 7353 unsigned scratch = MRI.createVirtualRegister(IsNeon ? 
VecTRC : TRC); 7354 if (IsNeon) { 7355 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitLdOpc), scratch) 7356 .addReg(srcOut, RegState::Define).addReg(srcIn) 7357 .addImm(0)); 7358 7359 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitStOpc), destOut) 7360 .addReg(destIn).addImm(0).addReg(scratch)); 7361 } else if (IsThumb1) { 7362 // load + update srcIn 7363 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitLdOpc), scratch) 7364 .addReg(srcIn).addImm(0)); 7365 MachineInstrBuilder MIB = 7366 BuildMI(*BB, MI, dl, TII->get(ARM::tADDi8), srcOut); 7367 MIB = AddDefaultT1CC(MIB); 7368 MIB.addReg(srcIn).addImm(UnitSize); 7369 AddDefaultPred(MIB); 7370 7371 // store + update destIn 7372 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitStOpc)).addReg(scratch) 7373 .addReg(destIn).addImm(0)); 7374 MIB = BuildMI(*BB, MI, dl, TII->get(ARM::tADDi8), destOut); 7375 MIB = AddDefaultT1CC(MIB); 7376 MIB.addReg(destIn).addImm(UnitSize); 7377 AddDefaultPred(MIB); 7378 } else if (IsThumb2) { 7379 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitLdOpc), scratch) 7380 .addReg(srcOut, RegState::Define).addReg(srcIn) 7381 .addImm(UnitSize)); 7382 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitStOpc), destOut) 7383 .addReg(scratch).addReg(destIn).addImm(UnitSize)); 7384 } else { // arm 7385 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitLdOpc), scratch) 7386 .addReg(srcOut, RegState::Define).addReg(srcIn) 7387 .addReg(0).addImm(UnitSize)); 7388 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitStOpc), destOut) 7389 .addReg(scratch).addReg(destIn).addReg(0) 7390 .addImm(UnitSize)); 7391 } 7392 srcIn = srcOut; 7393 destIn = destOut; 7394 } 7395 7396 // Handle the leftover bytes with LDRB and STRB. 7397 // [scratch, srcOut] = LDRB_POST(srcIn, 1) 7398 // [destOut] = STRB_POST(scratch, destIn, 1) 7399 for (unsigned i = 0; i < BytesLeft; i++) { 7400 unsigned srcOut = MRI.createVirtualRegister(TRC); 7401 unsigned destOut = MRI.createVirtualRegister(TRC); 7402 unsigned scratch = MRI.createVirtualRegister(TRC); 7403 if (IsThumb1) { 7404 // load into scratch 7405 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRBi), scratch) 7406 .addReg(srcIn).addImm(0)); 7407 7408 // update base pointer 7409 MachineInstrBuilder MIB = 7410 BuildMI(*BB, MI, dl, TII->get(ARM::tADDi8), srcOut); 7411 MIB = AddDefaultT1CC(MIB); 7412 MIB.addReg(srcIn).addImm(1); 7413 AddDefaultPred(MIB); 7414 7415 // store 7416 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tSTRBi)) 7417 .addReg(scratch).addReg(destIn).addImm(0)); 7418 7419 // update base pointer 7420 MIB = BuildMI(*BB, MI, dl, TII->get(ARM::tADDi8), destOut); 7421 MIB = AddDefaultT1CC(MIB); 7422 MIB.addReg(destIn).addImm(1); 7423 AddDefaultPred(MIB); 7424 } else if (IsThumb2) { 7425 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::t2LDRB_POST), scratch) 7426 .addReg(srcOut, RegState::Define).addReg(srcIn) 7427 .addImm(1)); 7428 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::t2STRB_POST), destOut) 7429 .addReg(scratch).addReg(destIn).addImm(1)); 7430 } else { // arm 7431 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRB_POST_IMM), 7432 scratch).addReg(srcOut, RegState::Define) 7433 .addReg(srcIn).addReg(0).addImm(1)); 7434 AddDefaultPred( 7435 BuildMI(*BB, MI, dl, TII->get(ARM::STRB_POST_IMM), destOut) 7436 .addReg(scratch).addReg(destIn).addReg(0).addImm(1)); 7437 } 7438 srcIn = srcOut; 7439 destIn = destOut; 7440 } 7441 MI->eraseFromParent(); // The instruction is gone now. 7442 return BB; 7443 } 7444 7445 // Expand the pseudo op to a loop. 7446 // thisMBB: 7447 // ... 
  //   movw varEnd, # --> with thumb2
  //   movt varEnd, #
  //   ldrcp varEnd, idx --> without thumb2
  //   fallthrough --> loopMBB
  // loopMBB:
  //   PHI varPhi, varEnd, varLoop
  //   PHI srcPhi, src, srcLoop
  //   PHI destPhi, dst, destLoop
  //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
  //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)
  //   subs varLoop, varPhi, #UnitSize
  //   bne loopMBB
  //   fallthrough --> exitMBB
  // exitMBB:
  //   epilogue to handle left-over bytes
  //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
  //   [destOut] = STRB_POST(scratch, destLoop, 1)
  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, loopMBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  // Load an immediate to varEnd.
  unsigned varEnd = MRI.createVirtualRegister(TRC);
  if (IsThumb2) {
    unsigned Vtmp = varEnd;
    if ((LoopSize & 0xFFFF0000) != 0)
      Vtmp = MRI.createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
                       .addImm(LoopSize & 0xFFFF));

    if ((LoopSize & 0xFFFF0000) != 0)
      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
                         .addReg(Vtmp).addImm(LoopSize >> 16));
  } else {
    MachineConstantPool *ConstantPool = MF->getConstantPool();
    Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
    const Constant *C = ConstantInt::get(Int32Ty, LoopSize);

    // MachineConstantPool wants an explicit alignment.
    unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
    if (Align == 0)
      Align = getDataLayout()->getTypeAllocSize(C->getType());
    unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

    if (IsThumb1)
      AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
                         .addReg(varEnd, RegState::Define)
                         .addConstantPoolIndex(Idx));
    else
      AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
                         .addReg(varEnd, RegState::Define)
                         .addConstantPoolIndex(Idx).addImm(0));
  }
  BB->addSuccessor(loopMBB);

  // Generate the loop body:
  //   varPhi = PHI(varLoop, varEnd)
  //   srcPhi = PHI(srcLoop, src)
  //   destPhi = PHI(destLoop, dst)
  MachineBasicBlock *entryBB = BB;
  BB = loopMBB;
  unsigned varLoop = MRI.createVirtualRegister(TRC);
  unsigned varPhi = MRI.createVirtualRegister(TRC);
  unsigned srcLoop = MRI.createVirtualRegister(TRC);
  unsigned srcPhi = MRI.createVirtualRegister(TRC);
  unsigned destLoop = MRI.createVirtualRegister(TRC);
  unsigned destPhi = MRI.createVirtualRegister(TRC);

  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
      .addReg(varLoop).addMBB(loopMBB)
      .addReg(varEnd).addMBB(entryBB);
  BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
      .addReg(srcLoop).addMBB(loopMBB)
      .addReg(src).addMBB(entryBB);
  BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
      .addReg(destLoop).addMBB(loopMBB)
      .addReg(dest).addMBB(entryBB);

  // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
  unsigned scratch = MRI.createVirtualRegister(IsNeon ?
VecTRC : TRC); 7534 if (IsNeon) { 7535 AddDefaultPred(BuildMI(*BB, BB->end(), dl, TII->get(UnitLdOpc), scratch) 7536 .addReg(srcLoop, RegState::Define).addReg(srcPhi) 7537 .addImm(0)); 7538 7539 AddDefaultPred(BuildMI(*BB, BB->end(), dl, TII->get(UnitStOpc), destLoop) 7540 .addReg(destPhi).addImm(0).addReg(scratch)); 7541 } else if (IsThumb1) { 7542 // load + update srcIn 7543 AddDefaultPred(BuildMI(*BB, BB->end(), dl, TII->get(UnitLdOpc), scratch) 7544 .addReg(srcPhi).addImm(0)); 7545 MachineInstrBuilder MIB = 7546 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tADDi8), srcLoop); 7547 MIB = AddDefaultT1CC(MIB); 7548 MIB.addReg(srcPhi).addImm(UnitSize); 7549 AddDefaultPred(MIB); 7550 7551 // store + update destIn 7552 AddDefaultPred(BuildMI(*BB, BB->end(), dl, TII->get(UnitStOpc)) 7553 .addReg(scratch).addReg(destPhi).addImm(0)); 7554 MIB = BuildMI(*BB, BB->end(), dl, TII->get(ARM::tADDi8), destLoop); 7555 MIB = AddDefaultT1CC(MIB); 7556 MIB.addReg(destPhi).addImm(UnitSize); 7557 AddDefaultPred(MIB); 7558 } else if (IsThumb2) { 7559 AddDefaultPred(BuildMI(*BB, BB->end(), dl, TII->get(UnitLdOpc), scratch) 7560 .addReg(srcLoop, RegState::Define).addReg(srcPhi) 7561 .addImm(UnitSize)); 7562 AddDefaultPred(BuildMI(*BB, BB->end(), dl, TII->get(UnitStOpc), destLoop) 7563 .addReg(scratch).addReg(destPhi).addImm(UnitSize)); 7564 } else { // arm 7565 AddDefaultPred(BuildMI(*BB, BB->end(), dl, TII->get(UnitLdOpc), scratch) 7566 .addReg(srcLoop, RegState::Define).addReg(srcPhi) 7567 .addReg(0).addImm(UnitSize)); 7568 AddDefaultPred(BuildMI(*BB, BB->end(), dl, TII->get(UnitStOpc), destLoop) 7569 .addReg(scratch).addReg(destPhi).addReg(0) 7570 .addImm(UnitSize)); 7571 } 7572 7573 // Decrement loop variable by UnitSize. 7574 if (IsThumb1) { 7575 MachineInstrBuilder MIB = 7576 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop); 7577 MIB = AddDefaultT1CC(MIB); 7578 MIB.addReg(varPhi).addImm(UnitSize); 7579 AddDefaultPred(MIB); 7580 } else { 7581 MachineInstrBuilder MIB = 7582 BuildMI(*BB, BB->end(), dl, 7583 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); 7584 AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); 7585 MIB->getOperand(5).setReg(ARM::CPSR); 7586 MIB->getOperand(5).setIsDef(true); 7587 } 7588 BuildMI(*BB, BB->end(), dl, 7589 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc)) 7590 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 7591 7592 // loopMBB can loop back to loopMBB or fall through to exitMBB. 7593 BB->addSuccessor(loopMBB); 7594 BB->addSuccessor(exitMBB); 7595 7596 // Add epilogue to handle BytesLeft. 
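  // For example, a 19-byte copy with UnitSize == 4 gives LoopSize == 16 and
  // BytesLeft == 3: the loop above moves four words, and the code below then
  // copies the three trailing bytes one at a time.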
7597 BB = exitMBB; 7598 MachineInstr *StartOfExit = exitMBB->begin(); 7599 7600 // [scratch, srcOut] = LDRB_POST(srcLoop, 1) 7601 // [destOut] = STRB_POST(scratch, destLoop, 1) 7602 unsigned srcIn = srcLoop; 7603 unsigned destIn = destLoop; 7604 for (unsigned i = 0; i < BytesLeft; i++) { 7605 unsigned srcOut = MRI.createVirtualRegister(TRC); 7606 unsigned destOut = MRI.createVirtualRegister(TRC); 7607 unsigned scratch = MRI.createVirtualRegister(TRC); 7608 if (IsThumb1) { 7609 // load into scratch 7610 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(ARM::tLDRBi), 7611 scratch).addReg(srcIn).addImm(0)); 7612 7613 // update base pointer 7614 MachineInstrBuilder MIB = 7615 BuildMI(*BB, StartOfExit, dl, TII->get(ARM::tADDi8), srcOut); 7616 MIB = AddDefaultT1CC(MIB); 7617 MIB.addReg(srcIn).addImm(1); 7618 AddDefaultPred(MIB); 7619 7620 // store 7621 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(ARM::tSTRBi)) 7622 .addReg(scratch).addReg(destIn).addImm(0)); 7623 7624 // update base pointer 7625 MIB = BuildMI(*BB, StartOfExit, dl, TII->get(ARM::tADDi8), destOut); 7626 MIB = AddDefaultT1CC(MIB); 7627 MIB.addReg(destIn).addImm(1); 7628 AddDefaultPred(MIB); 7629 } else if (IsThumb2) { 7630 AddDefaultPred( 7631 BuildMI(*BB, StartOfExit, dl, TII->get(ARM::t2LDRB_POST), scratch) 7632 .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); 7633 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(ARM::t2STRB_POST), 7634 destOut).addReg(scratch).addReg(destIn).addImm(1)); 7635 } else { // arm 7636 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(ARM::LDRB_POST_IMM), 7637 scratch).addReg(srcOut, RegState::Define) 7638 .addReg(srcIn).addReg(0).addImm(1)); 7639 AddDefaultPred( 7640 BuildMI(*BB, StartOfExit, dl, TII->get(ARM::STRB_POST_IMM), destOut) 7641 .addReg(scratch).addReg(destIn).addReg(0).addImm(1)); 7642 } 7643 srcIn = srcOut; 7644 destIn = destOut; 7645 } 7646 7647 MI->eraseFromParent(); // The instruction is gone now. 7648 return BB; 7649 } 7650 7651 MachineBasicBlock * 7652 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 7653 MachineBasicBlock *BB) const { 7654 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 7655 DebugLoc dl = MI->getDebugLoc(); 7656 bool isThumb2 = Subtarget->isThumb2(); 7657 switch (MI->getOpcode()) { 7658 default: { 7659 MI->dump(); 7660 llvm_unreachable("Unexpected instr type to insert"); 7661 } 7662 // The Thumb2 pre-indexed stores have the same MI operands, they just 7663 // define them differently in the .td files from the isel patterns, so 7664 // they need pseudos. 7665 case ARM::t2STR_preidx: 7666 MI->setDesc(TII->get(ARM::t2STR_PRE)); 7667 return BB; 7668 case ARM::t2STRB_preidx: 7669 MI->setDesc(TII->get(ARM::t2STRB_PRE)); 7670 return BB; 7671 case ARM::t2STRH_preidx: 7672 MI->setDesc(TII->get(ARM::t2STRH_PRE)); 7673 return BB; 7674 7675 case ARM::STRi_preidx: 7676 case ARM::STRBi_preidx: { 7677 unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? 7678 ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; 7679 // Decode the offset. 
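    // The addressing-mode-2 immediate folds the add/sub direction and the
    // offset magnitude into a single value; e.g. an encoding of {sub, #4}
    // decodes to Offset == -4 below, the signed form the _PRE_IMM opcodes
    // expect.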
7680 unsigned Offset = MI->getOperand(4).getImm(); 7681 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; 7682 Offset = ARM_AM::getAM2Offset(Offset); 7683 if (isSub) 7684 Offset = -Offset; 7685 7686 MachineMemOperand *MMO = *MI->memoperands_begin(); 7687 BuildMI(*BB, MI, dl, TII->get(NewOpc)) 7688 .addOperand(MI->getOperand(0)) // Rn_wb 7689 .addOperand(MI->getOperand(1)) // Rt 7690 .addOperand(MI->getOperand(2)) // Rn 7691 .addImm(Offset) // offset (skip GPR==zero_reg) 7692 .addOperand(MI->getOperand(5)) // pred 7693 .addOperand(MI->getOperand(6)) 7694 .addMemOperand(MMO); 7695 MI->eraseFromParent(); 7696 return BB; 7697 } 7698 case ARM::STRr_preidx: 7699 case ARM::STRBr_preidx: 7700 case ARM::STRH_preidx: { 7701 unsigned NewOpc; 7702 switch (MI->getOpcode()) { 7703 default: llvm_unreachable("unexpected opcode!"); 7704 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; 7705 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; 7706 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; 7707 } 7708 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); 7709 for (unsigned i = 0; i < MI->getNumOperands(); ++i) 7710 MIB.addOperand(MI->getOperand(i)); 7711 MI->eraseFromParent(); 7712 return BB; 7713 } 7714 case ARM::ATOMIC_LOAD_ADD_I8: 7715 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 7716 case ARM::ATOMIC_LOAD_ADD_I16: 7717 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 7718 case ARM::ATOMIC_LOAD_ADD_I32: 7719 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 7720 7721 case ARM::ATOMIC_LOAD_AND_I8: 7722 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7723 case ARM::ATOMIC_LOAD_AND_I16: 7724 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7725 case ARM::ATOMIC_LOAD_AND_I32: 7726 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7727 7728 case ARM::ATOMIC_LOAD_OR_I8: 7729 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7730 case ARM::ATOMIC_LOAD_OR_I16: 7731 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7732 case ARM::ATOMIC_LOAD_OR_I32: 7733 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7734 7735 case ARM::ATOMIC_LOAD_XOR_I8: 7736 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7737 case ARM::ATOMIC_LOAD_XOR_I16: 7738 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7739 case ARM::ATOMIC_LOAD_XOR_I32: 7740 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7741 7742 case ARM::ATOMIC_LOAD_NAND_I8: 7743 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 7744 case ARM::ATOMIC_LOAD_NAND_I16: 7745 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 7746 case ARM::ATOMIC_LOAD_NAND_I32: 7747 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 7748 7749 case ARM::ATOMIC_LOAD_SUB_I8: 7750 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 7751 case ARM::ATOMIC_LOAD_SUB_I16: 7752 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 7753 case ARM::ATOMIC_LOAD_SUB_I32: 7754 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); 7755 7756 case ARM::ATOMIC_LOAD_MIN_I8: 7757 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); 7758 case ARM::ATOMIC_LOAD_MIN_I16: 7759 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); 7760 case ARM::ATOMIC_LOAD_MIN_I32: 7761 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); 7762 7763 case ARM::ATOMIC_LOAD_MAX_I8: 7764 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); 7765 case ARM::ATOMIC_LOAD_MAX_I16: 7766 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); 7767 case ARM::ATOMIC_LOAD_MAX_I32: 7768 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); 7769 7770 case ARM::ATOMIC_LOAD_UMIN_I8: 7771 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); 7772 case ARM::ATOMIC_LOAD_UMIN_I16: 7773 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); 7774 case ARM::ATOMIC_LOAD_UMIN_I32: 7775 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); 7776 7777 case ARM::ATOMIC_LOAD_UMAX_I8: 7778 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); 7779 case ARM::ATOMIC_LOAD_UMAX_I16: 7780 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); 7781 case ARM::ATOMIC_LOAD_UMAX_I32: 7782 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); 7783 7784 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 7785 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 7786 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 7787 7788 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 7789 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 7790 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 7791 7792 case ARM::ATOMIC_LOAD_I64: 7793 return EmitAtomicLoad64(MI, BB); 7794 7795 case ARM::ATOMIC_LOAD_ADD_I64: 7796 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, 7797 isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, 7798 /*NeedsCarry*/ true); 7799 case ARM::ATOMIC_LOAD_SUB_I64: 7800 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7801 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7802 /*NeedsCarry*/ true); 7803 case ARM::ATOMIC_LOAD_OR_I64: 7804 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, 7805 isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7806 case ARM::ATOMIC_LOAD_XOR_I64: 7807 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, 7808 isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7809 case ARM::ATOMIC_LOAD_AND_I64: 7810 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, 7811 isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7812 case ARM::ATOMIC_STORE_I64: 7813 case ARM::ATOMIC_SWAP_I64: 7814 return EmitAtomicBinary64(MI, BB, 0, 0, false); 7815 case ARM::ATOMIC_CMP_SWAP_I64: 7816 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7817 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7818 /*NeedsCarry*/ false, /*IsCmpxchg*/true); 7819 case ARM::ATOMIC_LOAD_MIN_I64: 7820 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7821 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7822 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7823 /*IsMinMax*/ true, ARMCC::LT); 7824 case ARM::ATOMIC_LOAD_MAX_I64: 7825 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7826 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7827 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7828 /*IsMinMax*/ true, ARMCC::GE); 7829 case ARM::ATOMIC_LOAD_UMIN_I64: 7830 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7831 isThumb2 ? 
ARM::t2SBCrr : ARM::SBCrr, 7832 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7833 /*IsMinMax*/ true, ARMCC::LO); 7834 case ARM::ATOMIC_LOAD_UMAX_I64: 7835 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7836 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7837 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7838 /*IsMinMax*/ true, ARMCC::HS); 7839 7840 case ARM::tMOVCCr_pseudo: { 7841 // To "insert" a SELECT_CC instruction, we actually have to insert the 7842 // diamond control-flow pattern. The incoming instruction knows the 7843 // destination vreg to set, the condition code register to branch on, the 7844 // true/false values to select between, and a branch opcode to use. 7845 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7846 MachineFunction::iterator It = BB; 7847 ++It; 7848 7849 // thisMBB: 7850 // ... 7851 // TrueVal = ... 7852 // cmpTY ccX, r1, r2 7853 // bCC copy1MBB 7854 // fallthrough --> copy0MBB 7855 MachineBasicBlock *thisMBB = BB; 7856 MachineFunction *F = BB->getParent(); 7857 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 7858 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 7859 F->insert(It, copy0MBB); 7860 F->insert(It, sinkMBB); 7861 7862 // Transfer the remainder of BB and its successor edges to sinkMBB. 7863 sinkMBB->splice(sinkMBB->begin(), BB, 7864 llvm::next(MachineBasicBlock::iterator(MI)), 7865 BB->end()); 7866 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 7867 7868 BB->addSuccessor(copy0MBB); 7869 BB->addSuccessor(sinkMBB); 7870 7871 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 7872 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 7873 7874 // copy0MBB: 7875 // %FalseValue = ... 7876 // # fallthrough to sinkMBB 7877 BB = copy0MBB; 7878 7879 // Update machine-CFG edges 7880 BB->addSuccessor(sinkMBB); 7881 7882 // sinkMBB: 7883 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 7884 // ... 7885 BB = sinkMBB; 7886 BuildMI(*BB, BB->begin(), dl, 7887 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 7888 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 7889 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 7890 7891 MI->eraseFromParent(); // The pseudo instruction is gone now. 7892 return BB; 7893 } 7894 7895 case ARM::BCCi64: 7896 case ARM::BCCZi64: { 7897 // If there is an unconditional branch to the other successor, remove it. 7898 BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); 7899 7900 // Compare both parts that make up the double comparison separately for 7901 // equality. 7902 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 7903 7904 unsigned LHS1 = MI->getOperand(1).getReg(); 7905 unsigned LHS2 = MI->getOperand(2).getReg(); 7906 if (RHSisZero) { 7907 AddDefaultPred(BuildMI(BB, dl, 7908 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7909 .addReg(LHS1).addImm(0)); 7910 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7911 .addReg(LHS2).addImm(0) 7912 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 7913 } else { 7914 unsigned RHS1 = MI->getOperand(3).getReg(); 7915 unsigned RHS2 = MI->getOperand(4).getReg(); 7916 AddDefaultPred(BuildMI(BB, dl, 7917 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 7918 .addReg(LHS1).addReg(RHS1)); 7919 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 7920 .addReg(LHS2).addReg(RHS2) 7921 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 7922 } 7923 7924 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 
3 : 5).getMBB(); 7925 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 7926 if (MI->getOperand(0).getImm() == ARMCC::NE) 7927 std::swap(destMBB, exitMBB); 7928 7929 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 7930 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 7931 if (isThumb2) 7932 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); 7933 else 7934 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); 7935 7936 MI->eraseFromParent(); // The pseudo instruction is gone now. 7937 return BB; 7938 } 7939 7940 case ARM::Int_eh_sjlj_setjmp: 7941 case ARM::Int_eh_sjlj_setjmp_nofp: 7942 case ARM::tInt_eh_sjlj_setjmp: 7943 case ARM::t2Int_eh_sjlj_setjmp: 7944 case ARM::t2Int_eh_sjlj_setjmp_nofp: 7945 EmitSjLjDispatchBlock(MI, BB); 7946 return BB; 7947 7948 case ARM::ABS: 7949 case ARM::t2ABS: { 7950 // To insert an ABS instruction, we have to insert the 7951 // diamond control-flow pattern. The incoming instruction knows the 7952 // source vreg to test against 0, the destination vreg to set, 7953 // the condition code register to branch on, the 7954 // true/false values to select between, and a branch opcode to use. 7955 // It transforms 7956 // V1 = ABS V0 7957 // into 7958 // V2 = MOVS V0 7959 // BCC (branch to SinkBB if V0 >= 0) 7960 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) 7961 // SinkBB: V1 = PHI(V2, V3) 7962 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7963 MachineFunction::iterator BBI = BB; 7964 ++BBI; 7965 MachineFunction *Fn = BB->getParent(); 7966 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); 7967 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); 7968 Fn->insert(BBI, RSBBB); 7969 Fn->insert(BBI, SinkBB); 7970 7971 unsigned int ABSSrcReg = MI->getOperand(1).getReg(); 7972 unsigned int ABSDstReg = MI->getOperand(0).getReg(); 7973 bool isThumb2 = Subtarget->isThumb2(); 7974 MachineRegisterInfo &MRI = Fn->getRegInfo(); 7975 // In Thumb mode S must not be specified if source register is the SP or 7976 // PC and if destination register is the SP, so restrict register class 7977 unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? 7978 (const TargetRegisterClass*)&ARM::rGPRRegClass : 7979 (const TargetRegisterClass*)&ARM::GPRRegClass); 7980 7981 // Transfer the remainder of BB and its successor edges to sinkMBB. 7982 SinkBB->splice(SinkBB->begin(), BB, 7983 llvm::next(MachineBasicBlock::iterator(MI)), 7984 BB->end()); 7985 SinkBB->transferSuccessorsAndUpdatePHIs(BB); 7986 7987 BB->addSuccessor(RSBBB); 7988 BB->addSuccessor(SinkBB); 7989 7990 // fall through to SinkMBB 7991 RSBBB->addSuccessor(SinkBB); 7992 7993 // insert a cmp at the end of BB 7994 AddDefaultPred(BuildMI(BB, dl, 7995 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7996 .addReg(ABSSrcReg).addImm(0)); 7997 7998 // insert a bcc with opposite CC to ARMCC::MI at the end of BB 7999 BuildMI(BB, dl, 8000 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) 8001 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); 8002 8003 // insert rsbri in RSBBB 8004 // Note: BCC and rsbri will be converted into predicated rsbmi 8005 // by if-conversion pass 8006 BuildMI(*RSBBB, RSBBB->begin(), dl, 8007 TII->get(isThumb2 ? 
ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) 8008 .addReg(ABSSrcReg, RegState::Kill) 8009 .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); 8010 8011 // insert PHI in SinkBB, 8012 // reuse ABSDstReg to not change uses of ABS instruction 8013 BuildMI(*SinkBB, SinkBB->begin(), dl, 8014 TII->get(ARM::PHI), ABSDstReg) 8015 .addReg(NewRsbDstReg).addMBB(RSBBB) 8016 .addReg(ABSSrcReg).addMBB(BB); 8017 8018 // remove ABS instruction 8019 MI->eraseFromParent(); 8020 8021 // return last added BB 8022 return SinkBB; 8023 } 8024 case ARM::COPY_STRUCT_BYVAL_I32: 8025 ++NumLoopByVals; 8026 return EmitStructByval(MI, BB); 8027 } 8028 } 8029 8030 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, 8031 SDNode *Node) const { 8032 if (!MI->hasPostISelHook()) { 8033 assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && 8034 "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); 8035 return; 8036 } 8037 8038 const MCInstrDesc *MCID = &MI->getDesc(); 8039 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, 8040 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional 8041 // operand is still set to noreg. If needed, set the optional operand's 8042 // register to CPSR, and remove the redundant implicit def. 8043 // 8044 // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>). 8045 8046 // Rename pseudo opcodes. 8047 unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); 8048 if (NewOpc) { 8049 const ARMBaseInstrInfo *TII = 8050 static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); 8051 MCID = &TII->get(NewOpc); 8052 8053 assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && 8054 "converted opcode should be the same except for cc_out"); 8055 8056 MI->setDesc(*MCID); 8057 8058 // Add the optional cc_out operand 8059 MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); 8060 } 8061 unsigned ccOutIdx = MCID->getNumOperands() - 1; 8062 8063 // Any ARM instruction that sets the 's' bit should specify an optional 8064 // "cc_out" operand in the last operand position. 8065 if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { 8066 assert(!NewOpc && "Optional cc_out operand required"); 8067 return; 8068 } 8069 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it 8070 // since we already have an optional CPSR def. 8071 bool definesCPSR = false; 8072 bool deadCPSR = false; 8073 for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands(); 8074 i != e; ++i) { 8075 const MachineOperand &MO = MI->getOperand(i); 8076 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { 8077 definesCPSR = true; 8078 if (MO.isDead()) 8079 deadCPSR = true; 8080 MI->RemoveOperand(i); 8081 break; 8082 } 8083 } 8084 if (!definesCPSR) { 8085 assert(!NewOpc && "Optional cc_out operand required"); 8086 return; 8087 } 8088 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); 8089 if (deadCPSR) { 8090 assert(!MI->getOperand(ccOutIdx).getReg() && 8091 "expect uninitialized optional cc_out operand"); 8092 return; 8093 } 8094 8095 // If this instruction was defined with an optional CPSR def and its dag node 8096 // had a live implicit CPSR def, then activate the optional CPSR def. 
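  // E.g. an ADCS whose flags result is still consumed keeps its flag-setting
  // behaviour by pointing the optional cc_out operand at CPSR instead of
  // leaving it as %noreg.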
8097 MachineOperand &MO = MI->getOperand(ccOutIdx); 8098 MO.setReg(ARM::CPSR); 8099 MO.setIsDef(true); 8100 } 8101 8102 //===----------------------------------------------------------------------===// 8103 // ARM Optimization Hooks 8104 //===----------------------------------------------------------------------===// 8105 8106 // Helper function that checks if N is a null or all ones constant. 8107 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { 8108 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); 8109 if (!C) 8110 return false; 8111 return AllOnes ? C->isAllOnesValue() : C->isNullValue(); 8112 } 8113 8114 // Return true if N is conditionally 0 or all ones. 8115 // Detects these expressions where cc is an i1 value: 8116 // 8117 // (select cc 0, y) [AllOnes=0] 8118 // (select cc y, 0) [AllOnes=0] 8119 // (zext cc) [AllOnes=0] 8120 // (sext cc) [AllOnes=0/1] 8121 // (select cc -1, y) [AllOnes=1] 8122 // (select cc y, -1) [AllOnes=1] 8123 // 8124 // Invert is set when N is the null/all ones constant when CC is false. 8125 // OtherOp is set to the alternative value of N. 8126 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, 8127 SDValue &CC, bool &Invert, 8128 SDValue &OtherOp, 8129 SelectionDAG &DAG) { 8130 switch (N->getOpcode()) { 8131 default: return false; 8132 case ISD::SELECT: { 8133 CC = N->getOperand(0); 8134 SDValue N1 = N->getOperand(1); 8135 SDValue N2 = N->getOperand(2); 8136 if (isZeroOrAllOnes(N1, AllOnes)) { 8137 Invert = false; 8138 OtherOp = N2; 8139 return true; 8140 } 8141 if (isZeroOrAllOnes(N2, AllOnes)) { 8142 Invert = true; 8143 OtherOp = N1; 8144 return true; 8145 } 8146 return false; 8147 } 8148 case ISD::ZERO_EXTEND: 8149 // (zext cc) can never be the all ones value. 8150 if (AllOnes) 8151 return false; 8152 // Fall through. 8153 case ISD::SIGN_EXTEND: { 8154 EVT VT = N->getValueType(0); 8155 CC = N->getOperand(0); 8156 if (CC.getValueType() != MVT::i1) 8157 return false; 8158 Invert = !AllOnes; 8159 if (AllOnes) 8160 // When looking for an AllOnes constant, N is an sext, and the 'other' 8161 // value is 0. 8162 OtherOp = DAG.getConstant(0, VT); 8163 else if (N->getOpcode() == ISD::ZERO_EXTEND) 8164 // When looking for a 0 constant, N can be zext or sext. 8165 OtherOp = DAG.getConstant(1, VT); 8166 else 8167 OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); 8168 return true; 8169 } 8170 } 8171 } 8172 8173 // Combine a constant select operand into its use: 8174 // 8175 // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 8176 // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 8177 // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] 8178 // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) 8179 // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) 8180 // 8181 // The transform is rejected if the select doesn't have a constant operand that 8182 // is null, or all ones when AllOnes is set. 8183 // 8184 // Also recognize sext/zext from i1: 8185 // 8186 // (add (zext cc), x) -> (select cc (add x, 1), x) 8187 // (add (sext cc), x) -> (select cc (add x, -1), x) 8188 // 8189 // These transformations eventually create predicated instructions. 8190 // 8191 // @param N The node to transform. 8192 // @param Slct The N operand that is a select. 8193 // @param OtherOp The other N operand (x above). 8194 // @param DCI Context. 8195 // @param AllOnes Require the select constant to be all ones instead of null. 8196 // @returns The new node, or SDValue() on failure. 
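// As a concrete instance of the first pattern above,
//   (add (select cc, 0, c), x)
// has Slct = (select cc, 0, c) and OtherOp = x; isConditionalZeroOrAllOnes
// reports Invert = false with NonConstantVal = c, so the result is
//   (select cc, x, (add x, c)).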
static
SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                            TargetLowering::DAGCombinerInfo &DCI,
                            bool AllOnes = false) {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  SDValue NonConstantVal;
  SDValue CCOp;
  bool SwapSelectOps;
  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
                                  NonConstantVal, DAG))
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  SDValue TrueVal = OtherOp;
  SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                 OtherOp, NonConstantVal);
  // Unless SwapSelectOps says CC should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
                     CCOp, TrueVal, FalseVal);
}

// Attempt combineSelectAndUse on each operand of a commutative operator N.
static
SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
                                       TargetLowering::DAGCombinerInfo &DCI) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (N0.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
    if (Result.getNode())
      return Result;
  }
  if (N1.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
    if (Result.getNode())
      return Result;
  }
  return SDValue();
}

// AddCombineToVPADDL - For pair-wise add on NEON, use the vpaddl instruction
// (only after legalization).
static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {

  // Only perform the optimization after legalize, and when NEON is available.
  // We also expect both operands to be BUILD_VECTORs.
  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
      || N0.getOpcode() != ISD::BUILD_VECTOR
      || N1.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
  EVT VT = N->getValueType(0);
  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
    return SDValue();

  // Check that the vector operands are of the right form.
  // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR_ELT
  // operands, where N is the size of the formed vector.
  // Each EXTRACT_VECTOR_ELT should have the same input vector and an odd or
  // even index such that we have a pair-wise add pattern.

  // Grab the vector that all EXTRACT_VECTOR_ELT nodes should be referencing.
  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();
  SDValue Vec = N0->getOperand(0)->getOperand(0);
  SDNode *V = Vec.getNode();
  unsigned nextIndex = 0;

  // For each operand of the ADD (each a BUILD_VECTOR), check that every one
  // of its operands is an EXTRACT_VECTOR_ELT with the same vector and the
  // appropriate index.
  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
    if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
        && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

      SDValue ExtVec0 = N0->getOperand(i);
      SDValue ExtVec1 = N1->getOperand(i);

      // First operand is the vector, verify it's the same.
      if (V != ExtVec0->getOperand(0).getNode() ||
          V != ExtVec1->getOperand(0).getNode())
        return SDValue();

      // Second is the constant, verify it's correct.
      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));

      // For the constants, we want to see all the even or all the odd.
      if (!C0 || !C1 || C0->getZExtValue() != nextIndex
          || C1->getZExtValue() != nextIndex+1)
        return SDValue();

      // Increment index.
      nextIndex += 2;
    } else
      return SDValue();
  }

  // Create VPADDL node.
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Build operand list.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
                                TLI.getPointerTy()));

  // Input is the vector.
  Ops.push_back(Vec);

  // Get widened type and narrowed type.
  MVT widenType;
  unsigned numElem = VT.getVectorNumElements();
  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
    case MVT::i8:  widenType = MVT::getVectorVT(MVT::i16, numElem); break;
    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
    default:
      llvm_unreachable("Invalid vector element type for padd optimization.");
  }

  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
                            widenType, &Ops[0], Ops.size());
  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp);
}

static SDValue findMUL_LOHI(SDValue V) {
  if (V->getOpcode() == ISD::UMUL_LOHI ||
      V->getOpcode() == ISD::SMUL_LOHI)
    return V;
  return SDValue();
}

static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const ARMSubtarget *Subtarget) {

  if (Subtarget->isThumb1Only()) return SDValue();

  // Only perform the checks after legalize when the pattern is available.
  if (DCI.isBeforeLegalize()) return SDValue();

  // Look for multiply-add opportunities.
  // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
  // each add node consumes a value from ISD::UMUL_LOHI and there is
  // a glue link from the first add to the second add.
  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE
  // with an S/UMLAL instruction.
  //                  loAdd   UMUL_LOHI
  //                   \    / :lo    \ :hi
  //                    \  /          \          [no multiline comment]
  //                     ADDC         |  hiAdd
  //                      \ :glue    /  /
  //                       \        /  /
  //                        ADDE
  //
  assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
  SDValue AddcOp0 = AddcNode->getOperand(0);
  SDValue AddcOp1 = AddcNode->getOperand(1);

  // Check if the two operands are from the same mul_lohi node.
  if (AddcOp0.getNode() == AddcOp1.getNode())
    return SDValue();

  assert(AddcNode->getNumValues() == 2 &&
         AddcNode->getValueType(0) == MVT::i32 &&
         "Expect ADDC with two result values. First: i32");

  // Check that we have a glued ADDC node.
  if (AddcNode->getValueType(1) != MVT::Glue)
    return SDValue();

  // Check that the ADDC adds the low result of the S/UMUL_LOHI.
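  // A typical source for this shape is a 64-bit multiply-accumulate such as
  // acc + (i64)a * (i64)b, which legalization splits into a U/SMUL_LOHI
  // whose lo/hi halves feed the ADDC/ADDE pair matched below.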
  if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
      AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
      AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
      AddcOp1->getOpcode() != ISD::SMUL_LOHI)
    return SDValue();

  // Look for the glued ADDE.
  SDNode *AddeNode = AddcNode->getGluedUser();
  if (AddeNode == NULL)
    return SDValue();

  // Make sure it is really an ADDE.
  if (AddeNode->getOpcode() != ISD::ADDE)
    return SDValue();

  assert(AddeNode->getNumOperands() == 3 &&
         AddeNode->getOperand(2).getValueType() == MVT::Glue &&
         "ADDE node has the wrong inputs");

  // Check for the triangle shape.
  SDValue AddeOp0 = AddeNode->getOperand(0);
  SDValue AddeOp1 = AddeNode->getOperand(1);

  // Make sure that the ADDE operands are not coming from the same node.
  if (AddeOp0.getNode() == AddeOp1.getNode())
    return SDValue();

  // Find the MUL_LOHI node walking up ADDE's operands.
  bool IsLeftOperandMUL = false;
  SDValue MULOp = findMUL_LOHI(AddeOp0);
  if (MULOp == SDValue())
    MULOp = findMUL_LOHI(AddeOp1);
  else
    IsLeftOperandMUL = true;
  if (MULOp == SDValue())
    return SDValue();

  // Figure out the right opcode.
  unsigned Opc = MULOp->getOpcode();
  unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;

  // Figure out the high and low input values to the MLAL node.
  SDValue *HiMul = &MULOp;
  SDValue *HiAdd = NULL;
  SDValue *LoMul = NULL;
  SDValue *LowAdd = NULL;

  if (IsLeftOperandMUL)
    HiAdd = &AddeOp1;
  else
    HiAdd = &AddeOp0;

  if (AddcOp0->getOpcode() == Opc) {
    LoMul = &AddcOp0;
    LowAdd = &AddcOp1;
  }
  if (AddcOp1->getOpcode() == Opc) {
    LoMul = &AddcOp1;
    LowAdd = &AddcOp0;
  }

  if (LoMul == NULL)
    return SDValue();

  if (LoMul->getNode() != HiMul->getNode())
    return SDValue();

  // Create the merged node.
  SelectionDAG &DAG = DCI.DAG;

  // Build operand list.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(LoMul->getOperand(0));
  Ops.push_back(LoMul->getOperand(1));
  Ops.push_back(*LowAdd);
  Ops.push_back(*HiAdd);

  SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
                                 DAG.getVTList(MVT::i32, MVT::i32),
                                 &Ops[0], Ops.size());

  // Replace the ADD nodes' uses with the MLAL node's values.
  SDValue HiMLALResult(MLALNode.getNode(), 1);
  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);

  SDValue LoMLALResult(MLALNode.getNode(), 0);
  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);

  // Return the original node to notify the driver to stop replacing.
  SDValue resNode(AddcNode, 0);
  return resNode;
}

/// PerformADDCCombine - Target-specific dag combine transform from
/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
static SDValue PerformADDCCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {
  return AddCombineTo64bitMLAL(N, DCI, Subtarget);
}

/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
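/// For example, (add x, (select cc, 0, c)) only matches once PerformADDCombine
/// retries with the operands swapped so that the select appears as N0.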
8485 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 8486 TargetLowering::DAGCombinerInfo &DCI, 8487 const ARMSubtarget *Subtarget){ 8488 8489 // Attempt to create vpaddl for this add. 8490 SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget); 8491 if (Result.getNode()) 8492 return Result; 8493 8494 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 8495 if (N0.getNode()->hasOneUse()) { 8496 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 8497 if (Result.getNode()) return Result; 8498 } 8499 return SDValue(); 8500 } 8501 8502 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 8503 /// 8504 static SDValue PerformADDCombine(SDNode *N, 8505 TargetLowering::DAGCombinerInfo &DCI, 8506 const ARMSubtarget *Subtarget) { 8507 SDValue N0 = N->getOperand(0); 8508 SDValue N1 = N->getOperand(1); 8509 8510 // First try with the default operand order. 8511 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget); 8512 if (Result.getNode()) 8513 return Result; 8514 8515 // If that didn't work, try again with the operands commuted. 8516 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); 8517 } 8518 8519 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 8520 /// 8521 static SDValue PerformSUBCombine(SDNode *N, 8522 TargetLowering::DAGCombinerInfo &DCI) { 8523 SDValue N0 = N->getOperand(0); 8524 SDValue N1 = N->getOperand(1); 8525 8526 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 8527 if (N1.getNode()->hasOneUse()) { 8528 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 8529 if (Result.getNode()) return Result; 8530 } 8531 8532 return SDValue(); 8533 } 8534 8535 /// PerformVMULCombine 8536 /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the 8537 /// special multiplier accumulator forwarding. 
8538 /// vmul d3, d0, d2 8539 /// vmla d3, d1, d2 8540 /// is faster than 8541 /// vadd d3, d0, d1 8542 /// vmul d3, d3, d2 8543 // However, for (A + B) * (A + B), 8544 // vadd d2, d0, d1 8545 // vmul d3, d0, d2 8546 // vmla d3, d1, d2 8547 // is slower than 8548 // vadd d2, d0, d1 8549 // vmul d3, d2, d2 8550 static SDValue PerformVMULCombine(SDNode *N, 8551 TargetLowering::DAGCombinerInfo &DCI, 8552 const ARMSubtarget *Subtarget) { 8553 if (!Subtarget->hasVMLxForwarding()) 8554 return SDValue(); 8555 8556 SelectionDAG &DAG = DCI.DAG; 8557 SDValue N0 = N->getOperand(0); 8558 SDValue N1 = N->getOperand(1); 8559 unsigned Opcode = N0.getOpcode(); 8560 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 8561 Opcode != ISD::FADD && Opcode != ISD::FSUB) { 8562 Opcode = N1.getOpcode(); 8563 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 8564 Opcode != ISD::FADD && Opcode != ISD::FSUB) 8565 return SDValue(); 8566 std::swap(N0, N1); 8567 } 8568 8569 if (N0 == N1) 8570 return SDValue(); 8571 8572 EVT VT = N->getValueType(0); 8573 SDLoc DL(N); 8574 SDValue N00 = N0->getOperand(0); 8575 SDValue N01 = N0->getOperand(1); 8576 return DAG.getNode(Opcode, DL, VT, 8577 DAG.getNode(ISD::MUL, DL, VT, N00, N1), 8578 DAG.getNode(ISD::MUL, DL, VT, N01, N1)); 8579 } 8580 8581 static SDValue PerformMULCombine(SDNode *N, 8582 TargetLowering::DAGCombinerInfo &DCI, 8583 const ARMSubtarget *Subtarget) { 8584 SelectionDAG &DAG = DCI.DAG; 8585 8586 if (Subtarget->isThumb1Only()) 8587 return SDValue(); 8588 8589 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 8590 return SDValue(); 8591 8592 EVT VT = N->getValueType(0); 8593 if (VT.is64BitVector() || VT.is128BitVector()) 8594 return PerformVMULCombine(N, DCI, Subtarget); 8595 if (VT != MVT::i32) 8596 return SDValue(); 8597 8598 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8599 if (!C) 8600 return SDValue(); 8601 8602 int64_t MulAmt = C->getSExtValue(); 8603 unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt); 8604 8605 ShiftAmt = ShiftAmt & (32 - 1); 8606 SDValue V = N->getOperand(0); 8607 SDLoc DL(N); 8608 8609 SDValue Res; 8610 MulAmt >>= ShiftAmt; 8611 8612 if (MulAmt >= 0) { 8613 if (isPowerOf2_32(MulAmt - 1)) { 8614 // (mul x, 2^N + 1) => (add (shl x, N), x) 8615 Res = DAG.getNode(ISD::ADD, DL, VT, 8616 V, 8617 DAG.getNode(ISD::SHL, DL, VT, 8618 V, 8619 DAG.getConstant(Log2_32(MulAmt - 1), 8620 MVT::i32))); 8621 } else if (isPowerOf2_32(MulAmt + 1)) { 8622 // (mul x, 2^N - 1) => (sub (shl x, N), x) 8623 Res = DAG.getNode(ISD::SUB, DL, VT, 8624 DAG.getNode(ISD::SHL, DL, VT, 8625 V, 8626 DAG.getConstant(Log2_32(MulAmt + 1), 8627 MVT::i32)), 8628 V); 8629 } else 8630 return SDValue(); 8631 } else { 8632 uint64_t MulAmtAbs = -MulAmt; 8633 if (isPowerOf2_32(MulAmtAbs + 1)) { 8634 // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) 8635 Res = DAG.getNode(ISD::SUB, DL, VT, 8636 V, 8637 DAG.getNode(ISD::SHL, DL, VT, 8638 V, 8639 DAG.getConstant(Log2_32(MulAmtAbs + 1), 8640 MVT::i32))); 8641 } else if (isPowerOf2_32(MulAmtAbs - 1)) { 8642 // (mul x, -(2^N + 1)) => - (add (shl x, N), x) 8643 Res = DAG.getNode(ISD::ADD, DL, VT, 8644 V, 8645 DAG.getNode(ISD::SHL, DL, VT, 8646 V, 8647 DAG.getConstant(Log2_32(MulAmtAbs-1), 8648 MVT::i32))); 8649 Res = DAG.getNode(ISD::SUB, DL, VT, 8650 DAG.getConstant(0, MVT::i32),Res); 8651 8652 } else 8653 return SDValue(); 8654 } 8655 8656 if (ShiftAmt != 0) 8657 Res = DAG.getNode(ISD::SHL, DL, VT, 8658 Res, DAG.getConstant(ShiftAmt, MVT::i32)); 8659 8660 // Do not add new nodes to DAG combiner worklist. 
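  // Passing AddTo == false to CombineTo keeps the freshly built shift/add
  // nodes off the worklist.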
8661 DCI.CombineTo(N, Res, false); 8662 return SDValue(); 8663 } 8664 8665 static SDValue PerformANDCombine(SDNode *N, 8666 TargetLowering::DAGCombinerInfo &DCI, 8667 const ARMSubtarget *Subtarget) { 8668 8669 // Attempt to use immediate-form VBIC 8670 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 8671 SDLoc dl(N); 8672 EVT VT = N->getValueType(0); 8673 SelectionDAG &DAG = DCI.DAG; 8674 8675 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 8676 return SDValue(); 8677 8678 APInt SplatBits, SplatUndef; 8679 unsigned SplatBitSize; 8680 bool HasAnyUndefs; 8681 if (BVN && 8682 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 8683 if (SplatBitSize <= 64) { 8684 EVT VbicVT; 8685 SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), 8686 SplatUndef.getZExtValue(), SplatBitSize, 8687 DAG, VbicVT, VT.is128BitVector(), 8688 OtherModImm); 8689 if (Val.getNode()) { 8690 SDValue Input = 8691 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); 8692 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); 8693 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); 8694 } 8695 } 8696 } 8697 8698 if (!Subtarget->isThumb1Only()) { 8699 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) 8700 SDValue Result = combineSelectAndUseCommutative(N, true, DCI); 8701 if (Result.getNode()) 8702 return Result; 8703 } 8704 8705 return SDValue(); 8706 } 8707 8708 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR 8709 static SDValue PerformORCombine(SDNode *N, 8710 TargetLowering::DAGCombinerInfo &DCI, 8711 const ARMSubtarget *Subtarget) { 8712 // Attempt to use immediate-form VORR 8713 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 8714 SDLoc dl(N); 8715 EVT VT = N->getValueType(0); 8716 SelectionDAG &DAG = DCI.DAG; 8717 8718 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 8719 return SDValue(); 8720 8721 APInt SplatBits, SplatUndef; 8722 unsigned SplatBitSize; 8723 bool HasAnyUndefs; 8724 if (BVN && Subtarget->hasNEON() && 8725 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 8726 if (SplatBitSize <= 64) { 8727 EVT VorrVT; 8728 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 8729 SplatUndef.getZExtValue(), SplatBitSize, 8730 DAG, VorrVT, VT.is128BitVector(), 8731 OtherModImm); 8732 if (Val.getNode()) { 8733 SDValue Input = 8734 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); 8735 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); 8736 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); 8737 } 8738 } 8739 } 8740 8741 if (!Subtarget->isThumb1Only()) { 8742 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) 8743 SDValue Result = combineSelectAndUseCommutative(N, false, DCI); 8744 if (Result.getNode()) 8745 return Result; 8746 } 8747 8748 // The code below optimizes (or (and X, Y), Z). 8749 // The AND operand needs to have a single user to make these optimizations 8750 // profitable. 8751 SDValue N0 = N->getOperand(0); 8752 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) 8753 return SDValue(); 8754 SDValue N1 = N->getOperand(1); 8755 8756 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. 
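  // E.g. with A == 0x0000FFFF in every lane, the result keeps the low
  // halfword of each element of B and the high halfword of each element of
  // C, i.e. a single VBSL with A as the bit-select mask.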
8757 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && 8758 DAG.getTargetLoweringInfo().isTypeLegal(VT)) { 8759 APInt SplatUndef; 8760 unsigned SplatBitSize; 8761 bool HasAnyUndefs; 8762 8763 APInt SplatBits0, SplatBits1; 8764 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); 8765 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); 8766 // Ensure that the second operand of both ands are constants 8767 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, 8768 HasAnyUndefs) && !HasAnyUndefs) { 8769 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, 8770 HasAnyUndefs) && !HasAnyUndefs) { 8771 // Ensure that the bit width of the constants are the same and that 8772 // the splat arguments are logical inverses as per the pattern we 8773 // are trying to simplify. 8774 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && 8775 SplatBits0 == ~SplatBits1) { 8776 // Canonicalize the vector type to make instruction selection 8777 // simpler. 8778 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 8779 SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, 8780 N0->getOperand(1), 8781 N0->getOperand(0), 8782 N1->getOperand(0)); 8783 return DAG.getNode(ISD::BITCAST, dl, VT, Result); 8784 } 8785 } 8786 } 8787 } 8788 8789 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when 8790 // reasonable. 8791 8792 // BFI is only available on V6T2+ 8793 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) 8794 return SDValue(); 8795 8796 SDLoc DL(N); 8797 // 1) or (and A, mask), val => ARMbfi A, val, mask 8798 // iff (val & mask) == val 8799 // 8800 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 8801 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) 8802 // && mask == ~mask2 8803 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) 8804 // && ~mask == mask2 8805 // (i.e., copy a bitfield value into another bitfield of the same width) 8806 8807 if (VT != MVT::i32) 8808 return SDValue(); 8809 8810 SDValue N00 = N0.getOperand(0); 8811 8812 // The value and the mask need to be constants so we can verify this is 8813 // actually a bitfield set. If the mask is 0xffff, we can do better 8814 // via a movt instruction, so don't use BFI in that case. 8815 SDValue MaskOp = N0.getOperand(1); 8816 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp); 8817 if (!MaskC) 8818 return SDValue(); 8819 unsigned Mask = MaskC->getZExtValue(); 8820 if (Mask == 0xffff) 8821 return SDValue(); 8822 SDValue Res; 8823 // Case (1): or (and A, mask), val => ARMbfi A, val, mask 8824 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 8825 if (N1C) { 8826 unsigned Val = N1C->getZExtValue(); 8827 if ((Val & ~Mask) != Val) 8828 return SDValue(); 8829 8830 if (ARM::isBitFieldInvertedMask(Mask)) { 8831 Val >>= countTrailingZeros(~Mask); 8832 8833 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, 8834 DAG.getConstant(Val, MVT::i32), 8835 DAG.getConstant(Mask, MVT::i32)); 8836 8837 // Do not add new nodes to DAG combiner worklist. 
8838 DCI.CombineTo(N, Res, false); 8839 return SDValue(); 8840 } 8841 } else if (N1.getOpcode() == ISD::AND) { 8842 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 8843 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 8844 if (!N11C) 8845 return SDValue(); 8846 unsigned Mask2 = N11C->getZExtValue(); 8847 8848 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern 8849 // as is to match. 8850 if (ARM::isBitFieldInvertedMask(Mask) && 8851 (Mask == ~Mask2)) { 8852 // The pack halfword instruction works better for masks that fit it, 8853 // so use that when it's available. 8854 if (Subtarget->hasT2ExtractPack() && 8855 (Mask == 0xffff || Mask == 0xffff0000)) 8856 return SDValue(); 8857 // 2a 8858 unsigned amt = countTrailingZeros(Mask2); 8859 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 8860 DAG.getConstant(amt, MVT::i32)); 8861 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, 8862 DAG.getConstant(Mask, MVT::i32)); 8863 // Do not add new nodes to DAG combiner worklist. 8864 DCI.CombineTo(N, Res, false); 8865 return SDValue(); 8866 } else if (ARM::isBitFieldInvertedMask(~Mask) && 8867 (~Mask == Mask2)) { 8868 // The pack halfword instruction works better for masks that fit it, 8869 // so use that when it's available. 8870 if (Subtarget->hasT2ExtractPack() && 8871 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 8872 return SDValue(); 8873 // 2b 8874 unsigned lsb = countTrailingZeros(Mask); 8875 Res = DAG.getNode(ISD::SRL, DL, VT, N00, 8876 DAG.getConstant(lsb, MVT::i32)); 8877 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 8878 DAG.getConstant(Mask2, MVT::i32)); 8879 // Do not add new nodes to DAG combiner worklist. 8880 DCI.CombineTo(N, Res, false); 8881 return SDValue(); 8882 } 8883 } 8884 8885 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && 8886 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) && 8887 ARM::isBitFieldInvertedMask(~Mask)) { 8888 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask 8889 // where lsb(mask) == #shamt and masked bits of B are known zero. 8890 SDValue ShAmt = N00.getOperand(1); 8891 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 8892 unsigned LSB = countTrailingZeros(Mask); 8893 if (ShAmtC != LSB) 8894 return SDValue(); 8895 8896 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), 8897 DAG.getConstant(~Mask, MVT::i32)); 8898 8899 // Do not add new nodes to DAG combiner worklist. 8900 DCI.CombineTo(N, Res, false); 8901 } 8902 8903 return SDValue(); 8904 } 8905 8906 static SDValue PerformXORCombine(SDNode *N, 8907 TargetLowering::DAGCombinerInfo &DCI, 8908 const ARMSubtarget *Subtarget) { 8909 EVT VT = N->getValueType(0); 8910 SelectionDAG &DAG = DCI.DAG; 8911 8912 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 8913 return SDValue(); 8914 8915 if (!Subtarget->isThumb1Only()) { 8916 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) 8917 SDValue Result = combineSelectAndUseCommutative(N, false, DCI); 8918 if (Result.getNode()) 8919 return Result; 8920 } 8921 8922 return SDValue(); 8923 } 8924 8925 /// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff 8926 /// the bits being cleared by the AND are not demanded by the BFI. 
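/// E.g. a BFI that inserts an 8-bit field reads only the low 8 bits of its
/// second operand, so a preceding (and B, 0xFF) clears nothing that is
/// demanded and can be folded away.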
8927 static SDValue PerformBFICombine(SDNode *N, 8928 TargetLowering::DAGCombinerInfo &DCI) { 8929 SDValue N1 = N->getOperand(1); 8930 if (N1.getOpcode() == ISD::AND) { 8931 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 8932 if (!N11C) 8933 return SDValue(); 8934 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 8935 unsigned LSB = countTrailingZeros(~InvMask); 8936 unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB; 8937 unsigned Mask = (1 << Width)-1; 8938 unsigned Mask2 = N11C->getZExtValue(); 8939 if ((Mask & (~Mask2)) == 0) 8940 return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), 8941 N->getOperand(0), N1.getOperand(0), 8942 N->getOperand(2)); 8943 } 8944 return SDValue(); 8945 } 8946 8947 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for 8948 /// ARMISD::VMOVRRD. 8949 static SDValue PerformVMOVRRDCombine(SDNode *N, 8950 TargetLowering::DAGCombinerInfo &DCI) { 8951 // vmovrrd(vmovdrr x, y) -> x,y 8952 SDValue InDouble = N->getOperand(0); 8953 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 8954 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 8955 8956 // vmovrrd(load f64) -> (load i32), (load i32) 8957 SDNode *InNode = InDouble.getNode(); 8958 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && 8959 InNode->getValueType(0) == MVT::f64 && 8960 InNode->getOperand(1).getOpcode() == ISD::FrameIndex && 8961 !cast<LoadSDNode>(InNode)->isVolatile()) { 8962 // TODO: Should this be done for non-FrameIndex operands? 8963 LoadSDNode *LD = cast<LoadSDNode>(InNode); 8964 8965 SelectionDAG &DAG = DCI.DAG; 8966 SDLoc DL(LD); 8967 SDValue BasePtr = LD->getBasePtr(); 8968 SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, 8969 LD->getPointerInfo(), LD->isVolatile(), 8970 LD->isNonTemporal(), LD->isInvariant(), 8971 LD->getAlignment()); 8972 8973 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 8974 DAG.getConstant(4, MVT::i32)); 8975 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, 8976 LD->getPointerInfo(), LD->isVolatile(), 8977 LD->isNonTemporal(), LD->isInvariant(), 8978 std::min(4U, LD->getAlignment() / 2)); 8979 8980 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); 8981 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); 8982 DCI.RemoveFromWorklist(LD); 8983 DAG.DeleteNode(LD); 8984 return Result; 8985 } 8986 8987 return SDValue(); 8988 } 8989 8990 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for 8991 /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. 8992 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { 8993 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) 8994 SDValue Op0 = N->getOperand(0); 8995 SDValue Op1 = N->getOperand(1); 8996 if (Op0.getOpcode() == ISD::BITCAST) 8997 Op0 = Op0.getOperand(0); 8998 if (Op1.getOpcode() == ISD::BITCAST) 8999 Op1 = Op1.getOperand(0); 9000 if (Op0.getOpcode() == ARMISD::VMOVRRD && 9001 Op0.getNode() == Op1.getNode() && 9002 Op0.getResNo() == 0 && Op1.getResNo() == 1) 9003 return DAG.getNode(ISD::BITCAST, SDLoc(N), 9004 N->getValueType(0), Op0.getOperand(0)); 9005 return SDValue(); 9006 } 9007 9008 /// PerformSTORECombine - Target-specific dag combine xforms for 9009 /// ISD::STORE. 
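/// Several patterns are handled below: truncating vector stores (rewritten
/// as a shuffle followed by narrower scalar stores), stores of a VMOVDRR
/// (split into two i32 stores), and i64 stores of values extracted from a
/// vector (bitcast to f64 so the i64 is not legalized into a pair of i32s).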
9010 static SDValue PerformSTORECombine(SDNode *N, 9011 TargetLowering::DAGCombinerInfo &DCI) { 9012 StoreSDNode *St = cast<StoreSDNode>(N); 9013 if (St->isVolatile()) 9014 return SDValue(); 9015 9016 // Optimize trunc store (of multiple scalars) to shuffle and store. First, 9017 // pack all of the elements in one place. Next, store to memory in fewer 9018 // chunks. 9019 SDValue StVal = St->getValue(); 9020 EVT VT = StVal.getValueType(); 9021 if (St->isTruncatingStore() && VT.isVector()) { 9022 SelectionDAG &DAG = DCI.DAG; 9023 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9024 EVT StVT = St->getMemoryVT(); 9025 unsigned NumElems = VT.getVectorNumElements(); 9026 assert(StVT != VT && "Cannot truncate to the same type"); 9027 unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); 9028 unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); 9029 9030 // From, To sizes and ElemCount must be pow of two 9031 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); 9032 9033 // We are going to use the original vector elt for storing. 9034 // Accumulated smaller vector elements must be a multiple of the store size. 9035 if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); 9036 9037 unsigned SizeRatio = FromEltSz / ToEltSz; 9038 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); 9039 9040 // Create a type on which we perform the shuffle. 9041 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), 9042 NumElems*SizeRatio); 9043 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); 9044 9045 SDLoc DL(St); 9046 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); 9047 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); 9048 for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; 9049 9050 // Can't shuffle using an illegal type. 9051 if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); 9052 9053 SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, 9054 DAG.getUNDEF(WideVec.getValueType()), 9055 ShuffleVec.data()); 9056 // At this point all of the data is stored at the bottom of the 9057 // register. We now need to save it to mem. 9058 9059 // Find the largest store unit 9060 MVT StoreType = MVT::i8; 9061 for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; 9062 tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { 9063 MVT Tp = (MVT::SimpleValueType)tp; 9064 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) 9065 StoreType = Tp; 9066 } 9067 // Didn't find a legal store type. 9068 if (!TLI.isTypeLegal(StoreType)) 9069 return SDValue(); 9070 9071 // Bitcast the original vector into a vector of store-size units 9072 EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), 9073 StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); 9074 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); 9075 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); 9076 SmallVector<SDValue, 8> Chains; 9077 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, 9078 TLI.getPointerTy()); 9079 SDValue BasePtr = St->getBasePtr(); 9080 9081 // Perform one or more big stores into memory. 
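    // For example, truncating a v4i32 value for a v4i16 store leaves the
    // packed data in the low 64 bits of the shuffled vector, which is then
    // written out as two i32 stores (i32 being the widest legal integer
    // store type here).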
9082 unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); 9083 for (unsigned I = 0; I < E; I++) { 9084 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 9085 StoreType, ShuffWide, 9086 DAG.getIntPtrConstant(I)); 9087 SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, 9088 St->getPointerInfo(), St->isVolatile(), 9089 St->isNonTemporal(), St->getAlignment()); 9090 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, 9091 Increment); 9092 Chains.push_back(Ch); 9093 } 9094 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], 9095 Chains.size()); 9096 } 9097 9098 if (!ISD::isNormalStore(St)) 9099 return SDValue(); 9100 9101 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and 9102 // ARM stores of arguments in the same cache line. 9103 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && 9104 StVal.getNode()->hasOneUse()) { 9105 SelectionDAG &DAG = DCI.DAG; 9106 SDLoc DL(St); 9107 SDValue BasePtr = St->getBasePtr(); 9108 SDValue NewST1 = DAG.getStore(St->getChain(), DL, 9109 StVal.getNode()->getOperand(0), BasePtr, 9110 St->getPointerInfo(), St->isVolatile(), 9111 St->isNonTemporal(), St->getAlignment()); 9112 9113 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 9114 DAG.getConstant(4, MVT::i32)); 9115 return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), 9116 OffsetPtr, St->getPointerInfo(), St->isVolatile(), 9117 St->isNonTemporal(), 9118 std::min(4U, St->getAlignment() / 2)); 9119 } 9120 9121 if (StVal.getValueType() != MVT::i64 || 9122 StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 9123 return SDValue(); 9124 9125 // Bitcast an i64 store extracted from a vector to f64. 9126 // Otherwise, the i64 value will be legalized to a pair of i32 values. 9127 SelectionDAG &DAG = DCI.DAG; 9128 SDLoc dl(StVal); 9129 SDValue IntVec = StVal.getOperand(0); 9130 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 9131 IntVec.getValueType().getVectorNumElements()); 9132 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); 9133 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, 9134 Vec, StVal.getOperand(1)); 9135 dl = SDLoc(N); 9136 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); 9137 // Make the DAGCombiner fold the bitcasts. 9138 DCI.AddToWorklist(Vec.getNode()); 9139 DCI.AddToWorklist(ExtElt.getNode()); 9140 DCI.AddToWorklist(V.getNode()); 9141 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), 9142 St->getPointerInfo(), St->isVolatile(), 9143 St->isNonTemporal(), St->getAlignment(), 9144 St->getTBAAInfo()); 9145 } 9146 9147 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node 9148 /// are normal, non-volatile loads. If so, it is profitable to bitcast an 9149 /// i64 vector to have f64 elements, since the value can then be loaded 9150 /// directly into a VFP register. 9151 static bool hasNormalLoadOperand(SDNode *N) { 9152 unsigned NumElts = N->getValueType(0).getVectorNumElements(); 9153 for (unsigned i = 0; i < NumElts; ++i) { 9154 SDNode *Elt = N->getOperand(i).getNode(); 9155 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile()) 9156 return true; 9157 } 9158 return false; 9159 } 9160 9161 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for 9162 /// ISD::BUILD_VECTOR. 
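/// Two cases are handled: a two-operand BUILD_VECTOR fed by VMOVRRD, which
/// is folded via PerformVMOVDRRCombine, and an i64-element BUILD_VECTOR with
/// a normal load operand, whose elements are loaded as f64 so that type
/// legalization does not split them into pairs of i32 values.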
9163 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
9164                                           TargetLowering::DAGCombinerInfo &DCI) {
9165   // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
9166   // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
9167   // into a pair of GPRs, which is fine when the value is used as a scalar,
9168   // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
9169   SelectionDAG &DAG = DCI.DAG;
9170   if (N->getNumOperands() == 2) {
9171     SDValue RV = PerformVMOVDRRCombine(N, DAG);
9172     if (RV.getNode())
9173       return RV;
9174   }
9175 
9176   // Load i64 elements as f64 values so that type legalization does not split
9177   // them up into i32 values.
9178   EVT VT = N->getValueType(0);
9179   if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
9180     return SDValue();
9181   SDLoc dl(N);
9182   SmallVector<SDValue, 8> Ops;
9183   unsigned NumElts = VT.getVectorNumElements();
9184   for (unsigned i = 0; i < NumElts; ++i) {
9185     SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
9186     Ops.push_back(V);
9187     // Make the DAGCombiner fold the bitcast.
9188     DCI.AddToWorklist(V.getNode());
9189   }
9190   EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
9191   SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
9192   return DAG.getNode(ISD::BITCAST, dl, VT, BV);
9193 }
9194 
9195 /// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
9196 static SDValue
9197 PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
9198   // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
9199   // At that time, we may have inserted bitcasts from integer to float.
9200   // If these bitcasts have survived DAGCombine, change the lowering of this
9201   // BUILD_VECTOR into something more vector friendly, i.e., something that
9202   // does not force the use of floating point types.
9203 
9204   // Make sure we can change the type of the vector.
9205   // This is possible iff:
9206   // 1. The vector is only used in a bitcast to an integer type. I.e.,
9207   //    1.1. Vector is used only once.
9208   //    1.2. Use is a bit convert to an integer type.
9209   // 2. The size of its operands is 32 bits (64-bit operands are not legal).
9210   EVT VT = N->getValueType(0);
9211   EVT EltVT = VT.getVectorElementType();
9212 
9213   // Check 1.1. and 2.
9214   if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
9215     return SDValue();
9216 
9217   // By construction, the input type must be float.
9218   assert(EltVT == MVT::f32 && "Unexpected type!");
9219 
9220   // Check 1.2.
9221   SDNode *Use = *N->use_begin();
9222   if (Use->getOpcode() != ISD::BITCAST ||
9223       Use->getValueType(0).isFloatingPoint())
9224     return SDValue();
9225 
9226   // Check profitability.
9227   // The model is: if more than half of the relevant operands are bitcast
9228   // from i32, turn the build_vector into a sequence of insert_vector_elt.
9229   // Relevant operands are everything that is not statically
9230   // (i.e., at compile time) bitcast.
9231   unsigned NumOfBitCastedElts = 0;
9232   unsigned NumElts = VT.getVectorNumElements();
9233   unsigned NumOfRelevantElts = NumElts;
9234   for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
9235     SDValue Elt = N->getOperand(Idx);
9236     if (Elt->getOpcode() == ISD::BITCAST) {
9237       // Assume only bitcasts from i32 will go away.
9238 if (Elt->getOperand(0).getValueType() == MVT::i32) 9239 ++NumOfBitCastedElts; 9240 } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt)) 9241 // Constants are statically casted, thus do not count them as 9242 // relevant operands. 9243 --NumOfRelevantElts; 9244 } 9245 9246 // Check if more than half of the elements require a non-free bitcast. 9247 if (NumOfBitCastedElts <= NumOfRelevantElts / 2) 9248 return SDValue(); 9249 9250 SelectionDAG &DAG = DCI.DAG; 9251 // Create the new vector type. 9252 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 9253 // Check if the type is legal. 9254 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9255 if (!TLI.isTypeLegal(VecVT)) 9256 return SDValue(); 9257 9258 // Combine: 9259 // ARMISD::BUILD_VECTOR E1, E2, ..., EN. 9260 // => BITCAST INSERT_VECTOR_ELT 9261 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1), 9262 // (BITCAST EN), N. 9263 SDValue Vec = DAG.getUNDEF(VecVT); 9264 SDLoc dl(N); 9265 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) { 9266 SDValue V = N->getOperand(Idx); 9267 if (V.getOpcode() == ISD::UNDEF) 9268 continue; 9269 if (V.getOpcode() == ISD::BITCAST && 9270 V->getOperand(0).getValueType() == MVT::i32) 9271 // Fold obvious case. 9272 V = V.getOperand(0); 9273 else { 9274 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); 9275 // Make the DAGCombiner fold the bitcasts. 9276 DCI.AddToWorklist(V.getNode()); 9277 } 9278 SDValue LaneIdx = DAG.getConstant(Idx, MVT::i32); 9279 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx); 9280 } 9281 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec); 9282 // Make the DAGCombiner fold the bitcasts. 9283 DCI.AddToWorklist(Vec.getNode()); 9284 return Vec; 9285 } 9286 9287 /// PerformInsertEltCombine - Target-specific dag combine xforms for 9288 /// ISD::INSERT_VECTOR_ELT. 9289 static SDValue PerformInsertEltCombine(SDNode *N, 9290 TargetLowering::DAGCombinerInfo &DCI) { 9291 // Bitcast an i64 load inserted into a vector to f64. 9292 // Otherwise, the i64 value will be legalized to a pair of i32 values. 9293 EVT VT = N->getValueType(0); 9294 SDNode *Elt = N->getOperand(1).getNode(); 9295 if (VT.getVectorElementType() != MVT::i64 || 9296 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile()) 9297 return SDValue(); 9298 9299 SelectionDAG &DAG = DCI.DAG; 9300 SDLoc dl(N); 9301 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 9302 VT.getVectorNumElements()); 9303 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); 9304 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1)); 9305 // Make the DAGCombiner fold the bitcasts. 9306 DCI.AddToWorklist(Vec.getNode()); 9307 DCI.AddToWorklist(V.getNode()); 9308 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT, 9309 Vec, V, N->getOperand(2)); 9310 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt); 9311 } 9312 9313 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for 9314 /// ISD::VECTOR_SHUFFLE. 9315 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { 9316 // The LLVM shufflevector instruction does not require the shuffle mask 9317 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does 9318 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the 9319 // operands do not match the mask length, they are extended by concatenating 9320 // them with undef vectors. 
That is probably the right thing for other 9321 // targets, but for NEON it is better to concatenate two double-register 9322 // size vector operands into a single quad-register size vector. Do that 9323 // transformation here: 9324 // shuffle(concat(v1, undef), concat(v2, undef)) -> 9325 // shuffle(concat(v1, v2), undef) 9326 SDValue Op0 = N->getOperand(0); 9327 SDValue Op1 = N->getOperand(1); 9328 if (Op0.getOpcode() != ISD::CONCAT_VECTORS || 9329 Op1.getOpcode() != ISD::CONCAT_VECTORS || 9330 Op0.getNumOperands() != 2 || 9331 Op1.getNumOperands() != 2) 9332 return SDValue(); 9333 SDValue Concat0Op1 = Op0.getOperand(1); 9334 SDValue Concat1Op1 = Op1.getOperand(1); 9335 if (Concat0Op1.getOpcode() != ISD::UNDEF || 9336 Concat1Op1.getOpcode() != ISD::UNDEF) 9337 return SDValue(); 9338 // Skip the transformation if any of the types are illegal. 9339 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9340 EVT VT = N->getValueType(0); 9341 if (!TLI.isTypeLegal(VT) || 9342 !TLI.isTypeLegal(Concat0Op1.getValueType()) || 9343 !TLI.isTypeLegal(Concat1Op1.getValueType())) 9344 return SDValue(); 9345 9346 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, 9347 Op0.getOperand(0), Op1.getOperand(0)); 9348 // Translate the shuffle mask. 9349 SmallVector<int, 16> NewMask; 9350 unsigned NumElts = VT.getVectorNumElements(); 9351 unsigned HalfElts = NumElts/2; 9352 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 9353 for (unsigned n = 0; n < NumElts; ++n) { 9354 int MaskElt = SVN->getMaskElt(n); 9355 int NewElt = -1; 9356 if (MaskElt < (int)HalfElts) 9357 NewElt = MaskElt; 9358 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts)) 9359 NewElt = HalfElts + MaskElt - NumElts; 9360 NewMask.push_back(NewElt); 9361 } 9362 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat, 9363 DAG.getUNDEF(VT), NewMask.data()); 9364 } 9365 9366 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and 9367 /// NEON load/store intrinsics to merge base address updates. 9368 static SDValue CombineBaseUpdate(SDNode *N, 9369 TargetLowering::DAGCombinerInfo &DCI) { 9370 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 9371 return SDValue(); 9372 9373 SelectionDAG &DAG = DCI.DAG; 9374 bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || 9375 N->getOpcode() == ISD::INTRINSIC_W_CHAIN); 9376 unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); 9377 SDValue Addr = N->getOperand(AddrOpIdx); 9378 9379 // Search for a use of the address operand that is an increment. 9380 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), 9381 UE = Addr.getNode()->use_end(); UI != UE; ++UI) { 9382 SDNode *User = *UI; 9383 if (User->getOpcode() != ISD::ADD || 9384 UI.getUse().getResNo() != Addr.getResNo()) 9385 continue; 9386 9387 // Check that the add is independent of the load/store. Otherwise, folding 9388 // it would create a cycle. 9389 if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) 9390 continue; 9391 9392 // Find the new opcode for the updating load/store. 
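    // For example, a vld1 of a 128-bit vector whose address also feeds an
    // (add addr, #16) can become a single VLD1_UPD that additionally
    // produces the post-incremented address, replacing both nodes.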
9393 bool isLoad = true; 9394 bool isLaneOp = false; 9395 unsigned NewOpc = 0; 9396 unsigned NumVecs = 0; 9397 if (isIntrinsic) { 9398 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 9399 switch (IntNo) { 9400 default: llvm_unreachable("unexpected intrinsic for Neon base update"); 9401 case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; 9402 NumVecs = 1; break; 9403 case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; 9404 NumVecs = 2; break; 9405 case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; 9406 NumVecs = 3; break; 9407 case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; 9408 NumVecs = 4; break; 9409 case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; 9410 NumVecs = 2; isLaneOp = true; break; 9411 case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; 9412 NumVecs = 3; isLaneOp = true; break; 9413 case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; 9414 NumVecs = 4; isLaneOp = true; break; 9415 case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; 9416 NumVecs = 1; isLoad = false; break; 9417 case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; 9418 NumVecs = 2; isLoad = false; break; 9419 case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; 9420 NumVecs = 3; isLoad = false; break; 9421 case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; 9422 NumVecs = 4; isLoad = false; break; 9423 case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; 9424 NumVecs = 2; isLoad = false; isLaneOp = true; break; 9425 case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; 9426 NumVecs = 3; isLoad = false; isLaneOp = true; break; 9427 case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; 9428 NumVecs = 4; isLoad = false; isLaneOp = true; break; 9429 } 9430 } else { 9431 isLaneOp = true; 9432 switch (N->getOpcode()) { 9433 default: llvm_unreachable("unexpected opcode for Neon base update"); 9434 case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; 9435 case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; 9436 case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; 9437 } 9438 } 9439 9440 // Find the size of memory referenced by the load/store. 9441 EVT VecTy; 9442 if (isLoad) 9443 VecTy = N->getValueType(0); 9444 else 9445 VecTy = N->getOperand(AddrOpIdx+1).getValueType(); 9446 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; 9447 if (isLaneOp) 9448 NumBytes /= VecTy.getVectorNumElements(); 9449 9450 // If the increment is a constant, it must match the memory ref size. 9451 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); 9452 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { 9453 uint64_t IncVal = CInc->getZExtValue(); 9454 if (IncVal != NumBytes) 9455 continue; 9456 } else if (NumBytes >= 3 * 16) { 9457 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two 9458 // separate instructions that make it harder to use a non-constant update. 9459 continue; 9460 } 9461 9462 // Create the new updating load/store node. 9463 EVT Tys[6]; 9464 unsigned NumResultVecs = (isLoad ? 
NumVecs : 0); 9465 unsigned n; 9466 for (n = 0; n < NumResultVecs; ++n) 9467 Tys[n] = VecTy; 9468 Tys[n++] = MVT::i32; 9469 Tys[n] = MVT::Other; 9470 SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); 9471 SmallVector<SDValue, 8> Ops; 9472 Ops.push_back(N->getOperand(0)); // incoming chain 9473 Ops.push_back(N->getOperand(AddrOpIdx)); 9474 Ops.push_back(Inc); 9475 for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { 9476 Ops.push_back(N->getOperand(i)); 9477 } 9478 MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); 9479 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, 9480 Ops.data(), Ops.size(), 9481 MemInt->getMemoryVT(), 9482 MemInt->getMemOperand()); 9483 9484 // Update the uses. 9485 std::vector<SDValue> NewResults; 9486 for (unsigned i = 0; i < NumResultVecs; ++i) { 9487 NewResults.push_back(SDValue(UpdN.getNode(), i)); 9488 } 9489 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain 9490 DCI.CombineTo(N, NewResults); 9491 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); 9492 9493 break; 9494 } 9495 return SDValue(); 9496 } 9497 9498 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a 9499 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic 9500 /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and 9501 /// return true. 9502 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 9503 SelectionDAG &DAG = DCI.DAG; 9504 EVT VT = N->getValueType(0); 9505 // vldN-dup instructions only support 64-bit vectors for N > 1. 9506 if (!VT.is64BitVector()) 9507 return false; 9508 9509 // Check if the VDUPLANE operand is a vldN-dup intrinsic. 9510 SDNode *VLD = N->getOperand(0).getNode(); 9511 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) 9512 return false; 9513 unsigned NumVecs = 0; 9514 unsigned NewOpc = 0; 9515 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); 9516 if (IntNo == Intrinsic::arm_neon_vld2lane) { 9517 NumVecs = 2; 9518 NewOpc = ARMISD::VLD2DUP; 9519 } else if (IntNo == Intrinsic::arm_neon_vld3lane) { 9520 NumVecs = 3; 9521 NewOpc = ARMISD::VLD3DUP; 9522 } else if (IntNo == Intrinsic::arm_neon_vld4lane) { 9523 NumVecs = 4; 9524 NewOpc = ARMISD::VLD4DUP; 9525 } else { 9526 return false; 9527 } 9528 9529 // First check that all the vldN-lane uses are VDUPLANEs and that the lane 9530 // numbers match the load. 9531 unsigned VLDLaneNo = 9532 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue(); 9533 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 9534 UI != UE; ++UI) { 9535 // Ignore uses of the chain result. 9536 if (UI.getUse().getResNo() == NumVecs) 9537 continue; 9538 SDNode *User = *UI; 9539 if (User->getOpcode() != ARMISD::VDUPLANE || 9540 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) 9541 return false; 9542 } 9543 9544 // Create the vldN-dup node. 9545 EVT Tys[5]; 9546 unsigned n; 9547 for (n = 0; n < NumVecs; ++n) 9548 Tys[n] = VT; 9549 Tys[n] = MVT::Other; 9550 SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); 9551 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; 9552 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); 9553 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, 9554 Ops, 2, VLDMemInt->getMemoryVT(), 9555 VLDMemInt->getMemOperand()); 9556 9557 // Update the uses. 
9558 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 9559 UI != UE; ++UI) { 9560 unsigned ResNo = UI.getUse().getResNo(); 9561 // Ignore uses of the chain result. 9562 if (ResNo == NumVecs) 9563 continue; 9564 SDNode *User = *UI; 9565 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); 9566 } 9567 9568 // Now the vldN-lane intrinsic is dead except for its chain result. 9569 // Update uses of the chain. 9570 std::vector<SDValue> VLDDupResults; 9571 for (unsigned n = 0; n < NumVecs; ++n) 9572 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); 9573 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); 9574 DCI.CombineTo(VLD, VLDDupResults); 9575 9576 return true; 9577 } 9578 9579 /// PerformVDUPLANECombine - Target-specific dag combine xforms for 9580 /// ARMISD::VDUPLANE. 9581 static SDValue PerformVDUPLANECombine(SDNode *N, 9582 TargetLowering::DAGCombinerInfo &DCI) { 9583 SDValue Op = N->getOperand(0); 9584 9585 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses 9586 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation. 9587 if (CombineVLDDUP(N, DCI)) 9588 return SDValue(N, 0); 9589 9590 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is 9591 // redundant. Ignore bit_converts for now; element sizes are checked below. 9592 while (Op.getOpcode() == ISD::BITCAST) 9593 Op = Op.getOperand(0); 9594 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) 9595 return SDValue(); 9596 9597 // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 9598 unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); 9599 // The canonical VMOV for a zero vector uses a 32-bit element size. 9600 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 9601 unsigned EltBits; 9602 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) 9603 EltSize = 8; 9604 EVT VT = N->getValueType(0); 9605 if (EltSize > VT.getVectorElementType().getSizeInBits()) 9606 return SDValue(); 9607 9608 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); 9609 } 9610 9611 // isConstVecPow2 - Return true if each vector element is a power of 2, all 9612 // elements are the same constant, C, and Log2(C) ranges from 1 to 32. 9613 static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) 9614 { 9615 integerPart cN; 9616 integerPart c0 = 0; 9617 for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); 9618 I != E; I++) { 9619 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I)); 9620 if (!C) 9621 return false; 9622 9623 bool isExact; 9624 APFloat APF = C->getValueAPF(); 9625 if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) 9626 != APFloat::opOK || !isExact) 9627 return false; 9628 9629 c0 = (I == 0) ? cN : c0; 9630 if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) 9631 return false; 9632 } 9633 C = c0; 9634 return true; 9635 } 9636 9637 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) 9638 /// can replace combinations of VMUL and VCVT (floating-point to integer) 9639 /// when the VMUL has a constant operand that is a power of 2. 
9640 /// 9641 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 9642 /// vmul.f32 d16, d17, d16 9643 /// vcvt.s32.f32 d16, d16 9644 /// becomes: 9645 /// vcvt.s32.f32 d16, d16, #3 9646 static SDValue PerformVCVTCombine(SDNode *N, 9647 TargetLowering::DAGCombinerInfo &DCI, 9648 const ARMSubtarget *Subtarget) { 9649 SelectionDAG &DAG = DCI.DAG; 9650 SDValue Op = N->getOperand(0); 9651 9652 if (!Subtarget->hasNEON() || !Op.getValueType().isVector() || 9653 Op.getOpcode() != ISD::FMUL) 9654 return SDValue(); 9655 9656 uint64_t C; 9657 SDValue N0 = Op->getOperand(0); 9658 SDValue ConstVec = Op->getOperand(1); 9659 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; 9660 9661 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 9662 !isConstVecPow2(ConstVec, isSigned, C)) 9663 return SDValue(); 9664 9665 MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); 9666 MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); 9667 if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { 9668 // These instructions only exist converting from f32 to i32. We can handle 9669 // smaller integers by generating an extra truncate, but larger ones would 9670 // be lossy. 9671 return SDValue(); 9672 } 9673 9674 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : 9675 Intrinsic::arm_neon_vcvtfp2fxu; 9676 unsigned NumLanes = Op.getValueType().getVectorNumElements(); 9677 SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), 9678 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, 9679 DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, 9680 DAG.getConstant(Log2_64(C), MVT::i32)); 9681 9682 if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) 9683 FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv); 9684 9685 return FixConv; 9686 } 9687 9688 /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) 9689 /// can replace combinations of VCVT (integer to floating-point) and VDIV 9690 /// when the VDIV has a constant operand that is a power of 2. 9691 /// 9692 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 9693 /// vcvt.f32.s32 d16, d16 9694 /// vdiv.f32 d16, d17, d16 9695 /// becomes: 9696 /// vcvt.f32.s32 d16, d16, #3 9697 static SDValue PerformVDIVCombine(SDNode *N, 9698 TargetLowering::DAGCombinerInfo &DCI, 9699 const ARMSubtarget *Subtarget) { 9700 SelectionDAG &DAG = DCI.DAG; 9701 SDValue Op = N->getOperand(0); 9702 unsigned OpOpcode = Op.getNode()->getOpcode(); 9703 9704 if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() || 9705 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) 9706 return SDValue(); 9707 9708 uint64_t C; 9709 SDValue ConstVec = N->getOperand(1); 9710 bool isSigned = OpOpcode == ISD::SINT_TO_FP; 9711 9712 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 9713 !isConstVecPow2(ConstVec, isSigned, C)) 9714 return SDValue(); 9715 9716 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType(); 9717 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType(); 9718 if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { 9719 // These instructions only exist converting from i32 to f32. We can handle 9720 // smaller integers by generating an extra extend, but larger ones would 9721 // be lossy. 9722 return SDValue(); 9723 } 9724 9725 SDValue ConvInput = Op.getOperand(0); 9726 unsigned NumLanes = Op.getValueType().getVectorNumElements(); 9727 if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) 9728 ConvInput = DAG.getNode(isSigned ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
9729                             SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
9730                             ConvInput);
9731 
9732   unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
9733     Intrinsic::arm_neon_vcvtfxu2fp;
9734   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
9735                      Op.getValueType(),
9736                      DAG.getConstant(IntrinsicOpcode, MVT::i32),
9737                      ConvInput, DAG.getConstant(Log2_64(C), MVT::i32));
9738 }
9739 
9740 /// getVShiftImm - Check if this is a valid build_vector for the immediate
9741 /// operand of a vector shift operation, where all the elements of the
9742 /// build_vector must have the same constant integer value.
9743 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
9744   // Ignore bit_converts.
9745   while (Op.getOpcode() == ISD::BITCAST)
9746     Op = Op.getOperand(0);
9747   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9748   APInt SplatBits, SplatUndef;
9749   unsigned SplatBitSize;
9750   bool HasAnyUndefs;
9751   if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
9752                                     HasAnyUndefs, ElementBits) ||
9753       SplatBitSize > ElementBits)
9754     return false;
9755   Cnt = SplatBits.getSExtValue();
9756   return true;
9757 }
9758 
9759 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
9760 /// operand of a vector shift left operation. That value must be in the range:
9761 ///   0 <= Value < ElementBits for a left shift; or
9762 ///   0 <= Value <= ElementBits for a long left shift.
9763 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
9764   assert(VT.isVector() && "vector shift count is not a vector type");
9765   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
9766   if (!getVShiftImm(Op, ElementBits, Cnt))
9767     return false;
9768   return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
9769 }
9770 
9771 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
9772 /// operand of a vector shift right operation. For a shift opcode, the count
9773 /// is positive, but for an intrinsic the count must be negative. The
9774 /// absolute value must be in the range:
9775 ///   1 <= |Value| <= ElementBits for a right shift; or
9776 ///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
9777 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
9778                          int64_t &Cnt) {
9779   assert(VT.isVector() && "vector shift count is not a vector type");
9780   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
9781   if (!getVShiftImm(Op, ElementBits, Cnt))
9782     return false;
9783   if (isIntrinsic)
9784     Cnt = -Cnt;
9785   return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
9786 }
9787 
9788 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
9789 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
9790   unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
9791   switch (IntNo) {
9792   default:
9793     // Don't do anything for most intrinsics.
9794     break;
9795 
9796   // Vector shifts: check for immediate versions and lower them.
9797   // Note: This is done during DAG combining instead of DAG legalizing because
9798   // the build_vectors for 64-bit vector element shift counts are generally
9799   // not legal, and it is hard to see their values after they get legalized to
9800   // loads from a constant pool.
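  // For example, an arm_neon_vshifts whose count operand is a splatted
  // constant 3 becomes (ARMISD::VSHL x, #3) below, while a splatted negative
  // count selects the immediate right-shift form instead.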
9801 case Intrinsic::arm_neon_vshifts: 9802 case Intrinsic::arm_neon_vshiftu: 9803 case Intrinsic::arm_neon_vshiftls: 9804 case Intrinsic::arm_neon_vshiftlu: 9805 case Intrinsic::arm_neon_vshiftn: 9806 case Intrinsic::arm_neon_vrshifts: 9807 case Intrinsic::arm_neon_vrshiftu: 9808 case Intrinsic::arm_neon_vrshiftn: 9809 case Intrinsic::arm_neon_vqshifts: 9810 case Intrinsic::arm_neon_vqshiftu: 9811 case Intrinsic::arm_neon_vqshiftsu: 9812 case Intrinsic::arm_neon_vqshiftns: 9813 case Intrinsic::arm_neon_vqshiftnu: 9814 case Intrinsic::arm_neon_vqshiftnsu: 9815 case Intrinsic::arm_neon_vqrshiftns: 9816 case Intrinsic::arm_neon_vqrshiftnu: 9817 case Intrinsic::arm_neon_vqrshiftnsu: { 9818 EVT VT = N->getOperand(1).getValueType(); 9819 int64_t Cnt; 9820 unsigned VShiftOpc = 0; 9821 9822 switch (IntNo) { 9823 case Intrinsic::arm_neon_vshifts: 9824 case Intrinsic::arm_neon_vshiftu: 9825 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 9826 VShiftOpc = ARMISD::VSHL; 9827 break; 9828 } 9829 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 9830 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 9831 ARMISD::VSHRs : ARMISD::VSHRu); 9832 break; 9833 } 9834 return SDValue(); 9835 9836 case Intrinsic::arm_neon_vshiftls: 9837 case Intrinsic::arm_neon_vshiftlu: 9838 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 9839 break; 9840 llvm_unreachable("invalid shift count for vshll intrinsic"); 9841 9842 case Intrinsic::arm_neon_vrshifts: 9843 case Intrinsic::arm_neon_vrshiftu: 9844 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 9845 break; 9846 return SDValue(); 9847 9848 case Intrinsic::arm_neon_vqshifts: 9849 case Intrinsic::arm_neon_vqshiftu: 9850 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 9851 break; 9852 return SDValue(); 9853 9854 case Intrinsic::arm_neon_vqshiftsu: 9855 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 9856 break; 9857 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 9858 9859 case Intrinsic::arm_neon_vshiftn: 9860 case Intrinsic::arm_neon_vrshiftn: 9861 case Intrinsic::arm_neon_vqshiftns: 9862 case Intrinsic::arm_neon_vqshiftnu: 9863 case Intrinsic::arm_neon_vqshiftnsu: 9864 case Intrinsic::arm_neon_vqrshiftns: 9865 case Intrinsic::arm_neon_vqrshiftnu: 9866 case Intrinsic::arm_neon_vqrshiftnsu: 9867 // Narrowing shifts require an immediate right shift. 9868 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 9869 break; 9870 llvm_unreachable("invalid shift count for narrowing vector shift " 9871 "intrinsic"); 9872 9873 default: 9874 llvm_unreachable("unhandled vector shift"); 9875 } 9876 9877 switch (IntNo) { 9878 case Intrinsic::arm_neon_vshifts: 9879 case Intrinsic::arm_neon_vshiftu: 9880 // Opcode already set above. 9881 break; 9882 case Intrinsic::arm_neon_vshiftls: 9883 case Intrinsic::arm_neon_vshiftlu: 9884 if (Cnt == VT.getVectorElementType().getSizeInBits()) 9885 VShiftOpc = ARMISD::VSHLLi; 9886 else 9887 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
9888 ARMISD::VSHLLs : ARMISD::VSHLLu); 9889 break; 9890 case Intrinsic::arm_neon_vshiftn: 9891 VShiftOpc = ARMISD::VSHRN; break; 9892 case Intrinsic::arm_neon_vrshifts: 9893 VShiftOpc = ARMISD::VRSHRs; break; 9894 case Intrinsic::arm_neon_vrshiftu: 9895 VShiftOpc = ARMISD::VRSHRu; break; 9896 case Intrinsic::arm_neon_vrshiftn: 9897 VShiftOpc = ARMISD::VRSHRN; break; 9898 case Intrinsic::arm_neon_vqshifts: 9899 VShiftOpc = ARMISD::VQSHLs; break; 9900 case Intrinsic::arm_neon_vqshiftu: 9901 VShiftOpc = ARMISD::VQSHLu; break; 9902 case Intrinsic::arm_neon_vqshiftsu: 9903 VShiftOpc = ARMISD::VQSHLsu; break; 9904 case Intrinsic::arm_neon_vqshiftns: 9905 VShiftOpc = ARMISD::VQSHRNs; break; 9906 case Intrinsic::arm_neon_vqshiftnu: 9907 VShiftOpc = ARMISD::VQSHRNu; break; 9908 case Intrinsic::arm_neon_vqshiftnsu: 9909 VShiftOpc = ARMISD::VQSHRNsu; break; 9910 case Intrinsic::arm_neon_vqrshiftns: 9911 VShiftOpc = ARMISD::VQRSHRNs; break; 9912 case Intrinsic::arm_neon_vqrshiftnu: 9913 VShiftOpc = ARMISD::VQRSHRNu; break; 9914 case Intrinsic::arm_neon_vqrshiftnsu: 9915 VShiftOpc = ARMISD::VQRSHRNsu; break; 9916 } 9917 9918 return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), 9919 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 9920 } 9921 9922 case Intrinsic::arm_neon_vshiftins: { 9923 EVT VT = N->getOperand(1).getValueType(); 9924 int64_t Cnt; 9925 unsigned VShiftOpc = 0; 9926 9927 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 9928 VShiftOpc = ARMISD::VSLI; 9929 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 9930 VShiftOpc = ARMISD::VSRI; 9931 else { 9932 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 9933 } 9934 9935 return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), 9936 N->getOperand(1), N->getOperand(2), 9937 DAG.getConstant(Cnt, MVT::i32)); 9938 } 9939 9940 case Intrinsic::arm_neon_vqrshifts: 9941 case Intrinsic::arm_neon_vqrshiftu: 9942 // No immediate versions of these to check for. 9943 break; 9944 } 9945 9946 return SDValue(); 9947 } 9948 9949 /// PerformShiftCombine - Checks for immediate versions of vector shifts and 9950 /// lowers them. As with the vector shift intrinsics, this is done during DAG 9951 /// combining instead of DAG legalizing because the build_vectors for 64-bit 9952 /// vector element shift counts are generally not legal, and it is hard to see 9953 /// their values after they get legalized to loads from a constant pool. 9954 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 9955 const ARMSubtarget *ST) { 9956 EVT VT = N->getValueType(0); 9957 if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { 9958 // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 9959 // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. 9960 SDValue N1 = N->getOperand(1); 9961 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 9962 SDValue N0 = N->getOperand(0); 9963 if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && 9964 DAG.MaskedValueIsZero(N0.getOperand(0), 9965 APInt::getHighBitsSet(32, 16))) 9966 return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1); 9967 } 9968 } 9969 9970 // Nothing to be done for scalar shifts. 
9971 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9972 if (!VT.isVector() || !TLI.isTypeLegal(VT)) 9973 return SDValue(); 9974 9975 assert(ST->hasNEON() && "unexpected vector shift"); 9976 int64_t Cnt; 9977 9978 switch (N->getOpcode()) { 9979 default: llvm_unreachable("unexpected shift opcode"); 9980 9981 case ISD::SHL: 9982 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 9983 return DAG.getNode(ARMISD::VSHL, SDLoc(N), VT, N->getOperand(0), 9984 DAG.getConstant(Cnt, MVT::i32)); 9985 break; 9986 9987 case ISD::SRA: 9988 case ISD::SRL: 9989 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 9990 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 9991 ARMISD::VSHRs : ARMISD::VSHRu); 9992 return DAG.getNode(VShiftOpc, SDLoc(N), VT, N->getOperand(0), 9993 DAG.getConstant(Cnt, MVT::i32)); 9994 } 9995 } 9996 return SDValue(); 9997 } 9998 9999 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 10000 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 10001 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 10002 const ARMSubtarget *ST) { 10003 SDValue N0 = N->getOperand(0); 10004 10005 // Check for sign- and zero-extensions of vector extract operations of 8- 10006 // and 16-bit vector elements. NEON supports these directly. They are 10007 // handled during DAG combining because type legalization will promote them 10008 // to 32-bit types and it is messy to recognize the operations after that. 10009 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 10010 SDValue Vec = N0.getOperand(0); 10011 SDValue Lane = N0.getOperand(1); 10012 EVT VT = N->getValueType(0); 10013 EVT EltVT = N0.getValueType(); 10014 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 10015 10016 if (VT == MVT::i32 && 10017 (EltVT == MVT::i8 || EltVT == MVT::i16) && 10018 TLI.isTypeLegal(Vec.getValueType()) && 10019 isa<ConstantSDNode>(Lane)) { 10020 10021 unsigned Opc = 0; 10022 switch (N->getOpcode()) { 10023 default: llvm_unreachable("unexpected opcode"); 10024 case ISD::SIGN_EXTEND: 10025 Opc = ARMISD::VGETLANEs; 10026 break; 10027 case ISD::ZERO_EXTEND: 10028 case ISD::ANY_EXTEND: 10029 Opc = ARMISD::VGETLANEu; 10030 break; 10031 } 10032 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane); 10033 } 10034 } 10035 10036 return SDValue(); 10037 } 10038 10039 /// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 10040 /// to match f32 max/min patterns to use NEON vmax/vmin instructions. 10041 static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 10042 const ARMSubtarget *ST) { 10043 // If the target supports NEON, try to use vmax/vmin instructions for f32 10044 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 10045 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 10046 // a NaN; only do the transformation when it matches that behavior. 10047 10048 // For now only do this when using NEON for FP operations; if using VFP, it 10049 // is not obvious that the benefit outweighs the cost of switching to the 10050 // NEON pipeline. 
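  // For example, (select_cc x, y, x, y, setogt) can become (ARMISD::FMAX x, y)
  // when x is known not to be NaN; the greater-than-or-equal forms also need
  // unsafe FP math or a known-nonzero operand, since "-0 >= +0" would make
  // vmax return +0.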
10051 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 10052 N->getValueType(0) != MVT::f32) 10053 return SDValue(); 10054 10055 SDValue CondLHS = N->getOperand(0); 10056 SDValue CondRHS = N->getOperand(1); 10057 SDValue LHS = N->getOperand(2); 10058 SDValue RHS = N->getOperand(3); 10059 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 10060 10061 unsigned Opcode = 0; 10062 bool IsReversed; 10063 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 10064 IsReversed = false; // x CC y ? x : y 10065 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 10066 IsReversed = true ; // x CC y ? y : x 10067 } else { 10068 return SDValue(); 10069 } 10070 10071 bool IsUnordered; 10072 switch (CC) { 10073 default: break; 10074 case ISD::SETOLT: 10075 case ISD::SETOLE: 10076 case ISD::SETLT: 10077 case ISD::SETLE: 10078 case ISD::SETULT: 10079 case ISD::SETULE: 10080 // If LHS is NaN, an ordered comparison will be false and the result will 10081 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 10082 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 10083 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 10084 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 10085 break; 10086 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 10087 // will return -0, so vmin can only be used for unsafe math or if one of 10088 // the operands is known to be nonzero. 10089 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 10090 !DAG.getTarget().Options.UnsafeFPMath && 10091 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 10092 break; 10093 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 10094 break; 10095 10096 case ISD::SETOGT: 10097 case ISD::SETOGE: 10098 case ISD::SETGT: 10099 case ISD::SETGE: 10100 case ISD::SETUGT: 10101 case ISD::SETUGE: 10102 // If LHS is NaN, an ordered comparison will be false and the result will 10103 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 10104 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 10105 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 10106 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 10107 break; 10108 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 10109 // will return +0, so vmax can only be used for unsafe math or if one of 10110 // the operands is known to be nonzero. 10111 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 10112 !DAG.getTarget().Options.UnsafeFPMath && 10113 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 10114 break; 10115 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 10116 break; 10117 } 10118 10119 if (!Opcode) 10120 return SDValue(); 10121 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS); 10122 } 10123 10124 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. 10125 SDValue 10126 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { 10127 SDValue Cmp = N->getOperand(4); 10128 if (Cmp.getOpcode() != ARMISD::CMPZ) 10129 // Only looking at EQ and NE cases. 
10130 return SDValue(); 10131 10132 EVT VT = N->getValueType(0); 10133 SDLoc dl(N); 10134 SDValue LHS = Cmp.getOperand(0); 10135 SDValue RHS = Cmp.getOperand(1); 10136 SDValue FalseVal = N->getOperand(0); 10137 SDValue TrueVal = N->getOperand(1); 10138 SDValue ARMcc = N->getOperand(2); 10139 ARMCC::CondCodes CC = 10140 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 10141 10142 // Simplify 10143 // mov r1, r0 10144 // cmp r1, x 10145 // mov r0, y 10146 // moveq r0, x 10147 // to 10148 // cmp r0, x 10149 // movne r0, y 10150 // 10151 // mov r1, r0 10152 // cmp r1, x 10153 // mov r0, x 10154 // movne r0, y 10155 // to 10156 // cmp r0, x 10157 // movne r0, y 10158 /// FIXME: Turn this into a target neutral optimization? 10159 SDValue Res; 10160 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { 10161 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, 10162 N->getOperand(3), Cmp); 10163 } else if (CC == ARMCC::EQ && TrueVal == RHS) { 10164 SDValue ARMcc; 10165 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); 10166 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, 10167 N->getOperand(3), NewCmp); 10168 } 10169 10170 if (Res.getNode()) { 10171 APInt KnownZero, KnownOne; 10172 DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); 10173 // Capture demanded bits information that would be otherwise lost. 10174 if (KnownZero == 0xfffffffe) 10175 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 10176 DAG.getValueType(MVT::i1)); 10177 else if (KnownZero == 0xffffff00) 10178 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 10179 DAG.getValueType(MVT::i8)); 10180 else if (KnownZero == 0xffff0000) 10181 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 10182 DAG.getValueType(MVT::i16)); 10183 } 10184 10185 return Res; 10186 } 10187 10188 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 10189 DAGCombinerInfo &DCI) const { 10190 switch (N->getOpcode()) { 10191 default: break; 10192 case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); 10193 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); 10194 case ISD::SUB: return PerformSUBCombine(N, DCI); 10195 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 10196 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 10197 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); 10198 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); 10199 case ARMISD::BFI: return PerformBFICombine(N, DCI); 10200 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 10201 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); 10202 case ISD::STORE: return PerformSTORECombine(N, DCI); 10203 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); 10204 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); 10205 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); 10206 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); 10207 case ISD::FP_TO_SINT: 10208 case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); 10209 case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); 10210 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 10211 case ISD::SHL: 10212 case ISD::SRA: 10213 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 10214 case ISD::SIGN_EXTEND: 10215 case ISD::ZERO_EXTEND: 10216 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 10217 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, 
Subtarget);
10218   case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
10219   case ARMISD::VLD2DUP:
10220   case ARMISD::VLD3DUP:
10221   case ARMISD::VLD4DUP:
10222     return CombineBaseUpdate(N, DCI);
10223   case ARMISD::BUILD_VECTOR:
10224     return PerformARMBUILD_VECTORCombine(N, DCI);
10225   case ISD::INTRINSIC_VOID:
10226   case ISD::INTRINSIC_W_CHAIN:
10227     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10228     case Intrinsic::arm_neon_vld1:
10229     case Intrinsic::arm_neon_vld2:
10230     case Intrinsic::arm_neon_vld3:
10231     case Intrinsic::arm_neon_vld4:
10232     case Intrinsic::arm_neon_vld2lane:
10233     case Intrinsic::arm_neon_vld3lane:
10234     case Intrinsic::arm_neon_vld4lane:
10235     case Intrinsic::arm_neon_vst1:
10236     case Intrinsic::arm_neon_vst2:
10237     case Intrinsic::arm_neon_vst3:
10238     case Intrinsic::arm_neon_vst4:
10239     case Intrinsic::arm_neon_vst2lane:
10240     case Intrinsic::arm_neon_vst3lane:
10241     case Intrinsic::arm_neon_vst4lane:
10242       return CombineBaseUpdate(N, DCI);
10243     default: break;
10244     }
10245     break;
10246   }
10247   return SDValue();
10248 }
10249 
10250 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
10251                                                           EVT VT) const {
10252   return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
10253 }
10254 
10255 bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
10256   // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
10257   bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
10258 
10259   switch (VT.getSimpleVT().SimpleTy) {
10260   default:
10261     return false;
10262   case MVT::i8:
10263   case MVT::i16:
10264   case MVT::i32: {
10265     // Unaligned access can use (for example) LDRB, LDRH, LDR.
10266     if (AllowsUnaligned) {
10267       if (Fast)
10268         *Fast = Subtarget->hasV7Ops();
10269       return true;
10270     }
10271     return false;
10272   }
10273   case MVT::f64:
10274   case MVT::v2f64: {
10275     // For any little-endian targets with NEON, we can support unaligned ld/st
10276     // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
10277     // A big-endian target may also explicitly support unaligned accesses.
10278     if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
10279       if (Fast)
10280         *Fast = true;
10281       return true;
10282     }
10283     return false;
10284   }
10285   }
10286 }
10287 
10288 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
10289                        unsigned AlignCheck) {
10290   return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
10291           (DstAlign == 0 || DstAlign % AlignCheck == 0));
10292 }
10293 
10294 EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
10295                                            unsigned DstAlign, unsigned SrcAlign,
10296                                            bool IsMemset, bool ZeroMemset,
10297                                            bool MemcpyStrSrc,
10298                                            MachineFunction &MF) const {
10299   const Function *F = MF.getFunction();
10300 
10301   // See if we can use NEON instructions for this...
10302   if ((!IsMemset || ZeroMemset) &&
10303       Subtarget->hasNEON() &&
10304       !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
10305                                        Attribute::NoImplicitFloat)) {
10306     bool Fast;
10307     if (Size >= 16 &&
10308         (memOpAlign(SrcAlign, DstAlign, 16) ||
10309          (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) {
10310       return MVT::v2f64;
10311     } else if (Size >= 8 &&
10312                (memOpAlign(SrcAlign, DstAlign, 8) ||
10313                 (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) {
10314       return MVT::f64;
10315     }
10316   }
10317 
10318   // Lower to i32/i16 if the size permits.
10319 if (Size >= 4) 10320 return MVT::i32; 10321 else if (Size >= 2) 10322 return MVT::i16; 10323 10324 // Let the target-independent logic figure it out. 10325 return MVT::Other; 10326 } 10327 10328 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 10329 if (Val.getOpcode() != ISD::LOAD) 10330 return false; 10331 10332 EVT VT1 = Val.getValueType(); 10333 if (!VT1.isSimple() || !VT1.isInteger() || 10334 !VT2.isSimple() || !VT2.isInteger()) 10335 return false; 10336 10337 switch (VT1.getSimpleVT().SimpleTy) { 10338 default: break; 10339 case MVT::i1: 10340 case MVT::i8: 10341 case MVT::i16: 10342 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits. 10343 return true; 10344 } 10345 10346 return false; 10347 } 10348 10349 bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { 10350 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) 10351 return false; 10352 10353 if (!isTypeLegal(EVT::getEVT(Ty1))) 10354 return false; 10355 10356 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); 10357 10358 // Assuming the caller doesn't have a zeroext or signext return parameter, 10359 // truncation all the way down to i1 is valid. 10360 return true; 10361 } 10362 10363 10364 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 10365 if (V < 0) 10366 return false; 10367 10368 unsigned Scale = 1; 10369 switch (VT.getSimpleVT().SimpleTy) { 10370 default: return false; 10371 case MVT::i1: 10372 case MVT::i8: 10373 // Scale == 1; 10374 break; 10375 case MVT::i16: 10376 // Scale == 2; 10377 Scale = 2; 10378 break; 10379 case MVT::i32: 10380 // Scale == 4; 10381 Scale = 4; 10382 break; 10383 } 10384 10385 if ((V & (Scale - 1)) != 0) 10386 return false; 10387 V /= Scale; 10388 return V == (V & ((1LL << 5) - 1)); 10389 } 10390 10391 static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 10392 const ARMSubtarget *Subtarget) { 10393 bool isNeg = false; 10394 if (V < 0) { 10395 isNeg = true; 10396 V = - V; 10397 } 10398 10399 switch (VT.getSimpleVT().SimpleTy) { 10400 default: return false; 10401 case MVT::i1: 10402 case MVT::i8: 10403 case MVT::i16: 10404 case MVT::i32: 10405 // + imm12 or - imm8 10406 if (isNeg) 10407 return V == (V & ((1LL << 8) - 1)); 10408 return V == (V & ((1LL << 12) - 1)); 10409 case MVT::f32: 10410 case MVT::f64: 10411 // Same as ARM mode. FIXME: NEON? 10412 if (!Subtarget->hasVFP2()) 10413 return false; 10414 if ((V & 3) != 0) 10415 return false; 10416 V >>= 2; 10417 return V == (V & ((1LL << 8) - 1)); 10418 } 10419 } 10420 10421 /// isLegalAddressImmediate - Return true if the integer value can be used 10422 /// as the offset of the target addressing mode for load / store of the 10423 /// given type. 10424 static bool isLegalAddressImmediate(int64_t V, EVT VT, 10425 const ARMSubtarget *Subtarget) { 10426 if (V == 0) 10427 return true; 10428 10429 if (!VT.isSimple()) 10430 return false; 10431 10432 if (Subtarget->isThumb1Only()) 10433 return isLegalT1AddressImmediate(V, VT); 10434 else if (Subtarget->isThumb2()) 10435 return isLegalT2AddressImmediate(V, VT, Subtarget); 10436 10437 // ARM mode. 10438 if (V < 0) 10439 V = - V; 10440 switch (VT.getSimpleVT().SimpleTy) { 10441 default: return false; 10442 case MVT::i1: 10443 case MVT::i8: 10444 case MVT::i32: 10445 // +- imm12 10446 return V == (V & ((1LL << 12) - 1)); 10447 case MVT::i16: 10448 // +- imm8 10449 return V == (V & ((1LL << 8) - 1)); 10450 case MVT::f32: 10451 case MVT::f64: 10452 if (!Subtarget->hasVFP2()) // FIXME: NEON? 
10453 return false; 10454 if ((V & 3) != 0) 10455 return false; 10456 V >>= 2; 10457 return V == (V & ((1LL << 8) - 1)); 10458 } 10459 } 10460 10461 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 10462 EVT VT) const { 10463 int Scale = AM.Scale; 10464 if (Scale < 0) 10465 return false; 10466 10467 switch (VT.getSimpleVT().SimpleTy) { 10468 default: return false; 10469 case MVT::i1: 10470 case MVT::i8: 10471 case MVT::i16: 10472 case MVT::i32: 10473 if (Scale == 1) 10474 return true; 10475 // r + r << imm 10476 Scale = Scale & ~1; 10477 return Scale == 2 || Scale == 4 || Scale == 8; 10478 case MVT::i64: 10479 // r + r 10480 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 10481 return true; 10482 return false; 10483 case MVT::isVoid: 10484 // Note, we allow "void" uses (basically, uses that aren't loads or 10485 // stores), because ARM allows folding a scale into many arithmetic 10486 // operations. This should be made more precise and revisited later. 10487 10488 // Allow r << imm, but the imm has to be a multiple of two. 10489 if (Scale & 1) return false; 10490 return isPowerOf2_32(Scale); 10491 } 10492 } 10493 10494 /// isLegalAddressingMode - Return true if the addressing mode represented 10495 /// by AM is legal for this target, for a load/store of the specified type. 10496 bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 10497 Type *Ty) const { 10498 EVT VT = getValueType(Ty, true); 10499 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 10500 return false; 10501 10502 // Can never fold addr of global into load/store. 10503 if (AM.BaseGV) 10504 return false; 10505 10506 switch (AM.Scale) { 10507 case 0: // no scale reg, must be "r+i" or "r", or "i". 10508 break; 10509 case 1: 10510 if (Subtarget->isThumb1Only()) 10511 return false; 10512 // FALL THROUGH. 10513 default: 10514 // ARM doesn't support any R+R*scale+imm addr modes. 10515 if (AM.BaseOffs) 10516 return false; 10517 10518 if (!VT.isSimple()) 10519 return false; 10520 10521 if (Subtarget->isThumb2()) 10522 return isLegalT2ScaledAddressingMode(AM, VT); 10523 10524 int Scale = AM.Scale; 10525 switch (VT.getSimpleVT().SimpleTy) { 10526 default: return false; 10527 case MVT::i1: 10528 case MVT::i8: 10529 case MVT::i32: 10530 if (Scale < 0) Scale = -Scale; 10531 if (Scale == 1) 10532 return true; 10533 // r + r << imm 10534 return isPowerOf2_32(Scale & ~1); 10535 case MVT::i16: 10536 case MVT::i64: 10537 // r + r 10538 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 10539 return true; 10540 return false; 10541 10542 case MVT::isVoid: 10543 // Note, we allow "void" uses (basically, uses that aren't loads or 10544 // stores), because ARM allows folding a scale into many arithmetic 10545 // operations. This should be made more precise and revisited later. 10546 10547 // Allow r << imm, but the imm has to be a multiple of two. 10548 if (Scale & 1) return false; 10549 return isPowerOf2_32(Scale); 10550 } 10551 } 10552 return true; 10553 } 10554 10555 /// isLegalICmpImmediate - Return true if the specified immediate is a legal 10556 /// icmp immediate, that is, the target has icmp instructions which can compare 10557 /// a register against the immediate without having to materialize the 10558 /// immediate into a register. 10559 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 10560 // Thumb2 and ARM modes can use cmn for negative immediates.
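// For example (illustrative): Imm == -1 is legal in ARM mode, since
// abs64(-1) == 1 is a valid so_imm and the compare can be emitted as
// "cmn rN, #1"; Imm == 0x1234 is not encodable as a rotated 8-bit
// immediate, so it must be materialized into a register first.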
10561 if (!Subtarget->isThumb()) 10562 return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1; 10563 if (Subtarget->isThumb2()) 10564 return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1; 10565 // Thumb1 doesn't have cmn and only has 8-bit immediates. 10566 return Imm >= 0 && Imm <= 255; 10567 } 10568 10569 /// isLegalAddImmediate - Return true if the specified immediate is a legal add 10570 /// *or sub* immediate, that is, the target has add or sub instructions which can 10571 /// add a register with the immediate without having to materialize the 10572 /// immediate into a register. 10573 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { 10574 // Same encoding for add/sub, just flip the sign. 10575 int64_t AbsImm = llvm::abs64(Imm); 10576 if (!Subtarget->isThumb()) 10577 return ARM_AM::getSOImmVal(AbsImm) != -1; 10578 if (Subtarget->isThumb2()) 10579 return ARM_AM::getT2SOImmVal(AbsImm) != -1; 10580 // Thumb1 only has 8-bit unsigned immediates. 10581 return AbsImm >= 0 && AbsImm <= 255; 10582 } 10583 10584 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 10585 bool isSEXTLoad, SDValue &Base, 10586 SDValue &Offset, bool &isInc, 10587 SelectionDAG &DAG) { 10588 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 10589 return false; 10590 10591 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 10592 // AddressingMode 3 10593 Base = Ptr->getOperand(0); 10594 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 10595 int RHSC = (int)RHS->getZExtValue(); 10596 if (RHSC < 0 && RHSC > -256) { 10597 assert(Ptr->getOpcode() == ISD::ADD); 10598 isInc = false; 10599 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 10600 return true; 10601 } 10602 } 10603 isInc = (Ptr->getOpcode() == ISD::ADD); 10604 Offset = Ptr->getOperand(1); 10605 return true; 10606 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 10607 // AddressingMode 2 10608 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 10609 int RHSC = (int)RHS->getZExtValue(); 10610 if (RHSC < 0 && RHSC > -0x1000) { 10611 assert(Ptr->getOpcode() == ISD::ADD); 10612 isInc = false; 10613 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 10614 Base = Ptr->getOperand(0); 10615 return true; 10616 } 10617 } 10618 10619 if (Ptr->getOpcode() == ISD::ADD) { 10620 isInc = true; 10621 ARM_AM::ShiftOpc ShOpcVal = 10622 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); 10623 if (ShOpcVal != ARM_AM::no_shift) { 10624 Base = Ptr->getOperand(1); 10625 Offset = Ptr->getOperand(0); 10626 } else { 10627 Base = Ptr->getOperand(0); 10628 Offset = Ptr->getOperand(1); 10629 } 10630 return true; 10631 } 10632 10633 isInc = (Ptr->getOpcode() == ISD::ADD); 10634 Base = Ptr->getOperand(0); 10635 Offset = Ptr->getOperand(1); 10636 return true; 10637 } 10638 10639 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 10640 return false; 10641 } 10642 10643 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 10644 bool isSEXTLoad, SDValue &Base, 10645 SDValue &Offset, bool &isInc, 10646 SelectionDAG &DAG) { 10647 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 10648 return false; 10649 10650 Base = Ptr->getOperand(0); 10651 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 10652 int RHSC = (int)RHS->getZExtValue(); 10653 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
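// For example (illustrative): a pre-indexed "ldr r0, [r1, #-4]!" reaches
// here as (add r1, -4); we report isInc == false with the positive
// constant 4 so the negative-offset T2 form can encode it.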
10654 assert(Ptr->getOpcode() == ISD::ADD); 10655 isInc = false; 10656 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 10657 return true; 10658 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 10659 isInc = Ptr->getOpcode() == ISD::ADD; 10660 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 10661 return true; 10662 } 10663 } 10664 10665 return false; 10666 } 10667 10668 /// getPreIndexedAddressParts - returns true by value, base pointer and 10669 /// offset pointer and addressing mode by reference if the node's address 10670 /// can be legally represented as a pre-indexed load / store address. 10671 bool 10672 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 10673 SDValue &Offset, 10674 ISD::MemIndexedMode &AM, 10675 SelectionDAG &DAG) const { 10676 if (Subtarget->isThumb1Only()) 10677 return false; 10678 10679 EVT VT; 10680 SDValue Ptr; 10681 bool isSEXTLoad = false; 10682 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 10683 Ptr = LD->getBasePtr(); 10684 VT = LD->getMemoryVT(); 10685 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 10686 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 10687 Ptr = ST->getBasePtr(); 10688 VT = ST->getMemoryVT(); 10689 } else 10690 return false; 10691 10692 bool isInc; 10693 bool isLegal = false; 10694 if (Subtarget->isThumb2()) 10695 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 10696 Offset, isInc, DAG); 10697 else 10698 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 10699 Offset, isInc, DAG); 10700 if (!isLegal) 10701 return false; 10702 10703 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 10704 return true; 10705 } 10706 10707 /// getPostIndexedAddressParts - returns true by value, base pointer and 10708 /// offset pointer and addressing mode by reference if this node can be 10709 /// combined with a load / store to form a post-indexed load / store. 10710 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 10711 SDValue &Base, 10712 SDValue &Offset, 10713 ISD::MemIndexedMode &AM, 10714 SelectionDAG &DAG) const { 10715 if (Subtarget->isThumb1Only()) 10716 return false; 10717 10718 EVT VT; 10719 SDValue Ptr; 10720 bool isSEXTLoad = false; 10721 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 10722 VT = LD->getMemoryVT(); 10723 Ptr = LD->getBasePtr(); 10724 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 10725 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 10726 VT = ST->getMemoryVT(); 10727 Ptr = ST->getBasePtr(); 10728 } else 10729 return false; 10730 10731 bool isInc; 10732 bool isLegal = false; 10733 if (Subtarget->isThumb2()) 10734 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 10735 isInc, DAG); 10736 else 10737 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 10738 isInc, DAG); 10739 if (!isLegal) 10740 return false; 10741 10742 if (Ptr != Base) { 10743 // Swap base ptr and offset to catch more post-indexed loads / stores when 10744 // it's legal. In Thumb2 mode, the offset must be an immediate. 10745 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 10746 !Subtarget->isThumb2()) 10747 std::swap(Base, Offset); 10748 10749 // Post-indexed loads / stores update the base pointer. 10750 if (Ptr != Base) 10751 return false; 10752 } 10753 10754 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 10755 return true; 10756 } 10757 10758 void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 10759 APInt &KnownZero, 10760 APInt &KnownOne, 10761 const SelectionDAG &DAG, 10762 unsigned Depth) const { 10763 unsigned BitWidth = KnownOne.getBitWidth(); 10764 KnownZero = KnownOne = APInt(BitWidth, 0); 10765 switch (Op.getOpcode()) { 10766 default: break; 10767 case ARMISD::ADDC: 10768 case ARMISD::ADDE: 10769 case ARMISD::SUBC: 10770 case ARMISD::SUBE: 10771 // These nodes' second result is a boolean. 10772 if (Op.getResNo() == 0) 10773 break; 10774 KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); 10775 break; 10776 case ARMISD::CMOV: { 10777 // Bits are known zero/one if known on the LHS and RHS. 10778 DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); 10779 if (KnownZero == 0 && KnownOne == 0) return; 10780 10781 APInt KnownZeroRHS, KnownOneRHS; 10782 DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); 10783 KnownZero &= KnownZeroRHS; 10784 KnownOne &= KnownOneRHS; 10785 return; 10786 } 10787 } 10788 } 10789 10790 //===----------------------------------------------------------------------===// 10791 // ARM Inline Assembly Support 10792 //===----------------------------------------------------------------------===// 10793 10794 bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { 10795 // Looking for "rev", which is available on V6 and above. 10796 if (!Subtarget->hasV6Ops()) 10797 return false; 10798 10799 InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); 10800 std::string AsmStr = IA->getAsmString(); 10801 SmallVector<StringRef, 4> AsmPieces; 10802 SplitString(AsmStr, AsmPieces, ";\n"); 10803 10804 switch (AsmPieces.size()) { 10805 default: return false; 10806 case 1: 10807 AsmStr = AsmPieces[0]; 10808 AsmPieces.clear(); 10809 SplitString(AsmStr, AsmPieces, " \t,"); 10810 10811 // rev $0, $1 10812 if (AsmPieces.size() == 3 && 10813 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && 10814 IA->getConstraintString().compare(0, 4, "=l,l") == 0) { 10815 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); 10816 if (Ty && Ty->getBitWidth() == 32) 10817 return IntrinsicLowering::LowerToByteSwap(CI); 10818 } 10819 break; 10820 } 10821 10822 return false; 10823 } 10824 10825 /// getConstraintType - Given a constraint letter, return the type of 10826 /// constraint it is for this target. 10827 ARMTargetLowering::ConstraintType 10828 ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 10829 if (Constraint.size() == 1) { 10830 switch (Constraint[0]) { 10831 default: break; 10832 case 'l': return C_RegisterClass; 10833 case 'w': return C_RegisterClass; 10834 case 'h': return C_RegisterClass; 10835 case 'x': return C_RegisterClass; 10836 case 't': return C_RegisterClass; 10837 case 'j': return C_Other; // Constant for movw. 10838 // An address with a single base register. Due to the way we 10839 // currently handle addresses, it is the same as an 'r' memory constraint. 10840 case 'Q': return C_Memory; 10841 } 10842 } else if (Constraint.size() == 2) { 10843 switch (Constraint[0]) { 10844 default: break; 10845 // All 'U+' constraints are addresses. 10846 case 'U': return C_Memory; 10847 } 10848 } 10849 return TargetLowering::getConstraintType(Constraint); 10850 } 10851 10852 /// Examine constraint type and operand type and determine a weight value.
10853 /// This object must already have been set up with the operand type 10854 /// and the current alternative constraint selected. 10855 TargetLowering::ConstraintWeight 10856 ARMTargetLowering::getSingleConstraintMatchWeight( 10857 AsmOperandInfo &info, const char *constraint) const { 10858 ConstraintWeight weight = CW_Invalid; 10859 Value *CallOperandVal = info.CallOperandVal; 10860 // If we don't have a value, we can't do a match, 10861 // but allow it at the lowest weight. 10862 if (CallOperandVal == NULL) 10863 return CW_Default; 10864 Type *type = CallOperandVal->getType(); 10865 // Look at the constraint type. 10866 switch (*constraint) { 10867 default: 10868 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 10869 break; 10870 case 'l': 10871 if (type->isIntegerTy()) { 10872 if (Subtarget->isThumb()) 10873 weight = CW_SpecificReg; 10874 else 10875 weight = CW_Register; 10876 } 10877 break; 10878 case 'w': 10879 if (type->isFloatingPointTy()) 10880 weight = CW_Register; 10881 break; 10882 } 10883 return weight; 10884 } 10885 10886 typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; 10887 RCPair 10888 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 10889 MVT VT) const { 10890 if (Constraint.size() == 1) { 10891 // GCC ARM Constraint Letters 10892 switch (Constraint[0]) { 10893 case 'l': // Low regs or general regs. 10894 if (Subtarget->isThumb()) 10895 return RCPair(0U, &ARM::tGPRRegClass); 10896 return RCPair(0U, &ARM::GPRRegClass); 10897 case 'h': // High regs or no regs. 10898 if (Subtarget->isThumb()) 10899 return RCPair(0U, &ARM::hGPRRegClass); 10900 break; 10901 case 'r': 10902 return RCPair(0U, &ARM::GPRRegClass); 10903 case 'w': 10904 if (VT == MVT::f32) 10905 return RCPair(0U, &ARM::SPRRegClass); 10906 if (VT.getSizeInBits() == 64) 10907 return RCPair(0U, &ARM::DPRRegClass); 10908 if (VT.getSizeInBits() == 128) 10909 return RCPair(0U, &ARM::QPRRegClass); 10910 break; 10911 case 'x': 10912 if (VT == MVT::f32) 10913 return RCPair(0U, &ARM::SPR_8RegClass); 10914 if (VT.getSizeInBits() == 64) 10915 return RCPair(0U, &ARM::DPR_8RegClass); 10916 if (VT.getSizeInBits() == 128) 10917 return RCPair(0U, &ARM::QPR_8RegClass); 10918 break; 10919 case 't': 10920 if (VT == MVT::f32) 10921 return RCPair(0U, &ARM::SPRRegClass); 10922 break; 10923 } 10924 } 10925 if (StringRef("{cc}").equals_lower(Constraint)) 10926 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass); 10927 10928 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 10929 } 10930 10931 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 10932 /// vector. If it is invalid, don't add anything to Ops. 10933 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 10934 std::string &Constraint, 10935 std::vector<SDValue>&Ops, 10936 SelectionDAG &DAG) const { 10937 SDValue Result(0, 0); 10938 10939 // Currently only support length 1 constraints. 10940 if (Constraint.length() != 1) return; 10941 10942 char ConstraintLetter = Constraint[0]; 10943 switch (ConstraintLetter) { 10944 default: break; 10945 case 'j': 10946 case 'I': case 'J': case 'K': case 'L': 10947 case 'M': case 'N': case 'O': 10948 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 10949 if (!C) 10950 return; 10951 10952 int64_t CVal64 = C->getSExtValue(); 10953 int CVal = (int) CVal64; 10954 // None of these constraints allow values larger than 32 bits. Check 10955 // that the value fits in an int. 
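// For example (illustrative): an i64 operand of 0x100000000 truncates to
// CVal == 0, so the CVal != CVal64 check below rejects it rather than
// matching the constraint against the truncated value.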
10956 if (CVal != CVal64) 10957 return; 10958 10959 switch (ConstraintLetter) { 10960 case 'j': 10961 // Constant suitable for movw, must be between 0 and 10962 // 65535. 10963 if (Subtarget->hasV6T2Ops()) 10964 if (CVal >= 0 && CVal <= 65535) 10965 break; 10966 return; 10967 case 'I': 10968 if (Subtarget->isThumb1Only()) { 10969 // This must be a constant between 0 and 255, for ADD 10970 // immediates. 10971 if (CVal >= 0 && CVal <= 255) 10972 break; 10973 } else if (Subtarget->isThumb2()) { 10974 // A constant that can be used as an immediate value in a 10975 // data-processing instruction. 10976 if (ARM_AM::getT2SOImmVal(CVal) != -1) 10977 break; 10978 } else { 10979 // A constant that can be used as an immediate value in a 10980 // data-processing instruction. 10981 if (ARM_AM::getSOImmVal(CVal) != -1) 10982 break; 10983 } 10984 return; 10985 10986 case 'J': 10987 if (Subtarget->isThumb()) { // FIXME thumb2 10988 // This must be a constant between -255 and -1, for negated ADD 10989 // immediates. This can be used in GCC with an "n" modifier that 10990 // prints the negated value, for use with SUB instructions. It is 10991 // not useful otherwise but is implemented for compatibility. 10992 if (CVal >= -255 && CVal <= -1) 10993 break; 10994 } else { 10995 // This must be a constant between -4095 and 4095. It is not clear 10996 // what this constraint is intended for. Implemented for 10997 // compatibility with GCC. 10998 if (CVal >= -4095 && CVal <= 4095) 10999 break; 11000 } 11001 return; 11002 11003 case 'K': 11004 if (Subtarget->isThumb1Only()) { 11005 // A 32-bit value where only one byte has a nonzero value. Exclude 11006 // zero to match GCC. This constraint is used by GCC internally for 11007 // constants that can be loaded with a move/shift combination. 11008 // It is not useful otherwise but is implemented for compatibility. 11009 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 11010 break; 11011 } else if (Subtarget->isThumb2()) { 11012 // A constant whose bitwise inverse can be used as an immediate 11013 // value in a data-processing instruction. This can be used in GCC 11014 // with a "B" modifier that prints the inverted value, for use with 11015 // BIC and MVN instructions. It is not useful otherwise but is 11016 // implemented for compatibility. 11017 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 11018 break; 11019 } else { 11020 // A constant whose bitwise inverse can be used as an immediate 11021 // value in a data-processing instruction. This can be used in GCC 11022 // with a "B" modifier that prints the inverted value, for use with 11023 // BIC and MVN instructions. It is not useful otherwise but is 11024 // implemented for compatibility. 11025 if (ARM_AM::getSOImmVal(~CVal) != -1) 11026 break; 11027 } 11028 return; 11029 11030 case 'L': 11031 if (Subtarget->isThumb1Only()) { 11032 // This must be a constant between -7 and 7, 11033 // for 3-operand ADD/SUB immediate instructions. 11034 if (CVal >= -7 && CVal < 7) 11035 break; 11036 } else if (Subtarget->isThumb2()) { 11037 // A constant whose negation can be used as an immediate value in a 11038 // data-processing instruction. This can be used in GCC with an "n" 11039 // modifier that prints the negated value, for use with SUB 11040 // instructions. It is not useful otherwise but is implemented for 11041 // compatibility. 11042 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 11043 break; 11044 } else { 11045 // A constant whose negation can be used as an immediate value in a 11046 // data-processing instruction. 
This can be used in GCC with an "n" 11047 // modifier that prints the negated value, for use with SUB 11048 // instructions. It is not useful otherwise but is implemented for 11049 // compatibility. 11050 if (ARM_AM::getSOImmVal(-CVal) != -1) 11051 break; 11052 } 11053 return; 11054 11055 case 'M': 11056 if (Subtarget->isThumb()) { // FIXME thumb2 11057 // This must be a multiple of 4 between 0 and 1020, for 11058 // ADD sp + immediate. 11059 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 11060 break; 11061 } else { 11062 // A power of two or a constant between 0 and 32. This is used in 11063 // GCC for the shift amount on shifted register operands, but it is 11064 // useful in general for any shift amounts. 11065 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) 11066 break; 11067 } 11068 return; 11069 11070 case 'N': 11071 if (Subtarget->isThumb()) { // FIXME thumb2 11072 // This must be a constant between 0 and 31, for shift amounts. 11073 if (CVal >= 0 && CVal <= 31) 11074 break; 11075 } 11076 return; 11077 11078 case 'O': 11079 if (Subtarget->isThumb()) { // FIXME thumb2 11080 // This must be a multiple of 4 between -508 and 508, for 11081 // ADD/SUB sp = sp + immediate. 11082 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) 11083 break; 11084 } 11085 return; 11086 } 11087 Result = DAG.getTargetConstant(CVal, Op.getValueType()); 11088 break; 11089 } 11090 11091 if (Result.getNode()) { 11092 Ops.push_back(Result); 11093 return; 11094 } 11095 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 11096 } 11097 11098 SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { 11099 assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only"); 11100 unsigned Opcode = Op->getOpcode(); 11101 assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && 11102 "Invalid opcode for Div/Rem lowering"); 11103 bool isSigned = (Opcode == ISD::SDIVREM); 11104 EVT VT = Op->getValueType(0); 11105 Type *Ty = VT.getTypeForEVT(*DAG.getContext()); 11106 11107 RTLIB::Libcall LC; 11108 switch (VT.getSimpleVT().SimpleTy) { 11109 default: llvm_unreachable("Unexpected request for libcall!"); 11110 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; 11111 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; 11112 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; 11113 case MVT::i64: LC= isSigned ? 
RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; 11114 } 11115 11116 SDValue InChain = DAG.getEntryNode(); 11117 11118 TargetLowering::ArgListTy Args; 11119 TargetLowering::ArgListEntry Entry; 11120 for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { 11121 EVT ArgVT = Op->getOperand(i).getValueType(); 11122 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); 11123 Entry.Node = Op->getOperand(i); 11124 Entry.Ty = ArgTy; 11125 Entry.isSExt = isSigned; 11126 Entry.isZExt = !isSigned; 11127 Args.push_back(Entry); 11128 } 11129 11130 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), 11131 getPointerTy()); 11132 11133 Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); 11134 11135 SDLoc dl(Op); 11136 TargetLowering:: 11137 CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true, 11138 0, getLibcallCallingConv(LC), /*isTailCall=*/false, 11139 /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, 11140 Callee, Args, DAG, dl); 11141 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); 11142 11143 return CallInfo.first; 11144 } 11145 11146 bool 11147 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 11148 // The ARM target isn't yet aware of offsets. 11149 return false; 11150 } 11151 11152 bool ARM::isBitFieldInvertedMask(unsigned v) { 11153 if (v == 0xffffffff) 11154 return false; 11155 11156 // There can be 1's on either or both "outsides"; all the "inside" 11157 // bits must be 0's. 11158 unsigned TO = CountTrailingOnes_32(v); 11159 unsigned LO = CountLeadingOnes_32(v); 11160 v = (v >> TO) << TO; 11161 v = (v << LO) >> LO; 11162 return v == 0; 11163 } 11164 11165 /// isFPImmLegal - Returns true if the target can instruction select the 11166 /// specified FP immediate natively. If false, the legalizer will 11167 /// materialize the FP immediate as a load from a constant pool. 11168 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 11169 if (!Subtarget->hasVFP3()) 11170 return false; 11171 if (VT == MVT::f32) 11172 return ARM_AM::getFP32Imm(Imm) != -1; 11173 if (VT == MVT::f64) 11174 return ARM_AM::getFP64Imm(Imm) != -1; 11175 return false; 11176 } 11177 11178 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as 11179 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment 11180 /// specified in the intrinsic calls. 11181 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 11182 const CallInst &I, 11183 unsigned Intrinsic) const { 11184 switch (Intrinsic) { 11185 case Intrinsic::arm_neon_vld1: 11186 case Intrinsic::arm_neon_vld2: 11187 case Intrinsic::arm_neon_vld3: 11188 case Intrinsic::arm_neon_vld4: 11189 case Intrinsic::arm_neon_vld2lane: 11190 case Intrinsic::arm_neon_vld3lane: 11191 case Intrinsic::arm_neon_vld4lane: { 11192 Info.opc = ISD::INTRINSIC_W_CHAIN; 11193 // Conservatively set memVT to the entire set of vectors loaded.
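// For example (illustrative): a vld3.32 returning three v4i32 vectors
// covers 3 * 16 = 48 bytes, so NumElts is 6 and memVT becomes v6i64,
// spanning the entire region the intrinsic touches.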
11194 uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; 11195 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 11196 Info.ptrVal = I.getArgOperand(0); 11197 Info.offset = 0; 11198 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 11199 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 11200 Info.vol = false; // volatile loads with NEON intrinsics not supported 11201 Info.readMem = true; 11202 Info.writeMem = false; 11203 return true; 11204 } 11205 case Intrinsic::arm_neon_vst1: 11206 case Intrinsic::arm_neon_vst2: 11207 case Intrinsic::arm_neon_vst3: 11208 case Intrinsic::arm_neon_vst4: 11209 case Intrinsic::arm_neon_vst2lane: 11210 case Intrinsic::arm_neon_vst3lane: 11211 case Intrinsic::arm_neon_vst4lane: { 11212 Info.opc = ISD::INTRINSIC_VOID; 11213 // Conservatively set memVT to the entire set of vectors stored. 11214 unsigned NumElts = 0; 11215 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { 11216 Type *ArgTy = I.getArgOperand(ArgI)->getType(); 11217 if (!ArgTy->isVectorTy()) 11218 break; 11219 NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; 11220 } 11221 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 11222 Info.ptrVal = I.getArgOperand(0); 11223 Info.offset = 0; 11224 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 11225 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 11226 Info.vol = false; // volatile stores with NEON intrinsics not supported 11227 Info.readMem = false; 11228 Info.writeMem = true; 11229 return true; 11230 } 11231 case Intrinsic::arm_ldrex: { 11232 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); 11233 Info.opc = ISD::INTRINSIC_W_CHAIN; 11234 Info.memVT = MVT::getVT(PtrTy->getElementType()); 11235 Info.ptrVal = I.getArgOperand(0); 11236 Info.offset = 0; 11237 Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); 11238 Info.vol = true; 11239 Info.readMem = true; 11240 Info.writeMem = false; 11241 return true; 11242 } 11243 case Intrinsic::arm_strex: { 11244 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType()); 11245 Info.opc = ISD::INTRINSIC_W_CHAIN; 11246 Info.memVT = MVT::getVT(PtrTy->getElementType()); 11247 Info.ptrVal = I.getArgOperand(1); 11248 Info.offset = 0; 11249 Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); 11250 Info.vol = true; 11251 Info.readMem = false; 11252 Info.writeMem = true; 11253 return true; 11254 } 11255 case Intrinsic::arm_strexd: { 11256 Info.opc = ISD::INTRINSIC_W_CHAIN; 11257 Info.memVT = MVT::i64; 11258 Info.ptrVal = I.getArgOperand(2); 11259 Info.offset = 0; 11260 Info.align = 8; 11261 Info.vol = true; 11262 Info.readMem = false; 11263 Info.writeMem = true; 11264 return true; 11265 } 11266 case Intrinsic::arm_ldrexd: { 11267 Info.opc = ISD::INTRINSIC_W_CHAIN; 11268 Info.memVT = MVT::i64; 11269 Info.ptrVal = I.getArgOperand(0); 11270 Info.offset = 0; 11271 Info.align = 8; 11272 Info.vol = true; 11273 Info.readMem = true; 11274 Info.writeMem = false; 11275 return true; 11276 } 11277 default: 11278 break; 11279 } 11280 11281 return false; 11282 } 11283