//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");

// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  cl::init(false));

cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

namespace llvm {
  class ARMCCState : public CCState {
  public:
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
               LLVMContext &C, ParmContext PC)
      : CCState(CC, isVarArg, MF, TM, locs, C) {
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}

// The APCS parameter registers.
static const unsigned GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};

void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy != MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
    setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction(VT.getSimpleVT(),
                          (MVT::SimpleValueType)InnerVT, Expand);
  }
  setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}

void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();
  Itins = TM.getInstrItineraryData();

  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32, "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64, "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  if (Subtarget->isAAPCS_ABI()) {
    // Double-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 2
    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);

    // Double-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 3
    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
    setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);

    // Single-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 4
    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);

    // Single-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 5
    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
"__aeabi_fcmpun"); 330 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 331 setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); 332 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 333 setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); 334 setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); 335 setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); 336 setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); 337 setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); 338 setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); 339 setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); 340 setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); 341 342 // Floating-point to integer conversions. 343 // RTABI chapter 4.1.2, Table 6 344 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); 345 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); 346 setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); 347 setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); 348 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); 349 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); 350 setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); 351 setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); 352 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); 353 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); 354 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); 355 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); 356 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); 357 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); 358 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); 359 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); 360 361 // Conversions between floating types. 362 // RTABI chapter 4.1.2, Table 7 363 setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); 364 setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); 365 setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); 366 setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); 367 368 // Integer to floating-point conversions. 
    // RTABI chapter 4.1.2, Table 8
    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);

    // Long long helper functions
    // RTABI chapter 4.2, Table 9
    setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);

    // Integer division functions
    // RTABI chapter 4.3.1
    setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);

    // Memory operations
    // RTABI chapter 4.3.4
    setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
    setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
    setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    if (!Subtarget->isFPOnlySP())
      addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
    // a destination type that is wider than the source.
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
    setTargetDAGCombine(ISD::FDIV);
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, MVT::i1, Legal);
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  if (!Subtarget->isThumb1Only()) {
    // FIXME: We should do this for Thumb1 as well.
    setOperationAction(ISD::ADDC, MVT::i32, Custom);
    setOperationAction(ISD::ADDE, MVT::i32, Custom);
    setOperationAction(ISD::SUBC, MVT::i32, Custom);
    setOperationAction(ISD::SUBE, MVT::i32, Custom);
  }

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
    // v7M has a hardware divider
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setExceptionPointerRegister(ARM::R0);
  setExceptionSelectorRegister(ARM::R1);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  // FIXME: This should be checking for v6k, not just v6.
  if (Subtarget->hasDataBarrier() ||
      (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
    // membarrier needs custom lowering; the rest are legal and handled
    // normally.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
    // Custom lowering for 64-bit ops
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
    // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
    setInsertFencesForAtomic(true);
  } else {
    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    // Unordered/Monotonic case.
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
    // Since the libcalls include locking, fold in the fences
    setShouldFoldAtomicFences(true);
  }

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  }

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  // Various VFP goodness
  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);

  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
    setTargetDAGCombine(ISD::OR);
  if (Subtarget->hasNEON())
    setTargetDAGCombine(ISD::AND);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  benefitFromCodePlacementOpt = true;

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = ARM::DPRRegisterClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = ARM::DPRRegisterClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper: return "ARMISD::Wrapper";
  case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN";
  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
  case ARMISD::CALL: return "ARMISD::CALL";
  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL: return "ARMISD::tCALL";
  case ARMISD::BRCOND: return "ARMISD::BRCOND";
  case ARMISD::BR_JT: return "ARMISD::BR_JT";
  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
  case ARMISD::CMP: return "ARMISD::CMP";
  case ARMISD::CMPZ: return "ARMISD::CMPZ";
  case ARMISD::CMPFP: return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
  case ARMISD::CMOV: return "ARMISD::CMOV";

  case ARMISD::RBIT: return "ARMISD::RBIT";

  case ARMISD::FTOSI: return "ARMISD::FTOSI";
  case ARMISD::FTOUI: return "ARMISD::FTOUI";
  case ARMISD::SITOF: return "ARMISD::SITOF";
  case ARMISD::UITOF: return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
  case ARMISD::RRX: return "ARMISD::RRX";

  case ARMISD::ADDC: return "ARMISD::ADDC";
  case ARMISD::ADDE: return "ARMISD::ADDE";
  case ARMISD::SUBC: return "ARMISD::SUBC";
  case ARMISD::SUBE: return "ARMISD::SUBE";

  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
  case ARMISD::EH_SJLJ_DISPATCHSETUP: return "ARMISD::EH_SJLJ_DISPATCHSETUP";

  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";

  case ARMISD::VCEQ: return "ARMISD::VCEQ";
  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
  case ARMISD::VCGE: return "ARMISD::VCGE";
  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
  case ARMISD::VCGEU: return "ARMISD::VCGEU";
  case ARMISD::VCGT: return "ARMISD::VCGT";
  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
  case ARMISD::VCGTU: return "ARMISD::VCGTU";
  case ARMISD::VTST: return "ARMISD::VTST";

  case ARMISD::VSHL: return "ARMISD::VSHL";
  case ARMISD::VSHRs: return "ARMISD::VSHRs";
  case ARMISD::VSHRu: return "ARMISD::VSHRu";
"ARMISD::VSHRu"; 874 case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; 875 case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; 876 case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; 877 case ARMISD::VSHRN: return "ARMISD::VSHRN"; 878 case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; 879 case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; 880 case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; 881 case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; 882 case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; 883 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; 884 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; 885 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 886 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 887 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 888 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 889 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 890 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 891 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 892 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; 893 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; 894 case ARMISD::VDUP: return "ARMISD::VDUP"; 895 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 896 case ARMISD::VEXT: return "ARMISD::VEXT"; 897 case ARMISD::VREV64: return "ARMISD::VREV64"; 898 case ARMISD::VREV32: return "ARMISD::VREV32"; 899 case ARMISD::VREV16: return "ARMISD::VREV16"; 900 case ARMISD::VZIP: return "ARMISD::VZIP"; 901 case ARMISD::VUZP: return "ARMISD::VUZP"; 902 case ARMISD::VTRN: return "ARMISD::VTRN"; 903 case ARMISD::VTBL1: return "ARMISD::VTBL1"; 904 case ARMISD::VTBL2: return "ARMISD::VTBL2"; 905 case ARMISD::VMULLs: return "ARMISD::VMULLs"; 906 case ARMISD::VMULLu: return "ARMISD::VMULLu"; 907 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; 908 case ARMISD::FMAX: return "ARMISD::FMAX"; 909 case ARMISD::FMIN: return "ARMISD::FMIN"; 910 case ARMISD::BFI: return "ARMISD::BFI"; 911 case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; 912 case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; 913 case ARMISD::VBSL: return "ARMISD::VBSL"; 914 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; 915 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; 916 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; 917 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; 918 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; 919 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; 920 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; 921 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; 922 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; 923 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; 924 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; 925 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; 926 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; 927 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; 928 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; 929 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; 930 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; 931 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; 932 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; 933 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; 934 } 935 } 936 937 EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { 938 if (!VT.isVector()) return getPointerTy(); 939 return VT.changeVectorElementTypeToInteger(); 940 } 941 942 /// getRegClassFor - Return the register class that should be used for the 943 /// specified value type. 
TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return ARM::QQPRRegisterClass;
    else if (VT == MVT::v8i64)
      return ARM::QQQQPRRegisterClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
  return ARM::createFastISel(funcInfo);
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::Latency;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::Latency;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE: return ARMCC::NE;
  case ISD::SETEQ: return ARMCC::EQ;
  case ISD::SETGT: return ARMCC::GT;
  case ISD::SETGE: return ARMCC::GE;
  case ISD::SETLT: return ARMCC::LT;
  case ISD::SETLE: return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO: CondCode = ARMCC::VC; break;
  case ISD::SETUO: CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    // Fallthrough
  case CallingConv::C: {
    // Use target triple & subtarget features to do actual dispatch.
    if (!Subtarget->isAAPCS_ABI())
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    else if (Subtarget->hasVFP2() &&
             FloatABIType == FloatABI::Hard && !isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  }
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      MachinePointerInfo::getStack(LocMemOffset),
                      false, false, 0);
}

void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}

/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool IsSibCall = false;
  // Temporarily disable tail calls so things don't break.
  if (!EnableARMTailCalls)
    isTailCall = false;
  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                   isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
                   Outs, OutVals, Ins, DAG);
    // We don't support GuaranteedTailCallOpt for ARM, only automatically
    // detected sibcalls.
    if (isTailCall) {
      ++NumTailCalls;
      IsSibCall = true;
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // For tail calls, memory operands are available in our caller's stack.
  if (IsSibCall)
    NumBytes = 0;

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
    bool isByVal = Flags.isByVal();

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (isByVal) {
      assert(VA.isMemLoc());
      unsigned offset = 0;

      // True if this byval aggregate will be split between registers
      // and memory.
1319 if (CCInfo.isFirstByValRegValid()) { 1320 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1321 unsigned int i, j; 1322 for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) { 1323 SDValue Const = DAG.getConstant(4*i, MVT::i32); 1324 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 1325 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 1326 MachinePointerInfo(), 1327 false, false, 0); 1328 MemOpChains.push_back(Load.getValue(1)); 1329 RegsToPass.push_back(std::make_pair(j, Load)); 1330 } 1331 offset = ARM::R4 - CCInfo.getFirstByValReg(); 1332 CCInfo.clearFirstByValReg(); 1333 } 1334 1335 unsigned LocMemOffset = VA.getLocMemOffset(); 1336 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); 1337 SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, 1338 StkPtrOff); 1339 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); 1340 SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); 1341 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, 1342 MVT::i32); 1343 // TODO: Disable AlwaysInline when it becomes possible 1344 // to emit a nested call sequence. 1345 MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, 1346 Flags.getByValAlign(), 1347 /*isVolatile=*/false, 1348 /*AlwaysInline=*/true, 1349 MachinePointerInfo(0), 1350 MachinePointerInfo(0))); 1351 1352 } else if (!IsSibCall) { 1353 assert(VA.isMemLoc()); 1354 1355 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 1356 dl, DAG, VA, Flags)); 1357 } 1358 } 1359 1360 if (!MemOpChains.empty()) 1361 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1362 &MemOpChains[0], MemOpChains.size()); 1363 1364 // Build a sequence of copy-to-reg nodes chained together with token chain 1365 // and flag operands which copy the outgoing args into the appropriate regs. 1366 SDValue InFlag; 1367 // Tail call byval lowering might overwrite argument registers so in case of 1368 // tail call optimization the copies to registers are lowered later. 1369 if (!isTailCall) 1370 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1371 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1372 RegsToPass[i].second, InFlag); 1373 InFlag = Chain.getValue(1); 1374 } 1375 1376 // For tail calls lower the arguments to the 'real' stack slot. 1377 if (isTailCall) { 1378 // Force all the incoming stack arguments to be loaded from the stack 1379 // before any new outgoing arguments are stored to the stack, because the 1380 // outgoing stack slots may alias the incoming argument stack slots, and 1381 // the alias isn't otherwise explicit. This is slightly more conservative 1382 // than necessary, because it means that each store effectively depends 1383 // on every argument instead of just those arguments it would clobber. 1384 1385 // Do not flag preceding copytoreg stuff together with the following stuff. 1386 InFlag = SDValue(); 1387 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1388 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1389 RegsToPass[i].second, InFlag); 1390 InFlag = Chain.getValue(1); 1391 } 1392 InFlag =SDValue(); 1393 } 1394 1395 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1396 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1397 // node so that legalize doesn't hack it. 
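  // The flags computed below (direct vs. indirect, ARM vs. Thumb callee)
  // feed the choice of call opcode (BL, BLX, predicable call, or the
  // no-link pseudo) made after the callee has been rewritten.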
1398 bool isDirect = false; 1399 bool isARMFunc = false; 1400 bool isLocalARMFunc = false; 1401 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1402 1403 if (EnableARMLongCalls) { 1404 assert (getTargetMachine().getRelocationModel() == Reloc::Static 1405 && "long-calls with non-static relocation model!"); 1406 // Handle a global address or an external symbol. If it's not one of 1407 // those, the target's already in a register, so we don't need to do 1408 // anything extra. 1409 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1410 const GlobalValue *GV = G->getGlobal(); 1411 // Create a constant pool entry for the callee address 1412 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1413 ARMConstantPoolValue *CPV = 1414 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); 1415 1416 // Get the address of the callee into a register 1417 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1418 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1419 Callee = DAG.getLoad(getPointerTy(), dl, 1420 DAG.getEntryNode(), CPAddr, 1421 MachinePointerInfo::getConstantPool(), 1422 false, false, 0); 1423 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { 1424 const char *Sym = S->getSymbol(); 1425 1426 // Create a constant pool entry for the callee address 1427 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1428 ARMConstantPoolValue *CPV = 1429 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1430 ARMPCLabelIndex, 0); 1431 // Get the address of the callee into a register 1432 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1433 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1434 Callee = DAG.getLoad(getPointerTy(), dl, 1435 DAG.getEntryNode(), CPAddr, 1436 MachinePointerInfo::getConstantPool(), 1437 false, false, 0); 1438 } 1439 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1440 const GlobalValue *GV = G->getGlobal(); 1441 isDirect = true; 1442 bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); 1443 bool isStub = (isExt && Subtarget->isTargetDarwin()) && 1444 getTargetMachine().getRelocationModel() != Reloc::Static; 1445 isARMFunc = !Subtarget->isThumb() || isStub; 1446 // ARM call to a local ARM function is predicable. 1447 isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); 1448 // tBX takes a register source operand. 
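    // Without v5T there is no BLX, so a Thumb1 caller can only reach ARM
    // code via BX with the target in a register; load the callee's address
    // from the constant pool so the call can be made indirectly.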
1449 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1450 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1451 ARMConstantPoolValue *CPV = 1452 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); 1453 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1454 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1455 Callee = DAG.getLoad(getPointerTy(), dl, 1456 DAG.getEntryNode(), CPAddr, 1457 MachinePointerInfo::getConstantPool(), 1458 false, false, 0); 1459 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1460 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1461 getPointerTy(), Callee, PICLabel); 1462 } else { 1463 // On ELF targets for PIC code, direct calls should go through the PLT 1464 unsigned OpFlags = 0; 1465 if (Subtarget->isTargetELF() && 1466 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1467 OpFlags = ARMII::MO_PLT; 1468 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); 1469 } 1470 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1471 isDirect = true; 1472 bool isStub = Subtarget->isTargetDarwin() && 1473 getTargetMachine().getRelocationModel() != Reloc::Static; 1474 isARMFunc = !Subtarget->isThumb() || isStub; 1475 // tBX takes a register source operand. 1476 const char *Sym = S->getSymbol(); 1477 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1478 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1479 ARMConstantPoolValue *CPV = 1480 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1481 ARMPCLabelIndex, 4); 1482 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1483 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1484 Callee = DAG.getLoad(getPointerTy(), dl, 1485 DAG.getEntryNode(), CPAddr, 1486 MachinePointerInfo::getConstantPool(), 1487 false, false, 0); 1488 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1489 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1490 getPointerTy(), Callee, PICLabel); 1491 } else { 1492 unsigned OpFlags = 0; 1493 // On ELF targets for PIC code, direct calls should go through the PLT 1494 if (Subtarget->isTargetELF() && 1495 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1496 OpFlags = ARMII::MO_PLT; 1497 Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); 1498 } 1499 } 1500 1501 // FIXME: handle tail calls differently. 1502 unsigned CallOpc; 1503 if (Subtarget->isThumb()) { 1504 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) 1505 CallOpc = ARMISD::CALL_NOLINK; 1506 else 1507 CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; 1508 } else { 1509 CallOpc = (isDirect || Subtarget->hasV5TOps()) 1510 ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) 1511 : ARMISD::CALL_NOLINK; 1512 } 1513 1514 std::vector<SDValue> Ops; 1515 Ops.push_back(Chain); 1516 Ops.push_back(Callee); 1517 1518 // Add argument registers to the end of the list so that they are known live 1519 // into the call. 1520 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1521 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1522 RegsToPass[i].second.getValueType())); 1523 1524 if (InFlag.getNode()) 1525 Ops.push_back(InFlag); 1526 1527 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1528 if (isTailCall) 1529 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); 1530 1531 // Returns a chain and a flag for retval copy to use. 
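  // The glue value threads the argument-register copies into the call node
  // so the scheduler cannot separate them from the call itself.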
1532 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 1533 InFlag = Chain.getValue(1); 1534 1535 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 1536 DAG.getIntPtrConstant(0, true), InFlag); 1537 if (!Ins.empty()) 1538 InFlag = Chain.getValue(1); 1539 1540 // Handle result values, copying them out of physregs into vregs that we 1541 // return. 1542 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, 1543 dl, DAG, InVals); 1544 } 1545 1546 /// HandleByVal - Every parameter *after* a byval parameter is passed 1547 /// on the stack. Remember the next parameter register to allocate, 1548 /// and then confiscate the rest of the parameter registers to insure 1549 /// this. 1550 void 1551 llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { 1552 unsigned reg = State->AllocateReg(GPRArgRegs, 4); 1553 assert((State->getCallOrPrologue() == Prologue || 1554 State->getCallOrPrologue() == Call) && 1555 "unhandled ParmContext"); 1556 if ((!State->isFirstByValRegValid()) && 1557 (ARM::R0 <= reg) && (reg <= ARM::R3)) { 1558 State->setFirstByValReg(reg); 1559 // At a call site, a byval parameter that is split between 1560 // registers and memory needs its size truncated here. In a 1561 // function prologue, such byval parameters are reassembled in 1562 // memory, and are not truncated. 1563 if (State->getCallOrPrologue() == Call) { 1564 unsigned excess = 4 * (ARM::R4 - reg); 1565 assert(size >= excess && "expected larger existing stack allocation"); 1566 size -= excess; 1567 } 1568 } 1569 // Confiscate any remaining parameter registers to preclude their 1570 // assignment to subsequent parameters. 1571 while (State->AllocateReg(GPRArgRegs, 4)) 1572 ; 1573 } 1574 1575 /// MatchingStackOffset - Return true if the given stack call argument is 1576 /// already available in the same position (relatively) of the caller's 1577 /// incoming argument stack. 1578 static 1579 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 1580 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 1581 const ARMInstrInfo *TII) { 1582 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 1583 int FI = INT_MAX; 1584 if (Arg.getOpcode() == ISD::CopyFromReg) { 1585 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 1586 if (!TargetRegisterInfo::isVirtualRegister(VR)) 1587 return false; 1588 MachineInstr *Def = MRI->getVRegDef(VR); 1589 if (!Def) 1590 return false; 1591 if (!Flags.isByVal()) { 1592 if (!TII->isLoadFromStackSlot(Def, FI)) 1593 return false; 1594 } else { 1595 return false; 1596 } 1597 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 1598 if (Flags.isByVal()) 1599 // ByVal argument is passed in as a pointer but it's now being 1600 // dereferenced. e.g. 1601 // define @foo(%struct.X* %A) { 1602 // tail call @bar(%struct.X* byval %A) 1603 // } 1604 return false; 1605 SDValue Ptr = Ld->getBasePtr(); 1606 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 1607 if (!FINode) 1608 return false; 1609 FI = FINode->getIndex(); 1610 } else 1611 return false; 1612 1613 assert(FI != INT_MAX); 1614 if (!MFI->isFixedObjectIndex(FI)) 1615 return false; 1616 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 1617 } 1618 1619 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 1620 /// for tail call optimization. Targets which want to do tail call 1621 /// optimization should implement this function. 
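/// For ARM this accepts only conservative "sibcall" cases: the callee must be
/// reachable without growing the caller's stack frame or changing how the
/// result is returned.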
1622 bool 1623 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 1624 CallingConv::ID CalleeCC, 1625 bool isVarArg, 1626 bool isCalleeStructRet, 1627 bool isCallerStructRet, 1628 const SmallVectorImpl<ISD::OutputArg> &Outs, 1629 const SmallVectorImpl<SDValue> &OutVals, 1630 const SmallVectorImpl<ISD::InputArg> &Ins, 1631 SelectionDAG& DAG) const { 1632 const Function *CallerF = DAG.getMachineFunction().getFunction(); 1633 CallingConv::ID CallerCC = CallerF->getCallingConv(); 1634 bool CCMatch = CallerCC == CalleeCC; 1635 1636 // Look for obvious safe cases to perform tail call optimization that do not 1637 // require ABI changes. This is what gcc calls sibcall. 1638 1639 // Do not sibcall optimize vararg calls unless the call site is not passing 1640 // any arguments. 1641 if (isVarArg && !Outs.empty()) 1642 return false; 1643 1644 // Also avoid sibcall optimization if either caller or callee uses struct 1645 // return semantics. 1646 if (isCalleeStructRet || isCallerStructRet) 1647 return false; 1648 1649 // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: 1650 // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as 1651 // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation 1652 // support in the assembler and linker to be used. This would need to be 1653 // fixed to fully support tail calls in Thumb1. 1654 // 1655 // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take 1656 // LR. This means if we need to reload LR, it takes an extra instructions, 1657 // which outweighs the value of the tail call; but here we don't know yet 1658 // whether LR is going to be used. Probably the right approach is to 1659 // generate the tail call here and turn it back into CALL/RET in 1660 // emitEpilogue if LR is used. 1661 1662 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 1663 // but we need to make sure there are enough registers; the only valid 1664 // registers are the 4 used for parameters. We don't currently do this 1665 // case. 1666 if (Subtarget->isThumb1Only()) 1667 return false; 1668 1669 // If the calling conventions do not match, then we'd better make sure the 1670 // results are returned in the same way as what the caller expects. 1671 if (!CCMatch) { 1672 SmallVector<CCValAssign, 16> RVLocs1; 1673 ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), 1674 getTargetMachine(), RVLocs1, *DAG.getContext(), Call); 1675 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 1676 1677 SmallVector<CCValAssign, 16> RVLocs2; 1678 ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), 1679 getTargetMachine(), RVLocs2, *DAG.getContext(), Call); 1680 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 1681 1682 if (RVLocs1.size() != RVLocs2.size()) 1683 return false; 1684 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 1685 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 1686 return false; 1687 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 1688 return false; 1689 if (RVLocs1[i].isRegLoc()) { 1690 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 1691 return false; 1692 } else { 1693 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 1694 return false; 1695 } 1696 } 1697 } 1698 1699 // If the callee takes no arguments then go on to check the results of the 1700 // call. 1701 if (!Outs.empty()) { 1702 // Check if stack adjustment is needed. 
For now, do not do this if any 1703 // argument is passed on the stack. 1704 SmallVector<CCValAssign, 16> ArgLocs; 1705 ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), 1706 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 1707 CCInfo.AnalyzeCallOperands(Outs, 1708 CCAssignFnForNode(CalleeCC, false, isVarArg)); 1709 if (CCInfo.getNextStackOffset()) { 1710 MachineFunction &MF = DAG.getMachineFunction(); 1711 1712 // Check if the arguments are already laid out in the right way as 1713 // the caller's fixed stack objects. 1714 MachineFrameInfo *MFI = MF.getFrameInfo(); 1715 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 1716 const ARMInstrInfo *TII = 1717 ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); 1718 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1719 i != e; 1720 ++i, ++realArgIdx) { 1721 CCValAssign &VA = ArgLocs[i]; 1722 EVT RegVT = VA.getLocVT(); 1723 SDValue Arg = OutVals[realArgIdx]; 1724 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1725 if (VA.getLocInfo() == CCValAssign::Indirect) 1726 return false; 1727 if (VA.needsCustom()) { 1728 // f64 and vector types are split into multiple registers or 1729 // register/stack-slot combinations. The types will not match 1730 // the registers; give up on memory f64 refs until we figure 1731 // out what to do about this. 1732 if (!VA.isRegLoc()) 1733 return false; 1734 if (!ArgLocs[++i].isRegLoc()) 1735 return false; 1736 if (RegVT == MVT::v2f64) { 1737 if (!ArgLocs[++i].isRegLoc()) 1738 return false; 1739 if (!ArgLocs[++i].isRegLoc()) 1740 return false; 1741 } 1742 } else if (!VA.isRegLoc()) { 1743 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 1744 MFI, MRI, TII)) 1745 return false; 1746 } 1747 } 1748 } 1749 } 1750 1751 return true; 1752 } 1753 1754 SDValue 1755 ARMTargetLowering::LowerReturn(SDValue Chain, 1756 CallingConv::ID CallConv, bool isVarArg, 1757 const SmallVectorImpl<ISD::OutputArg> &Outs, 1758 const SmallVectorImpl<SDValue> &OutVals, 1759 DebugLoc dl, SelectionDAG &DAG) const { 1760 1761 // CCValAssign - represent the assignment of the return value to a location. 1762 SmallVector<CCValAssign, 16> RVLocs; 1763 1764 // CCState - Info about the registers and stack slots. 1765 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1766 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 1767 1768 // Analyze outgoing return values. 1769 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 1770 isVarArg)); 1771 1772 // If this is the first return lowered for this function, add 1773 // the regs to the liveout set for the function. 1774 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1775 for (unsigned i = 0; i != RVLocs.size(); ++i) 1776 if (RVLocs[i].isRegLoc()) 1777 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1778 } 1779 1780 SDValue Flag; 1781 1782 // Copy the result values into the output registers. 
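  // Return values marked needsCustom (f64, and each half of v2f64) are split
  // with VMOVRRD into two i32 halves so they can be returned in a GPR pair.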
1783 for (unsigned i = 0, realRVLocIdx = 0; 1784 i != RVLocs.size(); 1785 ++i, ++realRVLocIdx) { 1786 CCValAssign &VA = RVLocs[i]; 1787 assert(VA.isRegLoc() && "Can only return in registers!"); 1788 1789 SDValue Arg = OutVals[realRVLocIdx]; 1790 1791 switch (VA.getLocInfo()) { 1792 default: llvm_unreachable("Unknown loc info!"); 1793 case CCValAssign::Full: break; 1794 case CCValAssign::BCvt: 1795 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1796 break; 1797 } 1798 1799 if (VA.needsCustom()) { 1800 if (VA.getLocVT() == MVT::v2f64) { 1801 // Extract the first half and return it in two registers. 1802 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1803 DAG.getConstant(0, MVT::i32)); 1804 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 1805 DAG.getVTList(MVT::i32, MVT::i32), Half); 1806 1807 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 1808 Flag = Chain.getValue(1); 1809 VA = RVLocs[++i]; // skip ahead to next loc 1810 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 1811 HalfGPRs.getValue(1), Flag); 1812 Flag = Chain.getValue(1); 1813 VA = RVLocs[++i]; // skip ahead to next loc 1814 1815 // Extract the 2nd half and fall through to handle it as an f64 value. 1816 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1817 DAG.getConstant(1, MVT::i32)); 1818 } 1819 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 1820 // available. 1821 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1822 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 1823 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 1824 Flag = Chain.getValue(1); 1825 VA = RVLocs[++i]; // skip ahead to next loc 1826 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 1827 Flag); 1828 } else 1829 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 1830 1831 // Guarantee that all emitted copies are 1832 // stuck together, avoiding something bad. 1833 Flag = Chain.getValue(1); 1834 } 1835 1836 SDValue result; 1837 if (Flag.getNode()) 1838 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 1839 else // Return Void 1840 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); 1841 1842 return result; 1843 } 1844 1845 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { 1846 if (N->getNumValues() != 1) 1847 return false; 1848 if (!N->hasNUsesOfValue(1, 0)) 1849 return false; 1850 1851 unsigned NumCopies = 0; 1852 SDNode* Copies[2]; 1853 SDNode *Use = *N->use_begin(); 1854 if (Use->getOpcode() == ISD::CopyToReg) { 1855 Copies[NumCopies++] = Use; 1856 } else if (Use->getOpcode() == ARMISD::VMOVRRD) { 1857 // f64 returned in a pair of GPRs. 1858 for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end(); 1859 UI != UE; ++UI) { 1860 if (UI->getOpcode() != ISD::CopyToReg) 1861 return false; 1862 Copies[UI.getUse().getResNo()] = *UI; 1863 ++NumCopies; 1864 } 1865 } else if (Use->getOpcode() == ISD::BITCAST) { 1866 // f32 returned in a single GPR. 
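    // The bitcast must have exactly one use, and that use must be the
    // CopyToReg feeding the return; anything else means the value escapes.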
1867 if (!Use->hasNUsesOfValue(1, 0)) 1868 return false; 1869 Use = *Use->use_begin(); 1870 if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0)) 1871 return false; 1872 Copies[NumCopies++] = Use; 1873 } else { 1874 return false; 1875 } 1876 1877 if (NumCopies != 1 && NumCopies != 2) 1878 return false; 1879 1880 bool HasRet = false; 1881 for (unsigned i = 0; i < NumCopies; ++i) { 1882 SDNode *Copy = Copies[i]; 1883 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); 1884 UI != UE; ++UI) { 1885 if (UI->getOpcode() == ISD::CopyToReg) { 1886 SDNode *Use = *UI; 1887 if (Use == Copies[0] || Use == Copies[1]) 1888 continue; 1889 return false; 1890 } 1891 if (UI->getOpcode() != ARMISD::RET_FLAG) 1892 return false; 1893 HasRet = true; 1894 } 1895 } 1896 1897 return HasRet; 1898 } 1899 1900 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 1901 if (!EnableARMTailCalls) 1902 return false; 1903 1904 if (!CI->isTailCall()) 1905 return false; 1906 1907 return !Subtarget->isThumb1Only(); 1908 } 1909 1910 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 1911 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 1912 // one of the above mentioned nodes. It has to be wrapped because otherwise 1913 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 1914 // be used to form addressing mode. These wrapped nodes will be selected 1915 // into MOVi. 1916 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 1917 EVT PtrVT = Op.getValueType(); 1918 // FIXME there is no actual debug info here 1919 DebugLoc dl = Op.getDebugLoc(); 1920 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1921 SDValue Res; 1922 if (CP->isMachineConstantPoolEntry()) 1923 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 1924 CP->getAlignment()); 1925 else 1926 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 1927 CP->getAlignment()); 1928 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 1929 } 1930 1931 unsigned ARMTargetLowering::getJumpTableEncoding() const { 1932 return MachineJumpTableInfo::EK_Inline; 1933 } 1934 1935 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 1936 SelectionDAG &DAG) const { 1937 MachineFunction &MF = DAG.getMachineFunction(); 1938 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1939 unsigned ARMPCLabelIndex = 0; 1940 DebugLoc DL = Op.getDebugLoc(); 1941 EVT PtrVT = getPointerTy(); 1942 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 1943 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1944 SDValue CPAddr; 1945 if (RelocM == Reloc::Static) { 1946 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 1947 } else { 1948 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 1949 ARMPCLabelIndex = AFI->createPICLabelUId(); 1950 ARMConstantPoolValue *CPV = 1951 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, 1952 ARMCP::CPBlockAddress, PCAdj); 1953 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1954 } 1955 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 1956 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 1957 MachinePointerInfo::getConstantPool(), 1958 false, false, 0); 1959 if (RelocM == Reloc::Static) 1960 return Result; 1961 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1962 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 1963 } 1964 1965 // Lower ISD::GlobalTLSAddress using the "general dynamic" model 1966 SDValue 1967 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 1968 SelectionDAG &DAG) const { 1969 DebugLoc dl = GA->getDebugLoc(); 1970 EVT PtrVT = getPointerTy(); 1971 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 1972 MachineFunction &MF = DAG.getMachineFunction(); 1973 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1974 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1975 ARMConstantPoolValue *CPV = 1976 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 1977 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); 1978 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1979 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 1980 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 1981 MachinePointerInfo::getConstantPool(), 1982 false, false, 0); 1983 SDValue Chain = Argument.getValue(1); 1984 1985 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1986 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 1987 1988 // call __tls_get_addr. 1989 ArgListTy Args; 1990 ArgListEntry Entry; 1991 Entry.Node = Argument; 1992 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); 1993 Args.push_back(Entry); 1994 // FIXME: is there useful debug info available here? 1995 std::pair<SDValue, SDValue> CallResult = 1996 LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()), 1997 false, false, false, false, 1998 0, CallingConv::C, false, /*isReturnValueUsed=*/true, 1999 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 2000 return CallResult.first; 2001 } 2002 2003 // Lower ISD::GlobalTLSAddress using the "initial exec" or 2004 // "local exec" model. 2005 SDValue 2006 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 2007 SelectionDAG &DAG) const { 2008 const GlobalValue *GV = GA->getGlobal(); 2009 DebugLoc dl = GA->getDebugLoc(); 2010 SDValue Offset; 2011 SDValue Chain = DAG.getEntryNode(); 2012 EVT PtrVT = getPointerTy(); 2013 // Get the Thread Pointer 2014 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2015 2016 if (GV->isDeclaration()) { 2017 MachineFunction &MF = DAG.getMachineFunction(); 2018 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2019 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2020 // Initial exec model. 2021 unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; 2022 ARMConstantPoolValue *CPV = 2023 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2024 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, 2025 true); 2026 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2027 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2028 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2029 MachinePointerInfo::getConstantPool(), 2030 false, false, 0); 2031 Chain = Offset.getValue(1); 2032 2033 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2034 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 2035 2036 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2037 MachinePointerInfo::getConstantPool(), 2038 false, false, 0); 2039 } else { 2040 // local exec model 2041 ARMConstantPoolValue *CPV = 2042 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); 2043 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2044 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2045 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2046 MachinePointerInfo::getConstantPool(), 2047 false, false, 0); 2048 } 2049 2050 // The address of the thread local variable is the add of the thread 2051 // pointer with the offset of the variable. 2052 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 2053 } 2054 2055 SDValue 2056 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 2057 // TODO: implement the "local dynamic" model 2058 assert(Subtarget->isTargetELF() && 2059 "TLS not implemented for non-ELF targets"); 2060 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2061 // If the relocation model is PIC, use the "General Dynamic" TLS Model, 2062 // otherwise use the "Local Exec" TLS Model 2063 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 2064 return LowerToTLSGeneralDynamicModel(GA, DAG); 2065 else 2066 return LowerToTLSExecModels(GA, DAG); 2067 } 2068 2069 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 2070 SelectionDAG &DAG) const { 2071 EVT PtrVT = getPointerTy(); 2072 DebugLoc dl = Op.getDebugLoc(); 2073 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2074 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2075 if (RelocM == Reloc::PIC_) { 2076 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 2077 ARMConstantPoolValue *CPV = 2078 ARMConstantPoolConstant::Create(GV, 2079 UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); 2080 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2081 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2082 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 2083 CPAddr, 2084 MachinePointerInfo::getConstantPool(), 2085 false, false, 0); 2086 SDValue Chain = Result.getValue(1); 2087 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 2088 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 2089 if (!UseGOTOFF) 2090 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 2091 MachinePointerInfo::getGOT(), false, false, 0); 2092 return Result; 2093 } 2094 2095 // If we have T2 ops, we can materialize the address directly via movt/movw 2096 // pair. This is always cheaper. 2097 if (Subtarget->useMovt()) { 2098 ++NumMovwMovt; 2099 // FIXME: Once remat is capable of dealing with instructions with register 2100 // operands, expand this into two nodes. 
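    // With movw/movt the address is materialized inline in two instructions;
    // the else branch below is the constant-pool fallback for older cores.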
2101 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2102 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2103 } else { 2104 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2105 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2106 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2107 MachinePointerInfo::getConstantPool(), 2108 false, false, 0); 2109 } 2110 } 2111 2112 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 2113 SelectionDAG &DAG) const { 2114 EVT PtrVT = getPointerTy(); 2115 DebugLoc dl = Op.getDebugLoc(); 2116 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2117 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2118 MachineFunction &MF = DAG.getMachineFunction(); 2119 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2120 2121 // FIXME: Enable this for static codegen when tool issues are fixed. 2122 if (Subtarget->useMovt() && RelocM != Reloc::Static) { 2123 ++NumMovwMovt; 2124 // FIXME: Once remat is capable of dealing with instructions with register 2125 // operands, expand this into two nodes. 2126 if (RelocM == Reloc::Static) 2127 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2128 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2129 2130 unsigned Wrapper = (RelocM == Reloc::PIC_) 2131 ? ARMISD::WrapperPIC : ARMISD::WrapperDYN; 2132 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, 2133 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2134 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2135 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 2136 MachinePointerInfo::getGOT(), false, false, 0); 2137 return Result; 2138 } 2139 2140 unsigned ARMPCLabelIndex = 0; 2141 SDValue CPAddr; 2142 if (RelocM == Reloc::Static) { 2143 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2144 } else { 2145 ARMPCLabelIndex = AFI->createPICLabelUId(); 2146 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 2147 ARMConstantPoolValue *CPV = 2148 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 2149 PCAdj); 2150 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2151 } 2152 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2153 2154 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2155 MachinePointerInfo::getConstantPool(), 2156 false, false, 0); 2157 SDValue Chain = Result.getValue(1); 2158 2159 if (RelocM == Reloc::PIC_) { 2160 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2161 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2162 } 2163 2164 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2165 Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), 2166 false, false, 0); 2167 2168 return Result; 2169 } 2170 2171 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 2172 SelectionDAG &DAG) const { 2173 assert(Subtarget->isTargetELF() && 2174 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 2175 MachineFunction &MF = DAG.getMachineFunction(); 2176 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2177 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2178 EVT PtrVT = getPointerTy(); 2179 DebugLoc dl = Op.getDebugLoc(); 2180 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 2181 ARMConstantPoolValue *CPV = 2182 ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", 2183 ARMPCLabelIndex, PCAdj); 2184 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2185 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2186 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2187 MachinePointerInfo::getConstantPool(), 2188 false, false, 0); 2189 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2190 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2191 } 2192 2193 SDValue 2194 ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) 2195 const { 2196 DebugLoc dl = Op.getDebugLoc(); 2197 return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, 2198 Op.getOperand(0), Op.getOperand(1)); 2199 } 2200 2201 SDValue 2202 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 2203 DebugLoc dl = Op.getDebugLoc(); 2204 SDValue Val = DAG.getConstant(0, MVT::i32); 2205 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), 2206 Op.getOperand(1), Val); 2207 } 2208 2209 SDValue 2210 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 2211 DebugLoc dl = Op.getDebugLoc(); 2212 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 2213 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 2214 } 2215 2216 SDValue 2217 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 2218 const ARMSubtarget *Subtarget) const { 2219 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2220 DebugLoc dl = Op.getDebugLoc(); 2221 switch (IntNo) { 2222 default: return SDValue(); // Don't custom lower most intrinsics. 2223 case Intrinsic::arm_thread_pointer: { 2224 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2225 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2226 } 2227 case Intrinsic::eh_sjlj_lsda: { 2228 MachineFunction &MF = DAG.getMachineFunction(); 2229 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2230 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2231 EVT PtrVT = getPointerTy(); 2232 DebugLoc dl = Op.getDebugLoc(); 2233 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2234 SDValue CPAddr; 2235 unsigned PCAdj = (RelocM != Reloc::PIC_) 2236 ? 0 : (Subtarget->isThumb() ? 4 : 8); 2237 ARMConstantPoolValue *CPV = 2238 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, 2239 ARMCP::CPLSDA, PCAdj); 2240 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2241 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2242 SDValue Result = 2243 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2244 MachinePointerInfo::getConstantPool(), 2245 false, false, 0); 2246 2247 if (RelocM == Reloc::PIC_) { 2248 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2249 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2250 } 2251 return Result; 2252 } 2253 case Intrinsic::arm_neon_vmulls: 2254 case Intrinsic::arm_neon_vmullu: { 2255 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) 2256 ? 
ARMISD::VMULLs : ARMISD::VMULLu; 2257 return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(), 2258 Op.getOperand(1), Op.getOperand(2)); 2259 } 2260 } 2261 } 2262 2263 static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, 2264 const ARMSubtarget *Subtarget) { 2265 DebugLoc dl = Op.getDebugLoc(); 2266 if (!Subtarget->hasDataBarrier()) { 2267 // Some ARMv6 cpus can support data barriers with an mcr instruction. 2268 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2269 // here. 2270 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2271 "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 2272 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2273 DAG.getConstant(0, MVT::i32)); 2274 } 2275 2276 SDValue Op5 = Op.getOperand(5); 2277 bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0; 2278 unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 2279 unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); 2280 bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0); 2281 2282 ARM_MB::MemBOpt DMBOpt; 2283 if (isDeviceBarrier) 2284 DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY; 2285 else 2286 DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH; 2287 return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 2288 DAG.getConstant(DMBOpt, MVT::i32)); 2289 } 2290 2291 2292 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 2293 const ARMSubtarget *Subtarget) { 2294 // FIXME: handle "fence singlethread" more efficiently. 2295 DebugLoc dl = Op.getDebugLoc(); 2296 if (!Subtarget->hasDataBarrier()) { 2297 // Some ARMv6 cpus can support data barriers with an mcr instruction. 2298 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2299 // here. 2300 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2301 "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 2302 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2303 DAG.getConstant(0, MVT::i32)); 2304 } 2305 2306 return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 2307 DAG.getConstant(ARM_MB::ISH, MVT::i32)); 2308 } 2309 2310 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, 2311 const ARMSubtarget *Subtarget) { 2312 // ARM pre v5TE and Thumb1 does not have preload instructions. 2313 if (!(Subtarget->isThumb2() || 2314 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) 2315 // Just preserve the chain. 2316 return Op.getOperand(0); 2317 2318 DebugLoc dl = Op.getDebugLoc(); 2319 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; 2320 if (!isRead && 2321 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) 2322 // ARMv7 with MP extension has PLDW. 2323 return Op.getOperand(0); 2324 2325 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); 2326 if (Subtarget->isThumb()) { 2327 // Invert the bits. 2328 isRead = ~isRead & 1; 2329 isData = ~isData & 1; 2330 } 2331 2332 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), 2333 Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), 2334 DAG.getConstant(isData, MVT::i32)); 2335 } 2336 2337 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2338 MachineFunction &MF = DAG.getMachineFunction(); 2339 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2340 2341 // vastart just stores the address of the VarArgsFrameIndex slot into the 2342 // memory location argument. 
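  // VarArgsFrameIndex is set up by VarArgStyleRegisters during formal
  // argument lowering; va_arg then walks from this address through the
  // spilled r0-r3 area and on into the caller's stack-passed arguments.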
2343 DebugLoc dl = Op.getDebugLoc(); 2344 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2345 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2346 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2347 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2348 MachinePointerInfo(SV), false, false, 0); 2349 } 2350 2351 SDValue 2352 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2353 SDValue &Root, SelectionDAG &DAG, 2354 DebugLoc dl) const { 2355 MachineFunction &MF = DAG.getMachineFunction(); 2356 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2357 2358 TargetRegisterClass *RC; 2359 if (AFI->isThumb1OnlyFunction()) 2360 RC = ARM::tGPRRegisterClass; 2361 else 2362 RC = ARM::GPRRegisterClass; 2363 2364 // Transform the arguments stored in physical registers into virtual ones. 2365 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2366 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2367 2368 SDValue ArgValue2; 2369 if (NextVA.isMemLoc()) { 2370 MachineFrameInfo *MFI = MF.getFrameInfo(); 2371 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2372 2373 // Create load node to retrieve arguments from the stack. 2374 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2375 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2376 MachinePointerInfo::getFixedStack(FI), 2377 false, false, 0); 2378 } else { 2379 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2380 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2381 } 2382 2383 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2384 } 2385 2386 void 2387 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, 2388 unsigned &VARegSize, unsigned &VARegSaveSize) 2389 const { 2390 unsigned NumGPRs; 2391 if (CCInfo.isFirstByValRegValid()) 2392 NumGPRs = ARM::R4 - CCInfo.getFirstByValReg(); 2393 else { 2394 unsigned int firstUnalloced; 2395 firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, 2396 sizeof(GPRArgRegs) / 2397 sizeof(GPRArgRegs[0])); 2398 NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; 2399 } 2400 2401 unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); 2402 VARegSize = NumGPRs * 4; 2403 VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); 2404 } 2405 2406 // The remaining GPRs hold either the beginning of variable-argument 2407 // data, or the beginning of an aggregate passed by value (usually 2408 // byval). Either way, we allocate stack slots adjacent to the data 2409 // provided by our caller, and store the unallocated registers there. 2410 // If this is a variadic function, the va_list pointer will begin with 2411 // these values; otherwise, this reassembles a (byval) structure that 2412 // was split between registers and memory.
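// For example, in a variadic function whose only named argument arrives in
// r0, the registers r1-r3 are stored here so that the va_list can advance
// from their spill slots directly into the arguments pushed by the caller.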
2413 void 2414 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, 2415 DebugLoc dl, SDValue &Chain, 2416 unsigned ArgOffset) const { 2417 MachineFunction &MF = DAG.getMachineFunction(); 2418 MachineFrameInfo *MFI = MF.getFrameInfo(); 2419 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2420 unsigned firstRegToSaveIndex; 2421 if (CCInfo.isFirstByValRegValid()) 2422 firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0; 2423 else { 2424 firstRegToSaveIndex = CCInfo.getFirstUnallocated 2425 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); 2426 } 2427 2428 unsigned VARegSize, VARegSaveSize; 2429 computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); 2430 if (VARegSaveSize) { 2431 // If this function is vararg, store any remaining integer argument regs 2432 // to their spots on the stack so that they may be loaded by deferencing 2433 // the result of va_next. 2434 AFI->setVarArgsRegSaveSize(VARegSaveSize); 2435 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize, 2436 ArgOffset + VARegSaveSize 2437 - VARegSize, 2438 false)); 2439 SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), 2440 getPointerTy()); 2441 2442 SmallVector<SDValue, 4> MemOps; 2443 for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { 2444 TargetRegisterClass *RC; 2445 if (AFI->isThumb1OnlyFunction()) 2446 RC = ARM::tGPRRegisterClass; 2447 else 2448 RC = ARM::GPRRegisterClass; 2449 2450 unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); 2451 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2452 SDValue Store = 2453 DAG.getStore(Val.getValue(1), dl, Val, FIN, 2454 MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), 2455 false, false, 0); 2456 MemOps.push_back(Store); 2457 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 2458 DAG.getConstant(4, getPointerTy())); 2459 } 2460 if (!MemOps.empty()) 2461 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 2462 &MemOps[0], MemOps.size()); 2463 } else 2464 // This will point to the next argument passed via stack. 2465 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); 2466 } 2467 2468 SDValue 2469 ARMTargetLowering::LowerFormalArguments(SDValue Chain, 2470 CallingConv::ID CallConv, bool isVarArg, 2471 const SmallVectorImpl<ISD::InputArg> 2472 &Ins, 2473 DebugLoc dl, SelectionDAG &DAG, 2474 SmallVectorImpl<SDValue> &InVals) 2475 const { 2476 MachineFunction &MF = DAG.getMachineFunction(); 2477 MachineFrameInfo *MFI = MF.getFrameInfo(); 2478 2479 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2480 2481 // Assign locations to all of the incoming arguments. 2482 SmallVector<CCValAssign, 16> ArgLocs; 2483 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2484 getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue); 2485 CCInfo.AnalyzeFormalArguments(Ins, 2486 CCAssignFnForNode(CallConv, /* Return*/ false, 2487 isVarArg)); 2488 2489 SmallVector<SDValue, 16> ArgValues; 2490 int lastInsIndex = -1; 2491 2492 SDValue ArgValue; 2493 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2494 CCValAssign &VA = ArgLocs[i]; 2495 2496 // Arguments stored in registers. 2497 if (VA.isRegLoc()) { 2498 EVT RegVT = VA.getLocVT(); 2499 2500 if (VA.needsCustom()) { 2501 // f64 and vector types are split up into multiple registers or 2502 // combinations of registers and stack slots. 
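        // GetF64FormalArgument reassembles each f64 from its two i32 homes
        // with VMOVDRR; a v2f64 argument is then rebuilt by inserting the two
        // f64 halves into an undef vector.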
2503 if (VA.getLocVT() == MVT::v2f64) { 2504 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2505 Chain, DAG, dl); 2506 VA = ArgLocs[++i]; // skip ahead to next loc 2507 SDValue ArgValue2; 2508 if (VA.isMemLoc()) { 2509 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 2510 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2511 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 2512 MachinePointerInfo::getFixedStack(FI), 2513 false, false, 0); 2514 } else { 2515 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 2516 Chain, DAG, dl); 2517 } 2518 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 2519 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2520 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 2521 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2522 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 2523 } else 2524 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 2525 2526 } else { 2527 TargetRegisterClass *RC; 2528 2529 if (RegVT == MVT::f32) 2530 RC = ARM::SPRRegisterClass; 2531 else if (RegVT == MVT::f64) 2532 RC = ARM::DPRRegisterClass; 2533 else if (RegVT == MVT::v2f64) 2534 RC = ARM::QPRRegisterClass; 2535 else if (RegVT == MVT::i32) 2536 RC = (AFI->isThumb1OnlyFunction() ? 2537 ARM::tGPRRegisterClass : ARM::GPRRegisterClass); 2538 else 2539 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 2540 2541 // Transform the arguments in physical registers into virtual ones. 2542 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2543 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 2544 } 2545 2546 // If this is an 8 or 16-bit value, it is really passed promoted 2547 // to 32 bits. Insert an assert[sz]ext to capture this, then 2548 // truncate to the right size. 2549 switch (VA.getLocInfo()) { 2550 default: llvm_unreachable("Unknown loc info!"); 2551 case CCValAssign::Full: break; 2552 case CCValAssign::BCvt: 2553 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 2554 break; 2555 case CCValAssign::SExt: 2556 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 2557 DAG.getValueType(VA.getValVT())); 2558 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2559 break; 2560 case CCValAssign::ZExt: 2561 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 2562 DAG.getValueType(VA.getValVT())); 2563 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2564 break; 2565 } 2566 2567 InVals.push_back(ArgValue); 2568 2569 } else { // VA.isRegLoc() 2570 2571 // sanity check 2572 assert(VA.isMemLoc()); 2573 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 2574 2575 int index = ArgLocs[i].getValNo(); 2576 2577 // Some Ins[] entries become multiple ArgLoc[] entries. 2578 // Process them only once. 2579 if (index != lastInsIndex) 2580 { 2581 ISD::ArgFlagsTy Flags = Ins[index].Flags; 2582 // FIXME: For now, all byval parameter objects are marked mutable. 2583 // This can be changed with more analysis. 2584 // In case of tail call optimization mark all arguments mutable. 2585 // Since they could be overwritten by lowering of arguments in case of 2586 // a tail call. 2587 if (Flags.isByVal()) { 2588 unsigned VARegSize, VARegSaveSize; 2589 computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); 2590 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0); 2591 unsigned Bytes = Flags.getByValSize() - VARegSize; 2592 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. 
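          // For byval we hand back a frame index into the caller-provided
          // copy rather than loading a value; the callee then addresses the
          // aggregate in place.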
2593 int FI = MFI->CreateFixedObject(Bytes, 2594 VA.getLocMemOffset(), false); 2595 InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); 2596 } else { 2597 int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, 2598 VA.getLocMemOffset(), true); 2599 2600 // Create load nodes to retrieve arguments from the stack. 2601 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2602 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2603 MachinePointerInfo::getFixedStack(FI), 2604 false, false, 0)); 2605 } 2606 lastInsIndex = index; 2607 } 2608 } 2609 } 2610 2611 // varargs 2612 if (isVarArg) 2613 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset()); 2614 2615 return Chain; 2616 } 2617 2618 /// isFloatingPointZero - Return true if this is +0.0. 2619 static bool isFloatingPointZero(SDValue Op) { 2620 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 2621 return CFP->getValueAPF().isPosZero(); 2622 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 2623 // Maybe this has already been legalized into the constant pool? 2624 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 2625 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 2626 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 2627 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 2628 return CFP->getValueAPF().isPosZero(); 2629 } 2630 } 2631 return false; 2632 } 2633 2634 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for 2635 /// the given operands. 2636 SDValue 2637 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2638 SDValue &ARMcc, SelectionDAG &DAG, 2639 DebugLoc dl) const { 2640 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 2641 unsigned C = RHSC->getZExtValue(); 2642 if (!isLegalICmpImmediate(C)) { 2643 // Constant does not fit, try adjusting it by one? 2644 switch (CC) { 2645 default: break; 2646 case ISD::SETLT: 2647 case ISD::SETGE: 2648 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { 2649 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; 2650 RHS = DAG.getConstant(C-1, MVT::i32); 2651 } 2652 break; 2653 case ISD::SETULT: 2654 case ISD::SETUGE: 2655 if (C != 0 && isLegalICmpImmediate(C-1)) { 2656 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 2657 RHS = DAG.getConstant(C-1, MVT::i32); 2658 } 2659 break; 2660 case ISD::SETLE: 2661 case ISD::SETGT: 2662 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { 2663 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 2664 RHS = DAG.getConstant(C+1, MVT::i32); 2665 } 2666 break; 2667 case ISD::SETULE: 2668 case ISD::SETUGT: 2669 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { 2670 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; 2671 RHS = DAG.getConstant(C+1, MVT::i32); 2672 } 2673 break; 2674 } 2675 } 2676 } 2677 2678 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2679 ARMISD::NodeType CompareType; 2680 switch (CondCode) { 2681 default: 2682 CompareType = ARMISD::CMP; 2683 break; 2684 case ARMCC::EQ: 2685 case ARMCC::NE: 2686 // Uses only Z Flag 2687 CompareType = ARMISD::CMPZ; 2688 break; 2689 } 2690 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2691 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); 2692 } 2693 2694 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 
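/// A compare against +0.0 uses the single-operand CMPFPw0 form; FMSTAT then
/// copies the VFP status flags into CPSR so the integer CMOV/BRCOND machinery
/// can consume them.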
2695 SDValue 2696 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 2697 DebugLoc dl) const { 2698 SDValue Cmp; 2699 if (!isFloatingPointZero(RHS)) 2700 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); 2701 else 2702 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); 2703 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); 2704 } 2705 2706 /// duplicateCmp - Glue values can have only one use, so this function 2707 /// duplicates a comparison node. 2708 SDValue 2709 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { 2710 unsigned Opc = Cmp.getOpcode(); 2711 DebugLoc DL = Cmp.getDebugLoc(); 2712 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) 2713 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2714 2715 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); 2716 Cmp = Cmp.getOperand(0); 2717 Opc = Cmp.getOpcode(); 2718 if (Opc == ARMISD::CMPFP) 2719 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2720 else { 2721 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); 2722 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); 2723 } 2724 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); 2725 } 2726 2727 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 2728 SDValue Cond = Op.getOperand(0); 2729 SDValue SelectTrue = Op.getOperand(1); 2730 SDValue SelectFalse = Op.getOperand(2); 2731 DebugLoc dl = Op.getDebugLoc(); 2732 2733 // Convert: 2734 // 2735 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 2736 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 2737 // 2738 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 2739 const ConstantSDNode *CMOVTrue = 2740 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 2741 const ConstantSDNode *CMOVFalse = 2742 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 2743 2744 if (CMOVTrue && CMOVFalse) { 2745 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 2746 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 2747 2748 SDValue True; 2749 SDValue False; 2750 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 2751 True = SelectTrue; 2752 False = SelectFalse; 2753 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 2754 True = SelectFalse; 2755 False = SelectTrue; 2756 } 2757 2758 if (True.getNode() && False.getNode()) { 2759 EVT VT = Op.getValueType(); 2760 SDValue ARMcc = Cond.getOperand(2); 2761 SDValue CCR = Cond.getOperand(3); 2762 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); 2763 assert(True.getValueType() == VT); 2764 return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); 2765 } 2766 } 2767 } 2768 2769 return DAG.getSelectCC(dl, Cond, 2770 DAG.getConstant(0, Cond.getValueType()), 2771 SelectTrue, SelectFalse, ISD::SETNE); 2772 } 2773 2774 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 2775 EVT VT = Op.getValueType(); 2776 SDValue LHS = Op.getOperand(0); 2777 SDValue RHS = Op.getOperand(1); 2778 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 2779 SDValue TrueVal = Op.getOperand(2); 2780 SDValue FalseVal = Op.getOperand(3); 2781 DebugLoc dl = Op.getDebugLoc(); 2782 2783 if (LHS.getValueType() == MVT::i32) { 2784 SDValue ARMcc; 2785 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2786 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2787 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); 2788 } 2789 2790 ARMCC::CondCodes CondCode, CondCode2; 2791 
FPCCToARMCC(CC, CondCode, CondCode2); 2792 2793 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2794 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2795 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2796 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 2797 ARMcc, CCR, Cmp); 2798 if (CondCode2 != ARMCC::AL) { 2799 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 2800 // FIXME: Needs another CMP because flag can have but one use. 2801 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 2802 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 2803 Result, TrueVal, ARMcc2, CCR, Cmp2); 2804 } 2805 return Result; 2806 } 2807 2808 /// canChangeToInt - Given the fp compare operand, return true if it is suitable 2809 /// to morph to an integer compare sequence. 2810 static bool canChangeToInt(SDValue Op, bool &SeenZero, 2811 const ARMSubtarget *Subtarget) { 2812 SDNode *N = Op.getNode(); 2813 if (!N->hasOneUse()) 2814 // Otherwise it requires moving the value from fp to integer registers. 2815 return false; 2816 if (!N->getNumValues()) 2817 return false; 2818 EVT VT = Op.getValueType(); 2819 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 2820 // f32 case is generally profitable. f64 case only makes sense when vcmpe + 2821 // vmrs are very slow, e.g. cortex-a8. 2822 return false; 2823 2824 if (isFloatingPointZero(Op)) { 2825 SeenZero = true; 2826 return true; 2827 } 2828 return ISD::isNormalLoad(N); 2829 } 2830 2831 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 2832 if (isFloatingPointZero(Op)) 2833 return DAG.getConstant(0, MVT::i32); 2834 2835 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 2836 return DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2837 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 2838 Ld->isVolatile(), Ld->isNonTemporal(), 2839 Ld->getAlignment()); 2840 2841 llvm_unreachable("Unknown VFP cmp argument!"); 2842 } 2843 2844 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 2845 SDValue &RetVal1, SDValue &RetVal2) { 2846 if (isFloatingPointZero(Op)) { 2847 RetVal1 = DAG.getConstant(0, MVT::i32); 2848 RetVal2 = DAG.getConstant(0, MVT::i32); 2849 return; 2850 } 2851 2852 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 2853 SDValue Ptr = Ld->getBasePtr(); 2854 RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2855 Ld->getChain(), Ptr, 2856 Ld->getPointerInfo(), 2857 Ld->isVolatile(), Ld->isNonTemporal(), 2858 Ld->getAlignment()); 2859 2860 EVT PtrType = Ptr.getValueType(); 2861 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 2862 SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), 2863 PtrType, Ptr, DAG.getConstant(4, PtrType)); 2864 RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2865 Ld->getChain(), NewPtr, 2866 Ld->getPointerInfo().getWithOffset(4), 2867 Ld->isVolatile(), Ld->isNonTemporal(), 2868 NewAlign); 2869 return; 2870 } 2871 2872 llvm_unreachable("Unknown VFP cmp argument!"); 2873 } 2874 2875 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 2876 /// f32 and even f64 comparisons to integer ones. 
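/// Only equality tests are handled: when both operands are loads (or +0.0)
/// and NaNs are ruled out, the FP equality test is rewritten as a compare of
/// the raw bits, which under unsafe-fp-math is considered close enough (the
/// -0.0 vs +0.0 distinction is ignored).  f32 becomes a plain i32 CMP; f64
/// becomes a pairwise i32 compare lowered as ARMISD::BCC_i64.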
2877 SDValue 2878 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 2879 SDValue Chain = Op.getOperand(0); 2880 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 2881 SDValue LHS = Op.getOperand(2); 2882 SDValue RHS = Op.getOperand(3); 2883 SDValue Dest = Op.getOperand(4); 2884 DebugLoc dl = Op.getDebugLoc(); 2885 2886 bool SeenZero = false; 2887 if (canChangeToInt(LHS, SeenZero, Subtarget) && 2888 canChangeToInt(RHS, SeenZero, Subtarget) && 2889 // If one of the operand is zero, it's safe to ignore the NaN case since 2890 // we only care about equality comparisons. 2891 (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { 2892 // If unsafe fp math optimization is enabled and there are no other uses of 2893 // the CMP operands, and the condition code is EQ or NE, we can optimize it 2894 // to an integer comparison. 2895 if (CC == ISD::SETOEQ) 2896 CC = ISD::SETEQ; 2897 else if (CC == ISD::SETUNE) 2898 CC = ISD::SETNE; 2899 2900 SDValue ARMcc; 2901 if (LHS.getValueType() == MVT::f32) { 2902 LHS = bitcastf32Toi32(LHS, DAG); 2903 RHS = bitcastf32Toi32(RHS, DAG); 2904 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2905 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2906 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 2907 Chain, Dest, ARMcc, CCR, Cmp); 2908 } 2909 2910 SDValue LHS1, LHS2; 2911 SDValue RHS1, RHS2; 2912 expandf64Toi32(LHS, DAG, LHS1, LHS2); 2913 expandf64Toi32(RHS, DAG, RHS1, RHS2); 2914 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2915 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2916 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 2917 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 2918 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); 2919 } 2920 2921 return SDValue(); 2922 } 2923 2924 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 2925 SDValue Chain = Op.getOperand(0); 2926 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 2927 SDValue LHS = Op.getOperand(2); 2928 SDValue RHS = Op.getOperand(3); 2929 SDValue Dest = Op.getOperand(4); 2930 DebugLoc dl = Op.getDebugLoc(); 2931 2932 if (LHS.getValueType() == MVT::i32) { 2933 SDValue ARMcc; 2934 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2935 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2936 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 2937 Chain, Dest, ARMcc, CCR, Cmp); 2938 } 2939 2940 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 2941 2942 if (UnsafeFPMath && 2943 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 2944 CC == ISD::SETNE || CC == ISD::SETUNE)) { 2945 SDValue Result = OptimizeVFPBrcond(Op, DAG); 2946 if (Result.getNode()) 2947 return Result; 2948 } 2949 2950 ARMCC::CondCodes CondCode, CondCode2; 2951 FPCCToARMCC(CC, CondCode, CondCode2); 2952 2953 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2954 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2955 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2956 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 2957 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 2958 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 2959 if (CondCode2 != ARMCC::AL) { 2960 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 2961 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 2962 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 2963 } 2964 return Res; 2965 } 2966 2967 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 2968 
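  // Each jump-table entry is 4 bytes, so the entry address is Table+Index*4.
  // With PIC relocation the entry holds an offset that is loaded and added
  // back to the table base; otherwise the loaded entry is the destination
  // address itself.  Thumb2 keeps the two-level jump so it can later be
  // turned into TBB / TBH.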
SDValue Chain = Op.getOperand(0); 2969 SDValue Table = Op.getOperand(1); 2970 SDValue Index = Op.getOperand(2); 2971 DebugLoc dl = Op.getDebugLoc(); 2972 2973 EVT PTy = getPointerTy(); 2974 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 2975 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 2976 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 2977 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 2978 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 2979 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 2980 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 2981 if (Subtarget->isThumb2()) { 2982 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 2983 // which does another jump to the destination. This also makes it easier 2984 // to translate it to TBB / TBH later. 2985 // FIXME: This might not work if the function is extremely large. 2986 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 2987 Addr, Op.getOperand(2), JTI, UId); 2988 } 2989 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2990 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 2991 MachinePointerInfo::getJumpTable(), 2992 false, false, 0); 2993 Chain = Addr.getValue(1); 2994 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 2995 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2996 } else { 2997 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 2998 MachinePointerInfo::getJumpTable(), false, false, 0); 2999 Chain = Addr.getValue(1); 3000 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3001 } 3002 } 3003 3004 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3005 DebugLoc dl = Op.getDebugLoc(); 3006 unsigned Opc; 3007 3008 switch (Op.getOpcode()) { 3009 default: 3010 assert(0 && "Invalid opcode!"); 3011 case ISD::FP_TO_SINT: 3012 Opc = ARMISD::FTOSI; 3013 break; 3014 case ISD::FP_TO_UINT: 3015 Opc = ARMISD::FTOUI; 3016 break; 3017 } 3018 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 3019 return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 3020 } 3021 3022 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3023 EVT VT = Op.getValueType(); 3024 DebugLoc dl = Op.getDebugLoc(); 3025 3026 assert(Op.getOperand(0).getValueType() == MVT::v4i16 && 3027 "Invalid type for custom lowering!"); 3028 if (VT != MVT::v4f32) 3029 return DAG.UnrollVectorOp(Op.getNode()); 3030 3031 unsigned CastOpc; 3032 unsigned Opc; 3033 switch (Op.getOpcode()) { 3034 default: 3035 assert(0 && "Invalid opcode!"); 3036 case ISD::SINT_TO_FP: 3037 CastOpc = ISD::SIGN_EXTEND; 3038 Opc = ISD::SINT_TO_FP; 3039 break; 3040 case ISD::UINT_TO_FP: 3041 CastOpc = ISD::ZERO_EXTEND; 3042 Opc = ISD::UINT_TO_FP; 3043 break; 3044 } 3045 3046 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); 3047 return DAG.getNode(Opc, dl, VT, Op); 3048 } 3049 3050 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3051 EVT VT = Op.getValueType(); 3052 if (VT.isVector()) 3053 return LowerVectorINT_TO_FP(Op, DAG); 3054 3055 DebugLoc dl = Op.getDebugLoc(); 3056 unsigned Opc; 3057 3058 switch (Op.getOpcode()) { 3059 default: 3060 assert(0 && "Invalid opcode!"); 3061 case ISD::SINT_TO_FP: 3062 Opc = ARMISD::SITOF; 3063 break; 3064 case ISD::UINT_TO_FP: 3065 Opc = ARMISD::UITOF; 3066 break; 3067 } 3068 3069 Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); 3070 return DAG.getNode(Opc, dl, VT, Op); 3071 } 3072 3073 SDValue 
ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 3074 // Implement fcopysign with a fabs and a conditional fneg. 3075 SDValue Tmp0 = Op.getOperand(0); 3076 SDValue Tmp1 = Op.getOperand(1); 3077 DebugLoc dl = Op.getDebugLoc(); 3078 EVT VT = Op.getValueType(); 3079 EVT SrcVT = Tmp1.getValueType(); 3080 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 3081 Tmp0.getOpcode() == ARMISD::VMOVDRR; 3082 bool UseNEON = !InGPR && Subtarget->hasNEON(); 3083 3084 if (UseNEON) { 3085 // Use VBSL to copy the sign bit. 3086 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 3087 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 3088 DAG.getTargetConstant(EncodedVal, MVT::i32)); 3089 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; 3090 if (VT == MVT::f64) 3091 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3092 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 3093 DAG.getConstant(32, MVT::i32)); 3094 else /*if (VT == MVT::f32)*/ 3095 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 3096 if (SrcVT == MVT::f32) { 3097 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 3098 if (VT == MVT::f64) 3099 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3100 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 3101 DAG.getConstant(32, MVT::i32)); 3102 } else if (VT == MVT::f32) 3103 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, 3104 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), 3105 DAG.getConstant(32, MVT::i32)); 3106 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 3107 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 3108 3109 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 3110 MVT::i32); 3111 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 3112 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 3113 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 3114 3115 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 3116 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 3117 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 3118 if (VT == MVT::f32) { 3119 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); 3120 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 3121 DAG.getConstant(0, MVT::i32)); 3122 } else { 3123 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 3124 } 3125 3126 return Res; 3127 } 3128 3129 // Bitcast operand 1 to i32. 3130 if (SrcVT == MVT::f64) 3131 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3132 &Tmp1, 1).getValue(1); 3133 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 3134 3135 // Or in the signbit with integer operations. 3136 SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); 3137 SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); 3138 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 3139 if (VT == MVT::f32) { 3140 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 3141 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 3142 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 3143 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 3144 } 3145 3146 // f64: Or the high part with signbit and then combine two parts. 
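  // VMOVRRD splits the f64 into its two i32 words; the sign lives in bit 31
  // of the high word, so only the high word is masked with Mask2 and OR'd
  // with the sign bit already isolated in Tmp1 before the pair is put back
  // together with VMOVDRR.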
3147 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3148 &Tmp0, 1); 3149 SDValue Lo = Tmp0.getValue(0); 3150 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); 3151 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); 3152 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 3153 } 3154 3155 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 3156 MachineFunction &MF = DAG.getMachineFunction(); 3157 MachineFrameInfo *MFI = MF.getFrameInfo(); 3158 MFI->setReturnAddressIsTaken(true); 3159 3160 EVT VT = Op.getValueType(); 3161 DebugLoc dl = Op.getDebugLoc(); 3162 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3163 if (Depth) { 3164 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 3165 SDValue Offset = DAG.getConstant(4, MVT::i32); 3166 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 3167 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 3168 MachinePointerInfo(), false, false, 0); 3169 } 3170 3171 // Return LR, which contains the return address. Mark it an implicit live-in. 3172 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 3173 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 3174 } 3175 3176 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 3177 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 3178 MFI->setFrameAddressIsTaken(true); 3179 3180 EVT VT = Op.getValueType(); 3181 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 3182 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3183 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 3184 ? ARM::R7 : ARM::R11; 3185 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 3186 while (Depth--) 3187 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 3188 MachinePointerInfo(), 3189 false, false, 0); 3190 return FrameAddr; 3191 } 3192 3193 /// ExpandBITCAST - If the target supports VFP, this function is called to 3194 /// expand a bit convert where either the source or destination type is i64 to 3195 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 3196 /// operand type is illegal (e.g., v2f32 for a target that doesn't support 3197 /// vectors), since the legalizer won't know what to do with that. 3198 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { 3199 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3200 DebugLoc dl = N->getDebugLoc(); 3201 SDValue Op = N->getOperand(0); 3202 3203 // This function is only supposed to be called for i64 types, either as the 3204 // source or destination of the bit convert. 3205 EVT SrcVT = Op.getValueType(); 3206 EVT DstVT = N->getValueType(0); 3207 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 3208 "ExpandBITCAST called for non-i64 type"); 3209 3210 // Turn i64->f64 into VMOVDRR. 3211 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 3212 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3213 DAG.getConstant(0, MVT::i32)); 3214 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3215 DAG.getConstant(1, MVT::i32)); 3216 return DAG.getNode(ISD::BITCAST, dl, DstVT, 3217 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 3218 } 3219 3220 // Turn f64->i64 into VMOVRRD. 
  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
    SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
    // Merge the pieces into a single i64 value.
    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
  }

  return SDValue();
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
/// Zero vectors are used to represent vector negation and in those cases
/// will be implemented with the NEON VNEG instruction. However, VNEG does
/// not support i64 elements, so sometimes the zero vectors will need to be
/// explicitly constructed. Regardless, use a canonical VMOV to create the
/// zero vector.
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
  assert(VT.isVector() && "Expected a vector type");
  // The canonical modified immediate encoding of a zero vector is....0!
  SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}

/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  DebugLoc dl = Op.getDebugLoc();
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  SDValue ARMcc;
  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;

  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);

  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                   DAG.getConstant(VTBits, MVT::i32));
  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);

  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
                          ARMcc, DAG, dl);
  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
                           CCR, Cmp);

  SDValue Ops[2] = { Lo, Hi };
  return DAG.getMergeValues(Ops, 2, dl);
}

/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
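/// For a 64-bit value {hi, lo} shifted left by n the intended result is
///   n <  32:  hi' = (hi << n) | (lo >> (32 - n)),  lo' = lo << n
///   n >= 32:  hi' = lo << (n - 32),                lo' = 0
/// The CMOV below selects the high word based on the sign of (n - 32); the
/// plain 'lo << n' yields 0 for n >= 32 once it is emitted as an ARM
/// register-specified shift.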
3284 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, 3285 SelectionDAG &DAG) const { 3286 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3287 EVT VT = Op.getValueType(); 3288 unsigned VTBits = VT.getSizeInBits(); 3289 DebugLoc dl = Op.getDebugLoc(); 3290 SDValue ShOpLo = Op.getOperand(0); 3291 SDValue ShOpHi = Op.getOperand(1); 3292 SDValue ShAmt = Op.getOperand(2); 3293 SDValue ARMcc; 3294 3295 assert(Op.getOpcode() == ISD::SHL_PARTS); 3296 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3297 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3298 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 3299 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3300 DAG.getConstant(VTBits, MVT::i32)); 3301 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 3302 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 3303 3304 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3305 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3306 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3307 ARMcc, DAG, dl); 3308 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 3309 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, 3310 CCR, Cmp); 3311 3312 SDValue Ops[2] = { Lo, Hi }; 3313 return DAG.getMergeValues(Ops, 2, dl); 3314 } 3315 3316 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 3317 SelectionDAG &DAG) const { 3318 // The rounding mode is in bits 23:22 of the FPSCR. 3319 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 3320 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) 3321 // so that the shift + and get folded into a bitfield extract. 3322 DebugLoc dl = Op.getDebugLoc(); 3323 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, 3324 DAG.getConstant(Intrinsic::arm_get_fpscr, 3325 MVT::i32)); 3326 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, 3327 DAG.getConstant(1U << 22, MVT::i32)); 3328 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, 3329 DAG.getConstant(22, MVT::i32)); 3330 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, 3331 DAG.getConstant(3, MVT::i32)); 3332 } 3333 3334 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 3335 const ARMSubtarget *ST) { 3336 EVT VT = N->getValueType(0); 3337 DebugLoc dl = N->getDebugLoc(); 3338 3339 if (!ST->hasV6T2Ops()) 3340 return SDValue(); 3341 3342 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 3343 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 3344 } 3345 3346 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 3347 const ARMSubtarget *ST) { 3348 EVT VT = N->getValueType(0); 3349 DebugLoc dl = N->getDebugLoc(); 3350 3351 if (!VT.isVector()) 3352 return SDValue(); 3353 3354 // Lower vector shifts on NEON to use VSHL. 3355 assert(ST->hasNEON() && "unexpected vector shift"); 3356 3357 // Left shifts translate directly to the vshiftu intrinsic. 3358 if (N->getOpcode() == ISD::SHL) 3359 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 3360 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 3361 N->getOperand(0), N->getOperand(1)); 3362 3363 assert((N->getOpcode() == ISD::SRA || 3364 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 3365 3366 // NEON uses the same intrinsics for both left and right shifts. For 3367 // right shifts, the shift amounts are negative, so negate the vector of 3368 // shift amounts. 
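  // For example, an SRL of a v4i32 by <1, 2, 3, 4> becomes an
  // arm_neon_vshiftu call with shift counts <-1, -2, -3, -4>, since the
  // VSHL-family shifts treat negative per-lane counts as right shifts.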
3369 EVT ShiftVT = N->getOperand(1).getValueType(); 3370 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 3371 getZeroVector(ShiftVT, DAG, dl), 3372 N->getOperand(1)); 3373 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 3374 Intrinsic::arm_neon_vshifts : 3375 Intrinsic::arm_neon_vshiftu); 3376 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 3377 DAG.getConstant(vshiftInt, MVT::i32), 3378 N->getOperand(0), NegatedCount); 3379 } 3380 3381 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, 3382 const ARMSubtarget *ST) { 3383 EVT VT = N->getValueType(0); 3384 DebugLoc dl = N->getDebugLoc(); 3385 3386 // We can get here for a node like i32 = ISD::SHL i32, i64 3387 if (VT != MVT::i64) 3388 return SDValue(); 3389 3390 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 3391 "Unknown shift to lower!"); 3392 3393 // We only lower SRA, SRL of 1 here, all others use generic lowering. 3394 if (!isa<ConstantSDNode>(N->getOperand(1)) || 3395 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 3396 return SDValue(); 3397 3398 // If we are in thumb mode, we don't have RRX. 3399 if (ST->isThumb1Only()) return SDValue(); 3400 3401 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 3402 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3403 DAG.getConstant(0, MVT::i32)); 3404 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3405 DAG.getConstant(1, MVT::i32)); 3406 3407 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 3408 // captures the result into a carry flag. 3409 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 3410 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); 3411 3412 // The low part is an ARMISD::RRX operand, which shifts the carry in. 3413 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 3414 3415 // Merge the pieces into a single i64 value. 3416 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 3417 } 3418 3419 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 3420 SDValue TmpOp0, TmpOp1; 3421 bool Invert = false; 3422 bool Swap = false; 3423 unsigned Opc = 0; 3424 3425 SDValue Op0 = Op.getOperand(0); 3426 SDValue Op1 = Op.getOperand(1); 3427 SDValue CC = Op.getOperand(2); 3428 EVT VT = Op.getValueType(); 3429 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3430 DebugLoc dl = Op.getDebugLoc(); 3431 3432 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 3433 switch (SetCCOpcode) { 3434 default: llvm_unreachable("Illegal FP comparison"); break; 3435 case ISD::SETUNE: 3436 case ISD::SETNE: Invert = true; // Fallthrough 3437 case ISD::SETOEQ: 3438 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 3439 case ISD::SETOLT: 3440 case ISD::SETLT: Swap = true; // Fallthrough 3441 case ISD::SETOGT: 3442 case ISD::SETGT: Opc = ARMISD::VCGT; break; 3443 case ISD::SETOLE: 3444 case ISD::SETLE: Swap = true; // Fallthrough 3445 case ISD::SETOGE: 3446 case ISD::SETGE: Opc = ARMISD::VCGE; break; 3447 case ISD::SETUGE: Swap = true; // Fallthrough 3448 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 3449 case ISD::SETUGT: Swap = true; // Fallthrough 3450 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 3451 case ISD::SETUEQ: Invert = true; // Fallthrough 3452 case ISD::SETONE: 3453 // Expand this to (OLT | OGT). 
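      // i.e. (a setone b) == (a olt b) | (a ogt b); both halves are formed
      // with VCGT, with the operands swapped for the 'olt' part.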
3454 TmpOp0 = Op0; 3455 TmpOp1 = Op1; 3456 Opc = ISD::OR; 3457 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 3458 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 3459 break; 3460 case ISD::SETUO: Invert = true; // Fallthrough 3461 case ISD::SETO: 3462 // Expand this to (OLT | OGE). 3463 TmpOp0 = Op0; 3464 TmpOp1 = Op1; 3465 Opc = ISD::OR; 3466 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 3467 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 3468 break; 3469 } 3470 } else { 3471 // Integer comparisons. 3472 switch (SetCCOpcode) { 3473 default: llvm_unreachable("Illegal integer comparison"); break; 3474 case ISD::SETNE: Invert = true; 3475 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 3476 case ISD::SETLT: Swap = true; 3477 case ISD::SETGT: Opc = ARMISD::VCGT; break; 3478 case ISD::SETLE: Swap = true; 3479 case ISD::SETGE: Opc = ARMISD::VCGE; break; 3480 case ISD::SETULT: Swap = true; 3481 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 3482 case ISD::SETULE: Swap = true; 3483 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 3484 } 3485 3486 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 3487 if (Opc == ARMISD::VCEQ) { 3488 3489 SDValue AndOp; 3490 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 3491 AndOp = Op0; 3492 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 3493 AndOp = Op1; 3494 3495 // Ignore bitconvert. 3496 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) 3497 AndOp = AndOp.getOperand(0); 3498 3499 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 3500 Opc = ARMISD::VTST; 3501 Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); 3502 Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); 3503 Invert = !Invert; 3504 } 3505 } 3506 } 3507 3508 if (Swap) 3509 std::swap(Op0, Op1); 3510 3511 // If one of the operands is a constant vector zero, attempt to fold the 3512 // comparison to a specialized compare-against-zero form. 3513 SDValue SingleOp; 3514 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 3515 SingleOp = Op0; 3516 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 3517 if (Opc == ARMISD::VCGE) 3518 Opc = ARMISD::VCLEZ; 3519 else if (Opc == ARMISD::VCGT) 3520 Opc = ARMISD::VCLTZ; 3521 SingleOp = Op1; 3522 } 3523 3524 SDValue Result; 3525 if (SingleOp.getNode()) { 3526 switch (Opc) { 3527 case ARMISD::VCEQ: 3528 Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; 3529 case ARMISD::VCGE: 3530 Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; 3531 case ARMISD::VCLEZ: 3532 Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; 3533 case ARMISD::VCGT: 3534 Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; 3535 case ARMISD::VCLTZ: 3536 Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; 3537 default: 3538 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 3539 } 3540 } else { 3541 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 3542 } 3543 3544 if (Invert) 3545 Result = DAG.getNOT(dl, Result, VT); 3546 3547 return Result; 3548 } 3549 3550 /// isNEONModifiedImm - Check if the specified splat value corresponds to a 3551 /// valid vector constant for a NEON instruction with a "modified immediate" 3552 /// operand (e.g., VMOV). If so, return the encoded value. 
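/// The result packs the instruction's op/cmode field together with the 8-bit
/// immediate (see ARM_AM::createNEONModImm).  For example, a v4i32 splat of
/// 0x00ff0000 is encoded as Cmode=010x with Imm=0xff.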
3553 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 3554 unsigned SplatBitSize, SelectionDAG &DAG, 3555 EVT &VT, bool is128Bits, NEONModImmType type) { 3556 unsigned OpCmode, Imm; 3557 3558 // SplatBitSize is set to the smallest size that splats the vector, so a 3559 // zero vector will always have SplatBitSize == 8. However, NEON modified 3560 // immediate instructions others than VMOV do not support the 8-bit encoding 3561 // of a zero vector, and the default encoding of zero is supposed to be the 3562 // 32-bit version. 3563 if (SplatBits == 0) 3564 SplatBitSize = 32; 3565 3566 switch (SplatBitSize) { 3567 case 8: 3568 if (type != VMOVModImm) 3569 return SDValue(); 3570 // Any 1-byte value is OK. Op=0, Cmode=1110. 3571 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 3572 OpCmode = 0xe; 3573 Imm = SplatBits; 3574 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 3575 break; 3576 3577 case 16: 3578 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 3579 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 3580 if ((SplatBits & ~0xff) == 0) { 3581 // Value = 0x00nn: Op=x, Cmode=100x. 3582 OpCmode = 0x8; 3583 Imm = SplatBits; 3584 break; 3585 } 3586 if ((SplatBits & ~0xff00) == 0) { 3587 // Value = 0xnn00: Op=x, Cmode=101x. 3588 OpCmode = 0xa; 3589 Imm = SplatBits >> 8; 3590 break; 3591 } 3592 return SDValue(); 3593 3594 case 32: 3595 // NEON's 32-bit VMOV supports splat values where: 3596 // * only one byte is nonzero, or 3597 // * the least significant byte is 0xff and the second byte is nonzero, or 3598 // * the least significant 2 bytes are 0xff and the third is nonzero. 3599 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 3600 if ((SplatBits & ~0xff) == 0) { 3601 // Value = 0x000000nn: Op=x, Cmode=000x. 3602 OpCmode = 0; 3603 Imm = SplatBits; 3604 break; 3605 } 3606 if ((SplatBits & ~0xff00) == 0) { 3607 // Value = 0x0000nn00: Op=x, Cmode=001x. 3608 OpCmode = 0x2; 3609 Imm = SplatBits >> 8; 3610 break; 3611 } 3612 if ((SplatBits & ~0xff0000) == 0) { 3613 // Value = 0x00nn0000: Op=x, Cmode=010x. 3614 OpCmode = 0x4; 3615 Imm = SplatBits >> 16; 3616 break; 3617 } 3618 if ((SplatBits & ~0xff000000) == 0) { 3619 // Value = 0xnn000000: Op=x, Cmode=011x. 3620 OpCmode = 0x6; 3621 Imm = SplatBits >> 24; 3622 break; 3623 } 3624 3625 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC 3626 if (type == OtherModImm) return SDValue(); 3627 3628 if ((SplatBits & ~0xffff) == 0 && 3629 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 3630 // Value = 0x0000nnff: Op=x, Cmode=1100. 3631 OpCmode = 0xc; 3632 Imm = SplatBits >> 8; 3633 SplatBits |= 0xff; 3634 break; 3635 } 3636 3637 if ((SplatBits & ~0xffffff) == 0 && 3638 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 3639 // Value = 0x00nnffff: Op=x, Cmode=1101. 3640 OpCmode = 0xd; 3641 Imm = SplatBits >> 16; 3642 SplatBits |= 0xffff; 3643 break; 3644 } 3645 3646 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 3647 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 3648 // VMOV.I32. A (very) minor optimization would be to replicate the value 3649 // and fall through here to test for a valid 64-bit splat. But, then the 3650 // caller would also need to check and handle the change in size. 3651 return SDValue(); 3652 3653 case 64: { 3654 if (type != VMOVModImm) 3655 return SDValue(); 3656 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 
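    // Each of the 8 bytes maps to one bit of Imm, least-significant byte
    // first: e.g. a splat of 0x00ff00ff00ff00ff is encoded with
    // Imm = 0b01010101.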
3657 uint64_t BitMask = 0xff; 3658 uint64_t Val = 0; 3659 unsigned ImmMask = 1; 3660 Imm = 0; 3661 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 3662 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 3663 Val |= BitMask; 3664 Imm |= ImmMask; 3665 } else if ((SplatBits & BitMask) != 0) { 3666 return SDValue(); 3667 } 3668 BitMask <<= 8; 3669 ImmMask <<= 1; 3670 } 3671 // Op=1, Cmode=1110. 3672 OpCmode = 0x1e; 3673 SplatBits = Val; 3674 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 3675 break; 3676 } 3677 3678 default: 3679 llvm_unreachable("unexpected size for isNEONModifiedImm"); 3680 return SDValue(); 3681 } 3682 3683 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 3684 return DAG.getTargetConstant(EncodedVal, MVT::i32); 3685 } 3686 3687 static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, 3688 bool &ReverseVEXT, unsigned &Imm) { 3689 unsigned NumElts = VT.getVectorNumElements(); 3690 ReverseVEXT = false; 3691 3692 // Assume that the first shuffle index is not UNDEF. Fail if it is. 3693 if (M[0] < 0) 3694 return false; 3695 3696 Imm = M[0]; 3697 3698 // If this is a VEXT shuffle, the immediate value is the index of the first 3699 // element. The other shuffle indices must be the successive elements after 3700 // the first one. 3701 unsigned ExpectedElt = Imm; 3702 for (unsigned i = 1; i < NumElts; ++i) { 3703 // Increment the expected index. If it wraps around, it may still be 3704 // a VEXT but the source vectors must be swapped. 3705 ExpectedElt += 1; 3706 if (ExpectedElt == NumElts * 2) { 3707 ExpectedElt = 0; 3708 ReverseVEXT = true; 3709 } 3710 3711 if (M[i] < 0) continue; // ignore UNDEF indices 3712 if (ExpectedElt != static_cast<unsigned>(M[i])) 3713 return false; 3714 } 3715 3716 // Adjust the index value if the source operands will be swapped. 3717 if (ReverseVEXT) 3718 Imm -= NumElts; 3719 3720 return true; 3721 } 3722 3723 /// isVREVMask - Check if a vector shuffle corresponds to a VREV 3724 /// instruction with the specified blocksize. (The order of the elements 3725 /// within each block of the vector is reversed.) 3726 static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, 3727 unsigned BlockSize) { 3728 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 3729 "Only possible block sizes for VREV are: 16, 32, 64"); 3730 3731 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3732 if (EltSz == 64) 3733 return false; 3734 3735 unsigned NumElts = VT.getVectorNumElements(); 3736 unsigned BlockElts = M[0] + 1; 3737 // If the first shuffle index is UNDEF, be optimistic. 3738 if (M[0] < 0) 3739 BlockElts = BlockSize / EltSz; 3740 3741 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 3742 return false; 3743 3744 for (unsigned i = 0; i < NumElts; ++i) { 3745 if (M[i] < 0) continue; // ignore UNDEF indices 3746 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 3747 return false; 3748 } 3749 3750 return true; 3751 } 3752 3753 static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) { 3754 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of 3755 // range, then 0 is placed into the resulting vector. So pretty much any mask 3756 // of 8 elements can work here. 
3757 return VT == MVT::v8i8 && M.size() == 8; 3758 } 3759 3760 static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, 3761 unsigned &WhichResult) { 3762 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3763 if (EltSz == 64) 3764 return false; 3765 3766 unsigned NumElts = VT.getVectorNumElements(); 3767 WhichResult = (M[0] == 0 ? 0 : 1); 3768 for (unsigned i = 0; i < NumElts; i += 2) { 3769 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3770 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 3771 return false; 3772 } 3773 return true; 3774 } 3775 3776 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 3777 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3778 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 3779 static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3780 unsigned &WhichResult) { 3781 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3782 if (EltSz == 64) 3783 return false; 3784 3785 unsigned NumElts = VT.getVectorNumElements(); 3786 WhichResult = (M[0] == 0 ? 0 : 1); 3787 for (unsigned i = 0; i < NumElts; i += 2) { 3788 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3789 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 3790 return false; 3791 } 3792 return true; 3793 } 3794 3795 static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, 3796 unsigned &WhichResult) { 3797 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3798 if (EltSz == 64) 3799 return false; 3800 3801 unsigned NumElts = VT.getVectorNumElements(); 3802 WhichResult = (M[0] == 0 ? 0 : 1); 3803 for (unsigned i = 0; i != NumElts; ++i) { 3804 if (M[i] < 0) continue; // ignore UNDEF indices 3805 if ((unsigned) M[i] != 2 * i + WhichResult) 3806 return false; 3807 } 3808 3809 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3810 if (VT.is64BitVector() && EltSz == 32) 3811 return false; 3812 3813 return true; 3814 } 3815 3816 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 3817 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3818 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 3819 static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3820 unsigned &WhichResult) { 3821 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3822 if (EltSz == 64) 3823 return false; 3824 3825 unsigned Half = VT.getVectorNumElements() / 2; 3826 WhichResult = (M[0] == 0 ? 0 : 1); 3827 for (unsigned j = 0; j != 2; ++j) { 3828 unsigned Idx = WhichResult; 3829 for (unsigned i = 0; i != Half; ++i) { 3830 int MIdx = M[i + j * Half]; 3831 if (MIdx >= 0 && (unsigned) MIdx != Idx) 3832 return false; 3833 Idx += 2; 3834 } 3835 } 3836 3837 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3838 if (VT.is64BitVector() && EltSz == 32) 3839 return false; 3840 3841 return true; 3842 } 3843 3844 static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, 3845 unsigned &WhichResult) { 3846 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3847 if (EltSz == 64) 3848 return false; 3849 3850 unsigned NumElts = VT.getVectorNumElements(); 3851 WhichResult = (M[0] == 0 ? 
0 : 1); 3852 unsigned Idx = WhichResult * NumElts / 2; 3853 for (unsigned i = 0; i != NumElts; i += 2) { 3854 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 3855 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) 3856 return false; 3857 Idx += 1; 3858 } 3859 3860 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3861 if (VT.is64BitVector() && EltSz == 32) 3862 return false; 3863 3864 return true; 3865 } 3866 3867 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 3868 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3869 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 3870 static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3871 unsigned &WhichResult) { 3872 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3873 if (EltSz == 64) 3874 return false; 3875 3876 unsigned NumElts = VT.getVectorNumElements(); 3877 WhichResult = (M[0] == 0 ? 0 : 1); 3878 unsigned Idx = WhichResult * NumElts / 2; 3879 for (unsigned i = 0; i != NumElts; i += 2) { 3880 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 3881 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) 3882 return false; 3883 Idx += 1; 3884 } 3885 3886 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3887 if (VT.is64BitVector() && EltSz == 32) 3888 return false; 3889 3890 return true; 3891 } 3892 3893 // If N is an integer constant that can be moved into a register in one 3894 // instruction, return an SDValue of such a constant (will become a MOV 3895 // instruction). Otherwise return null. 3896 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 3897 const ARMSubtarget *ST, DebugLoc dl) { 3898 uint64_t Val; 3899 if (!isa<ConstantSDNode>(N)) 3900 return SDValue(); 3901 Val = cast<ConstantSDNode>(N)->getZExtValue(); 3902 3903 if (ST->isThumb1Only()) { 3904 if (Val <= 255 || ~Val <= 255) 3905 return DAG.getConstant(Val, MVT::i32); 3906 } else { 3907 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 3908 return DAG.getConstant(Val, MVT::i32); 3909 } 3910 return SDValue(); 3911 } 3912 3913 // If this is a case we can't handle, return null and let the default 3914 // expansion code take care of it. 3915 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 3916 const ARMSubtarget *ST) const { 3917 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 3918 DebugLoc dl = Op.getDebugLoc(); 3919 EVT VT = Op.getValueType(); 3920 3921 APInt SplatBits, SplatUndef; 3922 unsigned SplatBitSize; 3923 bool HasAnyUndefs; 3924 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 3925 if (SplatBitSize <= 64) { 3926 // Check if an immediate VMOV works. 3927 EVT VmovVT; 3928 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 3929 SplatUndef.getZExtValue(), SplatBitSize, 3930 DAG, VmovVT, VT.is128BitVector(), 3931 VMOVModImm); 3932 if (Val.getNode()) { 3933 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 3934 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3935 } 3936 3937 // Try an immediate VMVN. 
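      // VMVN materializes the bitwise complement of its encoded immediate, so
      // a splat such as 0xffffff00, whose complement 0x000000ff is encodable,
      // still takes a single instruction.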
3938 uint64_t NegatedImm = (SplatBits.getZExtValue() ^ 3939 ((1LL << SplatBitSize) - 1)); 3940 Val = isNEONModifiedImm(NegatedImm, 3941 SplatUndef.getZExtValue(), SplatBitSize, 3942 DAG, VmovVT, VT.is128BitVector(), 3943 VMVNModImm); 3944 if (Val.getNode()) { 3945 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 3946 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3947 } 3948 } 3949 } 3950 3951 // Scan through the operands to see if only one value is used. 3952 unsigned NumElts = VT.getVectorNumElements(); 3953 bool isOnlyLowElement = true; 3954 bool usesOnlyOneValue = true; 3955 bool isConstant = true; 3956 SDValue Value; 3957 for (unsigned i = 0; i < NumElts; ++i) { 3958 SDValue V = Op.getOperand(i); 3959 if (V.getOpcode() == ISD::UNDEF) 3960 continue; 3961 if (i > 0) 3962 isOnlyLowElement = false; 3963 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 3964 isConstant = false; 3965 3966 if (!Value.getNode()) 3967 Value = V; 3968 else if (V != Value) 3969 usesOnlyOneValue = false; 3970 } 3971 3972 if (!Value.getNode()) 3973 return DAG.getUNDEF(VT); 3974 3975 if (isOnlyLowElement) 3976 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 3977 3978 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3979 3980 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 3981 // i32 and try again. 3982 if (usesOnlyOneValue && EltSize <= 32) { 3983 if (!isConstant) 3984 return DAG.getNode(ARMISD::VDUP, dl, VT, Value); 3985 if (VT.getVectorElementType().isFloatingPoint()) { 3986 SmallVector<SDValue, 8> Ops; 3987 for (unsigned i = 0; i < NumElts; ++i) 3988 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, 3989 Op.getOperand(i))); 3990 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 3991 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); 3992 Val = LowerBUILD_VECTOR(Val, DAG, ST); 3993 if (Val.getNode()) 3994 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 3995 } 3996 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 3997 if (Val.getNode()) 3998 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 3999 } 4000 4001 // If all elements are constants and the case above didn't get hit, fall back 4002 // to the default expansion, which will generate a load from the constant 4003 // pool. 4004 if (isConstant) 4005 return SDValue(); 4006 4007 // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 4008 if (NumElts >= 4) { 4009 SDValue shuffle = ReconstructShuffle(Op, DAG); 4010 if (shuffle != SDValue()) 4011 return shuffle; 4012 } 4013 4014 // Vectors with 32- or 64-bit elements can be built by directly assigning 4015 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 4016 // will be legalized. 4017 if (EltSize >= 32) { 4018 // Do the expansion with floating-point types, since that is what the VFP 4019 // registers are defined to use, and since i64 is not legal. 4020 EVT EltVT = EVT::getFloatingPointVT(EltSize); 4021 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 4022 SmallVector<SDValue, 8> Ops; 4023 for (unsigned i = 0; i < NumElts; ++i) 4024 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); 4025 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 4026 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4027 } 4028 4029 return SDValue(); 4030 } 4031 4032 // Gather data to see if the operation can be modelled as a 4033 // shuffle in combination with VEXTs. 
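// For example, building <a[1], a[2], a[3], b[0]> out of two v4i32 vectors a
// and b is turned into a vector_shuffle of a and b with mask <1, 2, 3, 4>,
// which the shuffle lowering can then match as a single VEXT.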
4034 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, 4035 SelectionDAG &DAG) const { 4036 DebugLoc dl = Op.getDebugLoc(); 4037 EVT VT = Op.getValueType(); 4038 unsigned NumElts = VT.getVectorNumElements(); 4039 4040 SmallVector<SDValue, 2> SourceVecs; 4041 SmallVector<unsigned, 2> MinElts; 4042 SmallVector<unsigned, 2> MaxElts; 4043 4044 for (unsigned i = 0; i < NumElts; ++i) { 4045 SDValue V = Op.getOperand(i); 4046 if (V.getOpcode() == ISD::UNDEF) 4047 continue; 4048 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { 4049 // A shuffle can only come from building a vector from various 4050 // elements of other vectors. 4051 return SDValue(); 4052 } 4053 4054 // Record this extraction against the appropriate vector if possible... 4055 SDValue SourceVec = V.getOperand(0); 4056 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); 4057 bool FoundSource = false; 4058 for (unsigned j = 0; j < SourceVecs.size(); ++j) { 4059 if (SourceVecs[j] == SourceVec) { 4060 if (MinElts[j] > EltNo) 4061 MinElts[j] = EltNo; 4062 if (MaxElts[j] < EltNo) 4063 MaxElts[j] = EltNo; 4064 FoundSource = true; 4065 break; 4066 } 4067 } 4068 4069 // Or record a new source if not... 4070 if (!FoundSource) { 4071 SourceVecs.push_back(SourceVec); 4072 MinElts.push_back(EltNo); 4073 MaxElts.push_back(EltNo); 4074 } 4075 } 4076 4077 // Currently only do something sane when at most two source vectors 4078 // involved. 4079 if (SourceVecs.size() > 2) 4080 return SDValue(); 4081 4082 SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; 4083 int VEXTOffsets[2] = {0, 0}; 4084 4085 // This loop extracts the usage patterns of the source vectors 4086 // and prepares appropriate SDValues for a shuffle if possible. 4087 for (unsigned i = 0; i < SourceVecs.size(); ++i) { 4088 if (SourceVecs[i].getValueType() == VT) { 4089 // No VEXT necessary 4090 ShuffleSrcs[i] = SourceVecs[i]; 4091 VEXTOffsets[i] = 0; 4092 continue; 4093 } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { 4094 // It probably isn't worth padding out a smaller vector just to 4095 // break it down again in a shuffle. 4096 return SDValue(); 4097 } 4098 4099 // Since only 64-bit and 128-bit vectors are legal on ARM and 4100 // we've eliminated the other cases... 
4101 assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && 4102 "unexpected vector sizes in ReconstructShuffle"); 4103 4104 if (MaxElts[i] - MinElts[i] >= NumElts) { 4105 // Span too large for a VEXT to cope 4106 return SDValue(); 4107 } 4108 4109 if (MinElts[i] >= NumElts) { 4110 // The extraction can just take the second half 4111 VEXTOffsets[i] = NumElts; 4112 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4113 SourceVecs[i], 4114 DAG.getIntPtrConstant(NumElts)); 4115 } else if (MaxElts[i] < NumElts) { 4116 // The extraction can just take the first half 4117 VEXTOffsets[i] = 0; 4118 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4119 SourceVecs[i], 4120 DAG.getIntPtrConstant(0)); 4121 } else { 4122 // An actual VEXT is needed 4123 VEXTOffsets[i] = MinElts[i]; 4124 SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4125 SourceVecs[i], 4126 DAG.getIntPtrConstant(0)); 4127 SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4128 SourceVecs[i], 4129 DAG.getIntPtrConstant(NumElts)); 4130 ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, 4131 DAG.getConstant(VEXTOffsets[i], MVT::i32)); 4132 } 4133 } 4134 4135 SmallVector<int, 8> Mask; 4136 4137 for (unsigned i = 0; i < NumElts; ++i) { 4138 SDValue Entry = Op.getOperand(i); 4139 if (Entry.getOpcode() == ISD::UNDEF) { 4140 Mask.push_back(-1); 4141 continue; 4142 } 4143 4144 SDValue ExtractVec = Entry.getOperand(0); 4145 int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i) 4146 .getOperand(1))->getSExtValue(); 4147 if (ExtractVec == SourceVecs[0]) { 4148 Mask.push_back(ExtractElt - VEXTOffsets[0]); 4149 } else { 4150 Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); 4151 } 4152 } 4153 4154 // Final check before we try to produce nonsense... 4155 if (isShuffleMaskLegal(Mask, VT)) 4156 return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], 4157 &Mask[0]); 4158 4159 return SDValue(); 4160 } 4161 4162 /// isShuffleMaskLegal - Targets can use this to indicate that they only 4163 /// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4164 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4165 /// are assumed to be legal. 4166 bool 4167 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 4168 EVT VT) const { 4169 if (VT.getVectorNumElements() == 4 && 4170 (VT.is128BitVector() || VT.is64BitVector())) { 4171 unsigned PFIndexes[4]; 4172 for (unsigned i = 0; i != 4; ++i) { 4173 if (M[i] < 0) 4174 PFIndexes[i] = 8; 4175 else 4176 PFIndexes[i] = M[i]; 4177 } 4178 4179 // Compute the index in the perfect shuffle table. 
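    // Each of the four mask entries is a digit in [0, 8] (8 = undef), so the
    // mask as a whole forms a base-9 index into PerfectShuffleTable; the top
    // two bits of each table entry give the cost of the best expansion.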
4180 unsigned PFTableIndex = 4181 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4182 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4183 unsigned Cost = (PFEntry >> 30); 4184 4185 if (Cost <= 4) 4186 return true; 4187 } 4188 4189 bool ReverseVEXT; 4190 unsigned Imm, WhichResult; 4191 4192 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4193 return (EltSize >= 32 || 4194 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 4195 isVREVMask(M, VT, 64) || 4196 isVREVMask(M, VT, 32) || 4197 isVREVMask(M, VT, 16) || 4198 isVEXTMask(M, VT, ReverseVEXT, Imm) || 4199 isVTBLMask(M, VT) || 4200 isVTRNMask(M, VT, WhichResult) || 4201 isVUZPMask(M, VT, WhichResult) || 4202 isVZIPMask(M, VT, WhichResult) || 4203 isVTRN_v_undef_Mask(M, VT, WhichResult) || 4204 isVUZP_v_undef_Mask(M, VT, WhichResult) || 4205 isVZIP_v_undef_Mask(M, VT, WhichResult)); 4206 } 4207 4208 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 4209 /// the specified operations to build the shuffle. 4210 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 4211 SDValue RHS, SelectionDAG &DAG, 4212 DebugLoc dl) { 4213 unsigned OpNum = (PFEntry >> 26) & 0x0F; 4214 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 4215 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 4216 4217 enum { 4218 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 4219 OP_VREV, 4220 OP_VDUP0, 4221 OP_VDUP1, 4222 OP_VDUP2, 4223 OP_VDUP3, 4224 OP_VEXT1, 4225 OP_VEXT2, 4226 OP_VEXT3, 4227 OP_VUZPL, // VUZP, left result 4228 OP_VUZPR, // VUZP, right result 4229 OP_VZIPL, // VZIP, left result 4230 OP_VZIPR, // VZIP, right result 4231 OP_VTRNL, // VTRN, left result 4232 OP_VTRNR // VTRN, right result 4233 }; 4234 4235 if (OpNum == OP_COPY) { 4236 if (LHSID == (1*9+2)*9+3) return LHS; 4237 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 4238 return RHS; 4239 } 4240 4241 SDValue OpLHS, OpRHS; 4242 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 4243 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 4244 EVT VT = OpLHS.getValueType(); 4245 4246 switch (OpNum) { 4247 default: llvm_unreachable("Unknown shuffle opcode!"); 4248 case OP_VREV: 4249 // VREV divides the vector in half and swaps within the half. 
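    // e.g. for a v4i32 <a, b, c, d> this emits VREV64.32, giving <b, a, d, c>.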
4250 if (VT.getVectorElementType() == MVT::i32 || 4251 VT.getVectorElementType() == MVT::f32) 4252 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 4253 // vrev <4 x i16> -> VREV32 4254 if (VT.getVectorElementType() == MVT::i16) 4255 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS); 4256 // vrev <4 x i8> -> VREV16 4257 assert(VT.getVectorElementType() == MVT::i8); 4258 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS); 4259 case OP_VDUP0: 4260 case OP_VDUP1: 4261 case OP_VDUP2: 4262 case OP_VDUP3: 4263 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 4264 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 4265 case OP_VEXT1: 4266 case OP_VEXT2: 4267 case OP_VEXT3: 4268 return DAG.getNode(ARMISD::VEXT, dl, VT, 4269 OpLHS, OpRHS, 4270 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 4271 case OP_VUZPL: 4272 case OP_VUZPR: 4273 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4274 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 4275 case OP_VZIPL: 4276 case OP_VZIPR: 4277 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4278 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 4279 case OP_VTRNL: 4280 case OP_VTRNR: 4281 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4282 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 4283 } 4284 } 4285 4286 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, 4287 SmallVectorImpl<int> &ShuffleMask, 4288 SelectionDAG &DAG) { 4289 // Check to see if we can use the VTBL instruction. 4290 SDValue V1 = Op.getOperand(0); 4291 SDValue V2 = Op.getOperand(1); 4292 DebugLoc DL = Op.getDebugLoc(); 4293 4294 SmallVector<SDValue, 8> VTBLMask; 4295 for (SmallVectorImpl<int>::iterator 4296 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) 4297 VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); 4298 4299 if (V2.getNode()->getOpcode() == ISD::UNDEF) 4300 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, 4301 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, 4302 &VTBLMask[0], 8)); 4303 4304 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, 4305 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, 4306 &VTBLMask[0], 8)); 4307 } 4308 4309 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 4310 SDValue V1 = Op.getOperand(0); 4311 SDValue V2 = Op.getOperand(1); 4312 DebugLoc dl = Op.getDebugLoc(); 4313 EVT VT = Op.getValueType(); 4314 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 4315 SmallVector<int, 8> ShuffleMask; 4316 4317 // Convert shuffles that are directly supported on NEON to target-specific 4318 // DAG nodes, instead of keeping them as shuffles and matching them again 4319 // during code selection. This is more efficient and avoids the possibility 4320 // of inconsistencies between legalization and selection. 4321 // FIXME: floating-point vectors should be canonicalized to integer vectors 4322 // of the same time so that they get CSEd properly. 4323 SVN->getMask(ShuffleMask); 4324 4325 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4326 if (EltSize <= 32) { 4327 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 4328 int Lane = SVN->getSplatIndex(); 4329 // If this is undef splat, generate it via "just" vdup, if possible. 
4330 if (Lane == -1) Lane = 0; 4331 4332 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 4333 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 4334 } 4335 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 4336 DAG.getConstant(Lane, MVT::i32)); 4337 } 4338 4339 bool ReverseVEXT; 4340 unsigned Imm; 4341 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 4342 if (ReverseVEXT) 4343 std::swap(V1, V2); 4344 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 4345 DAG.getConstant(Imm, MVT::i32)); 4346 } 4347 4348 if (isVREVMask(ShuffleMask, VT, 64)) 4349 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 4350 if (isVREVMask(ShuffleMask, VT, 32)) 4351 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 4352 if (isVREVMask(ShuffleMask, VT, 16)) 4353 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 4354 4355 // Check for Neon shuffles that modify both input vectors in place. 4356 // If both results are used, i.e., if there are two shuffles with the same 4357 // source operands and with masks corresponding to both results of one of 4358 // these operations, DAG memoization will ensure that a single node is 4359 // used for both shuffles. 4360 unsigned WhichResult; 4361 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 4362 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4363 V1, V2).getValue(WhichResult); 4364 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 4365 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4366 V1, V2).getValue(WhichResult); 4367 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 4368 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4369 V1, V2).getValue(WhichResult); 4370 4371 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4372 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4373 V1, V1).getValue(WhichResult); 4374 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4375 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4376 V1, V1).getValue(WhichResult); 4377 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4378 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4379 V1, V1).getValue(WhichResult); 4380 } 4381 4382 // If the shuffle is not directly supported and it has 4 elements, use 4383 // the PerfectShuffle-generated table to synthesize it from other shuffles. 4384 unsigned NumElts = VT.getVectorNumElements(); 4385 if (NumElts == 4) { 4386 unsigned PFIndexes[4]; 4387 for (unsigned i = 0; i != 4; ++i) { 4388 if (ShuffleMask[i] < 0) 4389 PFIndexes[i] = 8; 4390 else 4391 PFIndexes[i] = ShuffleMask[i]; 4392 } 4393 4394 // Compute the index in the perfect shuffle table. 4395 unsigned PFTableIndex = 4396 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4397 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4398 unsigned Cost = (PFEntry >> 30); 4399 4400 if (Cost <= 4) 4401 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 4402 } 4403 4404 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 4405 if (EltSize >= 32) { 4406 // Do the expansion with floating-point types, since that is what the VFP 4407 // registers are defined to use, and since i64 is not legal. 
4408 EVT EltVT = EVT::getFloatingPointVT(EltSize); 4409 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 4410 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); 4411 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); 4412 SmallVector<SDValue, 8> Ops; 4413 for (unsigned i = 0; i < NumElts; ++i) { 4414 if (ShuffleMask[i] < 0) 4415 Ops.push_back(DAG.getUNDEF(EltVT)); 4416 else 4417 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 4418 ShuffleMask[i] < (int)NumElts ? V1 : V2, 4419 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 4420 MVT::i32))); 4421 } 4422 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 4423 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4424 } 4425 4426 if (VT == MVT::v8i8) { 4427 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); 4428 if (NewOp.getNode()) 4429 return NewOp; 4430 } 4431 4432 return SDValue(); 4433 } 4434 4435 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 4436 // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 4437 SDValue Lane = Op.getOperand(1); 4438 if (!isa<ConstantSDNode>(Lane)) 4439 return SDValue(); 4440 4441 SDValue Vec = Op.getOperand(0); 4442 if (Op.getValueType() == MVT::i32 && 4443 Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { 4444 DebugLoc dl = Op.getDebugLoc(); 4445 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 4446 } 4447 4448 return Op; 4449 } 4450 4451 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 4452 // The only time a CONCAT_VECTORS operation can have legal types is when 4453 // two 64-bit vectors are concatenated to a 128-bit vector. 4454 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 4455 "unexpected CONCAT_VECTORS"); 4456 DebugLoc dl = Op.getDebugLoc(); 4457 SDValue Val = DAG.getUNDEF(MVT::v2f64); 4458 SDValue Op0 = Op.getOperand(0); 4459 SDValue Op1 = Op.getOperand(1); 4460 if (Op0.getOpcode() != ISD::UNDEF) 4461 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 4462 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), 4463 DAG.getIntPtrConstant(0)); 4464 if (Op1.getOpcode() != ISD::UNDEF) 4465 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 4466 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), 4467 DAG.getIntPtrConstant(1)); 4468 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); 4469 } 4470 4471 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each 4472 /// element has been zero/sign-extended, depending on the isSigned parameter, 4473 /// from an integer type half its size. 4474 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, 4475 bool isSigned) { 4476 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 4477 EVT VT = N->getValueType(0); 4478 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { 4479 SDNode *BVN = N->getOperand(0).getNode(); 4480 if (BVN->getValueType(0) != MVT::v4i32 || 4481 BVN->getOpcode() != ISD::BUILD_VECTOR) 4482 return false; 4483 unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 
1 : 0; 4484 unsigned HiElt = 1 - LoElt; 4485 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); 4486 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); 4487 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); 4488 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); 4489 if (!Lo0 || !Hi0 || !Lo1 || !Hi1) 4490 return false; 4491 if (isSigned) { 4492 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && 4493 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) 4494 return true; 4495 } else { 4496 if (Hi0->isNullValue() && Hi1->isNullValue()) 4497 return true; 4498 } 4499 return false; 4500 } 4501 4502 if (N->getOpcode() != ISD::BUILD_VECTOR) 4503 return false; 4504 4505 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 4506 SDNode *Elt = N->getOperand(i).getNode(); 4507 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 4508 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4509 unsigned HalfSize = EltSize / 2; 4510 if (isSigned) { 4511 int64_t SExtVal = C->getSExtValue(); 4512 if ((SExtVal >> HalfSize) != (SExtVal >> EltSize)) 4513 return false; 4514 } else { 4515 if ((C->getZExtValue() >> HalfSize) != 0) 4516 return false; 4517 } 4518 continue; 4519 } 4520 return false; 4521 } 4522 4523 return true; 4524 } 4525 4526 /// isSignExtended - Check if a node is a vector value that is sign-extended 4527 /// or a constant BUILD_VECTOR with sign-extended elements. 4528 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { 4529 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) 4530 return true; 4531 if (isExtendedBUILD_VECTOR(N, DAG, true)) 4532 return true; 4533 return false; 4534 } 4535 4536 /// isZeroExtended - Check if a node is a vector value that is zero-extended 4537 /// or a constant BUILD_VECTOR with zero-extended elements. 4538 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { 4539 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) 4540 return true; 4541 if (isExtendedBUILD_VECTOR(N, DAG, false)) 4542 return true; 4543 return false; 4544 } 4545 4546 /// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending 4547 /// load, or BUILD_VECTOR with extended elements, return the unextended value. 4548 static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { 4549 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 4550 return N->getOperand(0); 4551 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) 4552 return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), 4553 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 4554 LD->isNonTemporal(), LD->getAlignment()); 4555 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will 4556 // have been legalized as a BITCAST from v4i32. 4557 if (N->getOpcode() == ISD::BITCAST) { 4558 SDNode *BVN = N->getOperand(0).getNode(); 4559 assert(BVN->getOpcode() == ISD::BUILD_VECTOR && 4560 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); 4561 unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 4562 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32, 4563 BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); 4564 } 4565 // Construct a new BUILD_VECTOR with elements truncated to half the size. 
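// For example, a v4i32 BUILD_VECTOR whose constants were extended from i16 is
// rebuilt as a v4i16 BUILD_VECTOR of the truncated constant values.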
4566 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); 4567 EVT VT = N->getValueType(0); 4568 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; 4569 unsigned NumElts = VT.getVectorNumElements(); 4570 MVT TruncVT = MVT::getIntegerVT(EltSize); 4571 SmallVector<SDValue, 8> Ops; 4572 for (unsigned i = 0; i != NumElts; ++i) { 4573 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); 4574 const APInt &CInt = C->getAPIntValue(); 4575 Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT)); 4576 } 4577 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 4578 MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); 4579 } 4580 4581 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { 4582 unsigned Opcode = N->getOpcode(); 4583 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 4584 SDNode *N0 = N->getOperand(0).getNode(); 4585 SDNode *N1 = N->getOperand(1).getNode(); 4586 return N0->hasOneUse() && N1->hasOneUse() && 4587 isSignExtended(N0, DAG) && isSignExtended(N1, DAG); 4588 } 4589 return false; 4590 } 4591 4592 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { 4593 unsigned Opcode = N->getOpcode(); 4594 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 4595 SDNode *N0 = N->getOperand(0).getNode(); 4596 SDNode *N1 = N->getOperand(1).getNode(); 4597 return N0->hasOneUse() && N1->hasOneUse() && 4598 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); 4599 } 4600 return false; 4601 } 4602 4603 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 4604 // Multiplications are only custom-lowered for 128-bit vectors so that 4605 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 4606 EVT VT = Op.getValueType(); 4607 assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL"); 4608 SDNode *N0 = Op.getOperand(0).getNode(); 4609 SDNode *N1 = Op.getOperand(1).getNode(); 4610 unsigned NewOpc = 0; 4611 bool isMLA = false; 4612 bool isN0SExt = isSignExtended(N0, DAG); 4613 bool isN1SExt = isSignExtended(N1, DAG); 4614 if (isN0SExt && isN1SExt) 4615 NewOpc = ARMISD::VMULLs; 4616 else { 4617 bool isN0ZExt = isZeroExtended(N0, DAG); 4618 bool isN1ZExt = isZeroExtended(N1, DAG); 4619 if (isN0ZExt && isN1ZExt) 4620 NewOpc = ARMISD::VMULLu; 4621 else if (isN1SExt || isN1ZExt) { 4622 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these 4623 // into (s/zext A * s/zext C) + (s/zext B * s/zext C) 4624 if (isN1SExt && isAddSubSExt(N0, DAG)) { 4625 NewOpc = ARMISD::VMULLs; 4626 isMLA = true; 4627 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { 4628 NewOpc = ARMISD::VMULLu; 4629 isMLA = true; 4630 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { 4631 std::swap(N0, N1); 4632 NewOpc = ARMISD::VMULLu; 4633 isMLA = true; 4634 } 4635 } 4636 4637 if (!NewOpc) { 4638 if (VT == MVT::v2i64) 4639 // Fall through to expand this. It is not legal. 4640 return SDValue(); 4641 else 4642 // Other vector multiplications are legal. 4643 return Op; 4644 } 4645 } 4646 4647 // Legalize to a VMULL instruction. 4648 DebugLoc DL = Op.getDebugLoc(); 4649 SDValue Op0; 4650 SDValue Op1 = SkipExtension(N1, DAG); 4651 if (!isMLA) { 4652 Op0 = SkipExtension(N0, DAG); 4653 assert(Op0.getValueType().is64BitVector() && 4654 Op1.getValueType().is64BitVector() && 4655 "unexpected types for extended operands to VMULL"); 4656 return DAG.getNode(NewOpc, DL, VT, Op0, Op1); 4657 } 4658 4659 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during 4660 // isel lowering to take advantage of no-stall back to back vmul + vmla. 
4661 // vmull q0, d4, d6 4662 // vmlal q0, d5, d6 4663 // is faster than 4664 // vaddl q0, d4, d5 4665 // vmovl q1, d6 4666 // vmul q0, q0, q1 4667 SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG); 4668 SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG); 4669 EVT Op1VT = Op1.getValueType(); 4670 return DAG.getNode(N0->getOpcode(), DL, VT, 4671 DAG.getNode(NewOpc, DL, VT, 4672 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), 4673 DAG.getNode(NewOpc, DL, VT, 4674 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); 4675 } 4676 4677 static SDValue 4678 LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { 4679 // Convert to float 4680 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); 4681 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); 4682 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); 4683 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); 4684 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); 4685 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); 4686 // Get reciprocal estimate. 4687 // float4 recip = vrecpeq_f32(yf); 4688 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4689 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); 4690 // Because char has a smaller range than uchar, we can actually get away 4691 // without any newton steps. This requires that we use a weird bias 4692 // of 0xb000, however (again, this has been exhaustively tested). 4693 // float4 result = as_float4(as_int4(xf*recip) + 0xb000); 4694 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); 4695 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); 4696 Y = DAG.getConstant(0xb000, MVT::i32); 4697 Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y); 4698 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y); 4699 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X); 4700 // Convert back to short. 4701 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X); 4702 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X); 4703 return X; 4704 } 4705 4706 static SDValue 4707 LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { 4708 SDValue N2; 4709 // Convert to float. 4710 // float4 yf = vcvt_f32_s32(vmovl_s16(y)); 4711 // float4 xf = vcvt_f32_s32(vmovl_s16(x)); 4712 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0); 4713 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); 4714 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 4715 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 4716 4717 // Use reciprocal estimate and one refinement step. 4718 // float4 recip = vrecpeq_f32(yf); 4719 // recip *= vrecpsq_f32(yf, recip); 4720 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4721 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); 4722 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4723 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 4724 N1, N2); 4725 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 4726 // Because short has a smaller range than ushort, we can actually get away 4727 // with only a single newton step. This requires that we use a weird bias 4728 // of 89, however (again, this has been exhaustively tested). 
4729 // float4 result = as_float4(as_int4(xf*recip) + 0x89); 4730 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 4731 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 4732 N1 = DAG.getConstant(0x89, MVT::i32); 4733 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 4734 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 4735 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 4736 // Convert back to integer and return. 4737 // return vmovn_s32(vcvt_s32_f32(result)); 4738 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 4739 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 4740 return N0; 4741 } 4742 4743 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { 4744 EVT VT = Op.getValueType(); 4745 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 4746 "unexpected type for custom-lowering ISD::SDIV"); 4747 4748 DebugLoc dl = Op.getDebugLoc(); 4749 SDValue N0 = Op.getOperand(0); 4750 SDValue N1 = Op.getOperand(1); 4751 SDValue N2, N3; 4752 4753 if (VT == MVT::v8i8) { 4754 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); 4755 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); 4756 4757 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4758 DAG.getIntPtrConstant(4)); 4759 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4760 DAG.getIntPtrConstant(4)); 4761 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4762 DAG.getIntPtrConstant(0)); 4763 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4764 DAG.getIntPtrConstant(0)); 4765 4766 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 4767 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 4768 4769 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 4770 N0 = LowerCONCAT_VECTORS(N0, DAG); 4771 4772 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); 4773 return N0; 4774 } 4775 return LowerSDIV_v4i16(N0, N1, dl, DAG); 4776 } 4777 4778 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { 4779 EVT VT = Op.getValueType(); 4780 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 4781 "unexpected type for custom-lowering ISD::UDIV"); 4782 4783 DebugLoc dl = Op.getDebugLoc(); 4784 SDValue N0 = Op.getOperand(0); 4785 SDValue N1 = Op.getOperand(1); 4786 SDValue N2, N3; 4787 4788 if (VT == MVT::v8i8) { 4789 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); 4790 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); 4791 4792 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4793 DAG.getIntPtrConstant(4)); 4794 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4795 DAG.getIntPtrConstant(4)); 4796 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4797 DAG.getIntPtrConstant(0)); 4798 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4799 DAG.getIntPtrConstant(0)); 4800 4801 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 4802 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 4803 4804 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 4805 N0 = LowerCONCAT_VECTORS(N0, DAG); 4806 4807 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, 4808 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32), 4809 N0); 4810 return N0; 4811 } 4812 4813 // v4i16 sdiv ... Convert to float. 
4814 // float4 yf = vcvt_f32_s32(vmovl_u16(y)); 4815 // float4 xf = vcvt_f32_s32(vmovl_u16(x)); 4816 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0); 4817 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1); 4818 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 4819 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 4820 4821 // Use reciprocal estimate and two refinement steps. 4822 // float4 recip = vrecpeq_f32(yf); 4823 // recip *= vrecpsq_f32(yf, recip); 4824 // recip *= vrecpsq_f32(yf, recip); 4825 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4826 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1); 4827 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4828 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 4829 BN1, N2); 4830 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 4831 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4832 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 4833 BN1, N2); 4834 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 4835 // Simply multiplying by the reciprocal estimate can leave us a few ulps 4836 // too low, so we add 2 ulps (exhaustive testing shows that this is enough, 4837 // and that it will never cause us to return an answer too large). 4838 // float4 result = as_float4(as_int4(xf*recip) + 2); 4839 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 4840 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 4841 N1 = DAG.getConstant(2, MVT::i32); 4842 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 4843 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 4844 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 4845 // Convert back to integer and return. 4846 // return vmovn_u32(vcvt_s32_f32(result)); 4847 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 4848 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 4849 return N0; 4850 } 4851 4852 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { 4853 EVT VT = Op.getNode()->getValueType(0); 4854 SDVTList VTs = DAG.getVTList(VT, MVT::i32); 4855 4856 unsigned Opc; 4857 bool ExtraOp = false; 4858 switch (Op.getOpcode()) { 4859 default: assert(0 && "Invalid code"); 4860 case ISD::ADDC: Opc = ARMISD::ADDC; break; 4861 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; 4862 case ISD::SUBC: Opc = ARMISD::SUBC; break; 4863 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break; 4864 } 4865 4866 if (!ExtraOp) 4867 return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), 4868 Op.getOperand(1)); 4869 return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), 4870 Op.getOperand(1), Op.getOperand(2)); 4871 } 4872 4873 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { 4874 // Monotonic load/store is legal for all targets 4875 if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic) 4876 return Op; 4877 4878 // Acquire/Release load/store is not legal for targets without a 4879 // dmb or equivalent available.
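// Returning a null SDValue here declines the custom lowering, so these
// orderings fall back to the legalizer's default handling instead.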
4880 return SDValue(); 4881 } 4882 4883 4884 static void 4885 ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results, 4886 SelectionDAG &DAG, unsigned NewOp) { 4887 EVT T = Node->getValueType(0); 4888 DebugLoc dl = Node->getDebugLoc(); 4889 assert (T == MVT::i64 && "Only know how to expand i64 atomics"); 4890 4891 SmallVector<SDValue, 6> Ops; 4892 Ops.push_back(Node->getOperand(0)); // Chain 4893 Ops.push_back(Node->getOperand(1)); // Ptr 4894 // Low part of Val1 4895 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 4896 Node->getOperand(2), DAG.getIntPtrConstant(0))); 4897 // High part of Val1 4898 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 4899 Node->getOperand(2), DAG.getIntPtrConstant(1))); 4900 if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) { 4901 // Low part of Val2 4902 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 4903 Node->getOperand(3), DAG.getIntPtrConstant(0))); 4904 // High part of Val2 4905 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 4906 Node->getOperand(3), DAG.getIntPtrConstant(1))); 4907 } 4908 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 4909 SDValue Result = 4910 DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64, 4911 cast<MemSDNode>(Node)->getMemOperand()); 4912 SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; 4913 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); 4914 Results.push_back(Result.getValue(2)); 4915 } 4916 4917 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 4918 switch (Op.getOpcode()) { 4919 default: llvm_unreachable("Don't know how to custom lower this!"); 4920 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4921 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 4922 case ISD::GlobalAddress: 4923 return Subtarget->isTargetDarwin() ?
LowerGlobalAddressDarwin(Op, DAG) : 4924 LowerGlobalAddressELF(Op, DAG); 4925 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 4926 case ISD::SELECT: return LowerSELECT(Op, DAG); 4927 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 4928 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 4929 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 4930 case ISD::VASTART: return LowerVASTART(Op, DAG); 4931 case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); 4932 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); 4933 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); 4934 case ISD::SINT_TO_FP: 4935 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 4936 case ISD::FP_TO_SINT: 4937 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 4938 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 4939 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 4940 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 4941 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 4942 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 4943 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 4944 case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG); 4945 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 4946 Subtarget); 4947 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); 4948 case ISD::SHL: 4949 case ISD::SRL: 4950 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 4951 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 4952 case ISD::SRL_PARTS: 4953 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 4954 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 4955 case ISD::SETCC: return LowerVSETCC(Op, DAG); 4956 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); 4957 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4958 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4959 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 4960 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 4961 case ISD::MUL: return LowerMUL(Op, DAG); 4962 case ISD::SDIV: return LowerSDIV(Op, DAG); 4963 case ISD::UDIV: return LowerUDIV(Op, DAG); 4964 case ISD::ADDC: 4965 case ISD::ADDE: 4966 case ISD::SUBC: 4967 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); 4968 case ISD::ATOMIC_LOAD: 4969 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); 4970 } 4971 return SDValue(); 4972 } 4973 4974 /// ReplaceNodeResults - Replace the results of node with an illegal result 4975 /// type with new values built out of custom code. 
4976 void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 4977 SmallVectorImpl<SDValue>&Results, 4978 SelectionDAG &DAG) const { 4979 SDValue Res; 4980 switch (N->getOpcode()) { 4981 default: 4982 llvm_unreachable("Don't know how to custom expand this!"); 4983 break; 4984 case ISD::BITCAST: 4985 Res = ExpandBITCAST(N, DAG); 4986 break; 4987 case ISD::SRL: 4988 case ISD::SRA: 4989 Res = Expand64BitShift(N, DAG, Subtarget); 4990 break; 4991 case ISD::ATOMIC_LOAD_ADD: 4992 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); 4993 return; 4994 case ISD::ATOMIC_LOAD_AND: 4995 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG); 4996 return; 4997 case ISD::ATOMIC_LOAD_NAND: 4998 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG); 4999 return; 5000 case ISD::ATOMIC_LOAD_OR: 5001 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG); 5002 return; 5003 case ISD::ATOMIC_LOAD_SUB: 5004 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG); 5005 return; 5006 case ISD::ATOMIC_LOAD_XOR: 5007 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG); 5008 return; 5009 case ISD::ATOMIC_SWAP: 5010 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG); 5011 return; 5012 case ISD::ATOMIC_CMP_SWAP: 5013 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); 5014 return; 5015 } 5016 if (Res.getNode()) 5017 Results.push_back(Res); 5018 } 5019 5020 //===----------------------------------------------------------------------===// 5021 // ARM Scheduler Hooks 5022 //===----------------------------------------------------------------------===// 5023 5024 MachineBasicBlock * 5025 ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 5026 MachineBasicBlock *BB, 5027 unsigned Size) const { 5028 unsigned dest = MI->getOperand(0).getReg(); 5029 unsigned ptr = MI->getOperand(1).getReg(); 5030 unsigned oldval = MI->getOperand(2).getReg(); 5031 unsigned newval = MI->getOperand(3).getReg(); 5032 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5033 DebugLoc dl = MI->getDebugLoc(); 5034 bool isThumb2 = Subtarget->isThumb2(); 5035 5036 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5037 unsigned scratch = 5038 MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass 5039 : ARM::GPRRegisterClass); 5040 5041 if (isThumb2) { 5042 MRI.constrainRegClass(dest, ARM::rGPRRegisterClass); 5043 MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass); 5044 MRI.constrainRegClass(newval, ARM::rGPRRegisterClass); 5045 } 5046 5047 unsigned ldrOpc, strOpc; 5048 switch (Size) { 5049 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5050 case 1: 5051 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5052 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5053 break; 5054 case 2: 5055 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5056 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5057 break; 5058 case 4: 5059 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 5060 strOpc = isThumb2 ? 
ARM::t2STREX : ARM::STREX; 5061 break; 5062 } 5063 5064 MachineFunction *MF = BB->getParent(); 5065 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5066 MachineFunction::iterator It = BB; 5067 ++It; // insert the new blocks after the current block 5068 5069 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 5070 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 5071 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5072 MF->insert(It, loop1MBB); 5073 MF->insert(It, loop2MBB); 5074 MF->insert(It, exitMBB); 5075 5076 // Transfer the remainder of BB and its successor edges to exitMBB. 5077 exitMBB->splice(exitMBB->begin(), BB, 5078 llvm::next(MachineBasicBlock::iterator(MI)), 5079 BB->end()); 5080 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5081 5082 // thisMBB: 5083 // ... 5084 // fallthrough --> loop1MBB 5085 BB->addSuccessor(loop1MBB); 5086 5087 // loop1MBB: 5088 // ldrex dest, [ptr] 5089 // cmp dest, oldval 5090 // bne exitMBB 5091 BB = loop1MBB; 5092 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5093 if (ldrOpc == ARM::t2LDREX) 5094 MIB.addImm(0); 5095 AddDefaultPred(MIB); 5096 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5097 .addReg(dest).addReg(oldval)); 5098 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5099 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5100 BB->addSuccessor(loop2MBB); 5101 BB->addSuccessor(exitMBB); 5102 5103 // loop2MBB: 5104 // strex scratch, newval, [ptr] 5105 // cmp scratch, #0 5106 // bne loop1MBB 5107 BB = loop2MBB; 5108 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); 5109 if (strOpc == ARM::t2STREX) 5110 MIB.addImm(0); 5111 AddDefaultPred(MIB); 5112 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5113 .addReg(scratch).addImm(0)); 5114 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5115 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5116 BB->addSuccessor(loop1MBB); 5117 BB->addSuccessor(exitMBB); 5118 5119 // exitMBB: 5120 // ... 5121 BB = exitMBB; 5122 5123 MI->eraseFromParent(); // The instruction is gone now. 5124 5125 return BB; 5126 } 5127 5128 MachineBasicBlock * 5129 ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 5130 unsigned Size, unsigned BinOpcode) const { 5131 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 5132 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5133 5134 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5135 MachineFunction *MF = BB->getParent(); 5136 MachineFunction::iterator It = BB; 5137 ++It; 5138 5139 unsigned dest = MI->getOperand(0).getReg(); 5140 unsigned ptr = MI->getOperand(1).getReg(); 5141 unsigned incr = MI->getOperand(2).getReg(); 5142 DebugLoc dl = MI->getDebugLoc(); 5143 bool isThumb2 = Subtarget->isThumb2(); 5144 5145 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5146 if (isThumb2) { 5147 MRI.constrainRegClass(dest, ARM::rGPRRegisterClass); 5148 MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); 5149 } 5150 5151 unsigned ldrOpc, strOpc; 5152 switch (Size) { 5153 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5154 case 1: 5155 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5156 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5157 break; 5158 case 2: 5159 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5160 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5161 break; 5162 case 4: 5163 ldrOpc = isThumb2 ? 
ARM::t2LDREX : ARM::LDREX; 5164 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 5165 break; 5166 } 5167 5168 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5169 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5170 MF->insert(It, loopMBB); 5171 MF->insert(It, exitMBB); 5172 5173 // Transfer the remainder of BB and its successor edges to exitMBB. 5174 exitMBB->splice(exitMBB->begin(), BB, 5175 llvm::next(MachineBasicBlock::iterator(MI)), 5176 BB->end()); 5177 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5178 5179 TargetRegisterClass *TRC = 5180 isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; 5181 unsigned scratch = MRI.createVirtualRegister(TRC); 5182 unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); 5183 5184 // thisMBB: 5185 // ... 5186 // fallthrough --> loopMBB 5187 BB->addSuccessor(loopMBB); 5188 5189 // loopMBB: 5190 // ldrex dest, ptr 5191 // <binop> scratch2, dest, incr 5192 // strex scratch, scratch2, ptr 5193 // cmp scratch, #0 5194 // bne- loopMBB 5195 // fallthrough --> exitMBB 5196 BB = loopMBB; 5197 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5198 if (ldrOpc == ARM::t2LDREX) 5199 MIB.addImm(0); 5200 AddDefaultPred(MIB); 5201 if (BinOpcode) { 5202 // operand order needs to go the other way for NAND 5203 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 5204 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 5205 addReg(incr).addReg(dest)).addReg(0); 5206 else 5207 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 5208 addReg(dest).addReg(incr)).addReg(0); 5209 } 5210 5211 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 5212 if (strOpc == ARM::t2STREX) 5213 MIB.addImm(0); 5214 AddDefaultPred(MIB); 5215 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5216 .addReg(scratch).addImm(0)); 5217 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5218 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5219 5220 BB->addSuccessor(loopMBB); 5221 BB->addSuccessor(exitMBB); 5222 5223 // exitMBB: 5224 // ... 5225 BB = exitMBB; 5226 5227 MI->eraseFromParent(); // The instruction is gone now. 5228 5229 return BB; 5230 } 5231 5232 MachineBasicBlock * 5233 ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, 5234 MachineBasicBlock *BB, 5235 unsigned Size, 5236 bool signExtend, 5237 ARMCC::CondCodes Cond) const { 5238 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5239 5240 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5241 MachineFunction *MF = BB->getParent(); 5242 MachineFunction::iterator It = BB; 5243 ++It; 5244 5245 unsigned dest = MI->getOperand(0).getReg(); 5246 unsigned ptr = MI->getOperand(1).getReg(); 5247 unsigned incr = MI->getOperand(2).getReg(); 5248 unsigned oldval = dest; 5249 DebugLoc dl = MI->getDebugLoc(); 5250 bool isThumb2 = Subtarget->isThumb2(); 5251 5252 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5253 if (isThumb2) { 5254 MRI.constrainRegClass(dest, ARM::rGPRRegisterClass); 5255 MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); 5256 } 5257 5258 unsigned ldrOpc, strOpc, extendOpc; 5259 switch (Size) { 5260 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5261 case 1: 5262 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5263 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5264 extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; 5265 break; 5266 case 2: 5267 ldrOpc = isThumb2 ? 
ARM::t2LDREXH : ARM::LDREXH; 5268 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5269 extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; 5270 break; 5271 case 4: 5272 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 5273 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 5274 extendOpc = 0; 5275 break; 5276 } 5277 5278 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5279 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5280 MF->insert(It, loopMBB); 5281 MF->insert(It, exitMBB); 5282 5283 // Transfer the remainder of BB and its successor edges to exitMBB. 5284 exitMBB->splice(exitMBB->begin(), BB, 5285 llvm::next(MachineBasicBlock::iterator(MI)), 5286 BB->end()); 5287 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5288 5289 TargetRegisterClass *TRC = 5290 isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; 5291 unsigned scratch = MRI.createVirtualRegister(TRC); 5292 unsigned scratch2 = MRI.createVirtualRegister(TRC); 5293 5294 // thisMBB: 5295 // ... 5296 // fallthrough --> loopMBB 5297 BB->addSuccessor(loopMBB); 5298 5299 // loopMBB: 5300 // ldrex dest, ptr 5301 // (sign extend dest, if required) 5302 // cmp dest, incr 5303 // cmov.cond scratch2, dest, incr 5304 // strex scratch, scratch2, ptr 5305 // cmp scratch, #0 5306 // bne- loopMBB 5307 // fallthrough --> exitMBB 5308 BB = loopMBB; 5309 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5310 if (ldrOpc == ARM::t2LDREX) 5311 MIB.addImm(0); 5312 AddDefaultPred(MIB); 5313 5314 // Sign extend the value, if necessary. 5315 if (signExtend && extendOpc) { 5316 oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass); 5317 AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) 5318 .addReg(dest) 5319 .addImm(0)); 5320 } 5321 5322 // Build compare and cmov instructions. 5323 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5324 .addReg(oldval).addReg(incr)); 5325 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) 5326 .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR); 5327 5328 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 5329 if (strOpc == ARM::t2STREX) 5330 MIB.addImm(0); 5331 AddDefaultPred(MIB); 5332 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5333 .addReg(scratch).addImm(0)); 5334 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5335 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5336 5337 BB->addSuccessor(loopMBB); 5338 BB->addSuccessor(exitMBB); 5339 5340 // exitMBB: 5341 // ... 5342 BB = exitMBB; 5343 5344 MI->eraseFromParent(); // The instruction is gone now. 5345 5346 return BB; 5347 } 5348 5349 MachineBasicBlock * 5350 ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, 5351 unsigned Op1, unsigned Op2, 5352 bool NeedsCarry, bool IsCmpxchg) const { 5353 // This also handles ATOMIC_SWAP, indicated by Op1==0. 
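// The 64-bit atomic is lowered to an ldrexd/strexd loop: the doubleword is
// loaded into a register pair, Op1/Op2 combine the low and high halves with the
// incoming value (propagating carry when NeedsCarry is set), and the
// store-exclusive is retried until it succeeds. For IsCmpxchg, both halves are
// compared first and the loop exits early on a mismatch.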
5354 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5355 5356 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5357 MachineFunction *MF = BB->getParent(); 5358 MachineFunction::iterator It = BB; 5359 ++It; 5360 5361 unsigned destlo = MI->getOperand(0).getReg(); 5362 unsigned desthi = MI->getOperand(1).getReg(); 5363 unsigned ptr = MI->getOperand(2).getReg(); 5364 unsigned vallo = MI->getOperand(3).getReg(); 5365 unsigned valhi = MI->getOperand(4).getReg(); 5366 DebugLoc dl = MI->getDebugLoc(); 5367 bool isThumb2 = Subtarget->isThumb2(); 5368 5369 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5370 if (isThumb2) { 5371 MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass); 5372 MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass); 5373 MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); 5374 } 5375 5376 unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD; 5377 unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD; 5378 5379 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5380 MachineBasicBlock *contBB = 0, *cont2BB = 0; 5381 if (IsCmpxchg) { 5382 contBB = MF->CreateMachineBasicBlock(LLVM_BB); 5383 cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); 5384 } 5385 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5386 MF->insert(It, loopMBB); 5387 if (IsCmpxchg) { 5388 MF->insert(It, contBB); 5389 MF->insert(It, cont2BB); 5390 } 5391 MF->insert(It, exitMBB); 5392 5393 // Transfer the remainder of BB and its successor edges to exitMBB. 5394 exitMBB->splice(exitMBB->begin(), BB, 5395 llvm::next(MachineBasicBlock::iterator(MI)), 5396 BB->end()); 5397 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5398 5399 TargetRegisterClass *TRC = 5400 isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; 5401 unsigned storesuccess = MRI.createVirtualRegister(TRC); 5402 5403 // thisMBB: 5404 // ... 5405 // fallthrough --> loopMBB 5406 BB->addSuccessor(loopMBB); 5407 5408 // loopMBB: 5409 // ldrexd r2, r3, ptr 5410 // <binopa> r0, r2, incr 5411 // <binopb> r1, r3, incr 5412 // strexd storesuccess, r0, r1, ptr 5413 // cmp storesuccess, #0 5414 // bne- loopMBB 5415 // fallthrough --> exitMBB 5416 // 5417 // Note that the registers are explicitly specified because there is not any 5418 // way to force the register allocator to allocate a register pair. 5419 // 5420 // FIXME: The hardcoded registers are not necessary for Thumb2, but we 5421 // need to properly enforce the restriction that the two output registers 5422 // for ldrexd must be different. 5423 BB = loopMBB; 5424 // Load 5425 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) 5426 .addReg(ARM::R2, RegState::Define) 5427 .addReg(ARM::R3, RegState::Define).addReg(ptr)); 5428 // Copy r2/r3 into dest. (This copy will normally be coalesced.) 5429 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2); 5430 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3); 5431 5432 if (IsCmpxchg) { 5433 // Add early exit 5434 for (unsigned i = 0; i < 2; i++) { 5435 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : 5436 ARM::CMPrr)) 5437 .addReg(i == 0 ? destlo : desthi) 5438 .addReg(i == 0 ? vallo : valhi)); 5439 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5440 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5441 BB->addSuccessor(exitMBB); 5442 BB->addSuccessor(i == 0 ? contBB : cont2BB); 5443 BB = (i == 0 ? 
contBB : cont2BB); 5444 } 5445 5446 // Copy to physregs for strexd 5447 unsigned setlo = MI->getOperand(5).getReg(); 5448 unsigned sethi = MI->getOperand(6).getReg(); 5449 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo); 5450 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi); 5451 } else if (Op1) { 5452 // Perform binary operation 5453 AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0) 5454 .addReg(destlo).addReg(vallo)) 5455 .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); 5456 AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1) 5457 .addReg(desthi).addReg(valhi)).addReg(0); 5458 } else { 5459 // Copy to physregs for strexd 5460 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo); 5461 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi); 5462 } 5463 5464 // Store 5465 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) 5466 .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr)); 5467 // Cmp+jump 5468 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5469 .addReg(storesuccess).addImm(0)); 5470 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5471 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5472 5473 BB->addSuccessor(loopMBB); 5474 BB->addSuccessor(exitMBB); 5475 5476 // exitMBB: 5477 // ... 5478 BB = exitMBB; 5479 5480 MI->eraseFromParent(); // The instruction is gone now. 5481 5482 return BB; 5483 } 5484 5485 static 5486 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 5487 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 5488 E = MBB->succ_end(); I != E; ++I) 5489 if (*I != Succ) 5490 return *I; 5491 llvm_unreachable("Expecting a BB with two successors!"); 5492 } 5493 5494 MachineBasicBlock * 5495 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 5496 MachineBasicBlock *BB) const { 5497 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5498 DebugLoc dl = MI->getDebugLoc(); 5499 bool isThumb2 = Subtarget->isThumb2(); 5500 switch (MI->getOpcode()) { 5501 default: { 5502 MI->dump(); 5503 llvm_unreachable("Unexpected instr type to insert"); 5504 } 5505 // The Thumb2 pre-indexed stores have the same MI operands, they just 5506 // define them differently in the .td files from the isel patterns, so 5507 // they need pseudos. 5508 case ARM::t2STR_preidx: 5509 MI->setDesc(TII->get(ARM::t2STR_PRE)); 5510 return BB; 5511 case ARM::t2STRB_preidx: 5512 MI->setDesc(TII->get(ARM::t2STRB_PRE)); 5513 return BB; 5514 case ARM::t2STRH_preidx: 5515 MI->setDesc(TII->get(ARM::t2STRH_PRE)); 5516 return BB; 5517 5518 case ARM::STRi_preidx: 5519 case ARM::STRBi_preidx: { 5520 unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? 5521 ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; 5522 // Decode the offset. 
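// The addrmode2 immediate packs both the add/sub direction and the offset
// magnitude; unpack it into a plain signed immediate for the *_PRE_IMM form.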
5523 unsigned Offset = MI->getOperand(4).getImm(); 5524 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; 5525 Offset = ARM_AM::getAM2Offset(Offset); 5526 if (isSub) 5527 Offset = -Offset; 5528 5529 MachineMemOperand *MMO = *MI->memoperands_begin(); 5530 BuildMI(*BB, MI, dl, TII->get(NewOpc)) 5531 .addOperand(MI->getOperand(0)) // Rn_wb 5532 .addOperand(MI->getOperand(1)) // Rt 5533 .addOperand(MI->getOperand(2)) // Rn 5534 .addImm(Offset) // offset (skip GPR==zero_reg) 5535 .addOperand(MI->getOperand(5)) // pred 5536 .addOperand(MI->getOperand(6)) 5537 .addMemOperand(MMO); 5538 MI->eraseFromParent(); 5539 return BB; 5540 } 5541 case ARM::STRr_preidx: 5542 case ARM::STRBr_preidx: 5543 case ARM::STRH_preidx: { 5544 unsigned NewOpc; 5545 switch (MI->getOpcode()) { 5546 default: llvm_unreachable("unexpected opcode!"); 5547 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; 5548 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; 5549 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; 5550 } 5551 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); 5552 for (unsigned i = 0; i < MI->getNumOperands(); ++i) 5553 MIB.addOperand(MI->getOperand(i)); 5554 MI->eraseFromParent(); 5555 return BB; 5556 } 5557 case ARM::ATOMIC_LOAD_ADD_I8: 5558 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 5559 case ARM::ATOMIC_LOAD_ADD_I16: 5560 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 5561 case ARM::ATOMIC_LOAD_ADD_I32: 5562 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 5563 5564 case ARM::ATOMIC_LOAD_AND_I8: 5565 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 5566 case ARM::ATOMIC_LOAD_AND_I16: 5567 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 5568 case ARM::ATOMIC_LOAD_AND_I32: 5569 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 5570 5571 case ARM::ATOMIC_LOAD_OR_I8: 5572 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 5573 case ARM::ATOMIC_LOAD_OR_I16: 5574 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 5575 case ARM::ATOMIC_LOAD_OR_I32: 5576 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 5577 5578 case ARM::ATOMIC_LOAD_XOR_I8: 5579 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 5580 case ARM::ATOMIC_LOAD_XOR_I16: 5581 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 5582 case ARM::ATOMIC_LOAD_XOR_I32: 5583 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 5584 5585 case ARM::ATOMIC_LOAD_NAND_I8: 5586 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 5587 case ARM::ATOMIC_LOAD_NAND_I16: 5588 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 5589 case ARM::ATOMIC_LOAD_NAND_I32: 5590 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 5591 5592 case ARM::ATOMIC_LOAD_SUB_I8: 5593 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 5594 case ARM::ATOMIC_LOAD_SUB_I16: 5595 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 5596 case ARM::ATOMIC_LOAD_SUB_I32: 5597 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); 5598 5599 case ARM::ATOMIC_LOAD_MIN_I8: 5600 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); 5601 case ARM::ATOMIC_LOAD_MIN_I16: 5602 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); 5603 case ARM::ATOMIC_LOAD_MIN_I32: 5604 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); 5605 5606 case ARM::ATOMIC_LOAD_MAX_I8: 5607 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); 5608 case ARM::ATOMIC_LOAD_MAX_I16: 5609 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); 5610 case ARM::ATOMIC_LOAD_MAX_I32: 5611 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); 5612 5613 case ARM::ATOMIC_LOAD_UMIN_I8: 5614 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); 5615 case ARM::ATOMIC_LOAD_UMIN_I16: 5616 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); 5617 case ARM::ATOMIC_LOAD_UMIN_I32: 5618 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); 5619 5620 case ARM::ATOMIC_LOAD_UMAX_I8: 5621 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); 5622 case ARM::ATOMIC_LOAD_UMAX_I16: 5623 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); 5624 case ARM::ATOMIC_LOAD_UMAX_I32: 5625 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); 5626 5627 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 5628 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 5629 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 5630 5631 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 5632 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 5633 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 5634 5635 5636 case ARM::ATOMADD6432: 5637 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, 5638 isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, 5639 /*NeedsCarry*/ true); 5640 case ARM::ATOMSUB6432: 5641 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 5642 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 5643 /*NeedsCarry*/ true); 5644 case ARM::ATOMOR6432: 5645 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, 5646 isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 5647 case ARM::ATOMXOR6432: 5648 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, 5649 isThumb2 ? ARM::t2EORrr : ARM::EORrr); 5650 case ARM::ATOMAND6432: 5651 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, 5652 isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 5653 case ARM::ATOMSWAP6432: 5654 return EmitAtomicBinary64(MI, BB, 0, 0, false); 5655 case ARM::ATOMCMPXCHG6432: 5656 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 5657 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 5658 /*NeedsCarry*/ false, /*IsCmpxchg*/true); 5659 5660 case ARM::tMOVCCr_pseudo: { 5661 // To "insert" a SELECT_CC instruction, we actually have to insert the 5662 // diamond control-flow pattern. The incoming instruction knows the 5663 // destination vreg to set, the condition code register to branch on, the 5664 // true/false values to select between, and a branch opcode to use. 5665 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5666 MachineFunction::iterator It = BB; 5667 ++It; 5668 5669 // thisMBB: 5670 // ... 5671 // TrueVal = ... 
5672 // cmpTY ccX, r1, r2 5673 // bCC copy1MBB 5674 // fallthrough --> copy0MBB 5675 MachineBasicBlock *thisMBB = BB; 5676 MachineFunction *F = BB->getParent(); 5677 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 5678 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 5679 F->insert(It, copy0MBB); 5680 F->insert(It, sinkMBB); 5681 5682 // Transfer the remainder of BB and its successor edges to sinkMBB. 5683 sinkMBB->splice(sinkMBB->begin(), BB, 5684 llvm::next(MachineBasicBlock::iterator(MI)), 5685 BB->end()); 5686 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 5687 5688 BB->addSuccessor(copy0MBB); 5689 BB->addSuccessor(sinkMBB); 5690 5691 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 5692 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 5693 5694 // copy0MBB: 5695 // %FalseValue = ... 5696 // # fallthrough to sinkMBB 5697 BB = copy0MBB; 5698 5699 // Update machine-CFG edges 5700 BB->addSuccessor(sinkMBB); 5701 5702 // sinkMBB: 5703 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 5704 // ... 5705 BB = sinkMBB; 5706 BuildMI(*BB, BB->begin(), dl, 5707 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 5708 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 5709 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 5710 5711 MI->eraseFromParent(); // The pseudo instruction is gone now. 5712 return BB; 5713 } 5714 5715 case ARM::BCCi64: 5716 case ARM::BCCZi64: { 5717 // If there is an unconditional branch to the other successor, remove it. 5718 BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); 5719 5720 // Compare both parts that make up the double comparison separately for 5721 // equality. 5722 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 5723 5724 unsigned LHS1 = MI->getOperand(1).getReg(); 5725 unsigned LHS2 = MI->getOperand(2).getReg(); 5726 if (RHSisZero) { 5727 AddDefaultPred(BuildMI(BB, dl, 5728 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5729 .addReg(LHS1).addImm(0)); 5730 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5731 .addReg(LHS2).addImm(0) 5732 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 5733 } else { 5734 unsigned RHS1 = MI->getOperand(3).getReg(); 5735 unsigned RHS2 = MI->getOperand(4).getReg(); 5736 AddDefaultPred(BuildMI(BB, dl, 5737 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5738 .addReg(LHS1).addReg(RHS1)); 5739 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5740 .addReg(LHS2).addReg(RHS2) 5741 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 5742 } 5743 5744 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); 5745 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 5746 if (MI->getOperand(0).getImm() == ARMCC::NE) 5747 std::swap(destMBB, exitMBB); 5748 5749 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5750 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 5751 if (isThumb2) 5752 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); 5753 else 5754 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); 5755 5756 MI->eraseFromParent(); // The pseudo instruction is gone now. 
5757 return BB; 5758 } 5759 } 5760 } 5761 5762 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, 5763 SDNode *Node) const { 5764 const MCInstrDesc &MCID = MI->getDesc(); 5765 if (!MCID.hasPostISelHook()) { 5766 assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && 5767 "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); 5768 return; 5769 } 5770 5771 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, 5772 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional 5773 // operand is still set to noreg. If needed, set the optional operand's 5774 // register to CPSR, and remove the redundant implicit def. 5775 // 5776 // e.g. ADCS (...opt:%noreg, CPSR<imp-def>) -> ADC (... opt:CPSR<def>). 5777 5778 // Rename pseudo opcodes. 5779 unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); 5780 if (NewOpc) { 5781 const ARMBaseInstrInfo *TII = 5782 static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); 5783 MI->setDesc(TII->get(NewOpc)); 5784 } 5785 unsigned ccOutIdx = MCID.getNumOperands() - 1; 5786 5787 // Any ARM instruction that sets the 's' bit should specify an optional 5788 // "cc_out" operand in the last operand position. 5789 if (!MCID.hasOptionalDef() || !MCID.OpInfo[ccOutIdx].isOptionalDef()) { 5790 assert(!NewOpc && "Optional cc_out operand required"); 5791 return; 5792 } 5793 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it 5794 // since we already have an optional CPSR def. 5795 bool definesCPSR = false; 5796 bool deadCPSR = false; 5797 for (unsigned i = MCID.getNumOperands(), e = MI->getNumOperands(); 5798 i != e; ++i) { 5799 const MachineOperand &MO = MI->getOperand(i); 5800 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { 5801 definesCPSR = true; 5802 if (MO.isDead()) 5803 deadCPSR = true; 5804 MI->RemoveOperand(i); 5805 break; 5806 } 5807 } 5808 if (!definesCPSR) { 5809 assert(!NewOpc && "Optional cc_out operand required"); 5810 return; 5811 } 5812 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); 5813 if (deadCPSR) { 5814 assert(!MI->getOperand(ccOutIdx).getReg() && 5815 "expect uninitialized optional cc_out operand"); 5816 return; 5817 } 5818 5819 // If this instruction was defined with an optional CPSR def and its dag node 5820 // had a live implicit CPSR def, then activate the optional CPSR def. 5821 MachineOperand &MO = MI->getOperand(ccOutIdx); 5822 MO.setReg(ARM::CPSR); 5823 MO.setIsDef(true); 5824 } 5825 5826 //===----------------------------------------------------------------------===// 5827 // ARM Optimization Hooks 5828 //===----------------------------------------------------------------------===// 5829 5830 static 5831 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 5832 TargetLowering::DAGCombinerInfo &DCI) { 5833 SelectionDAG &DAG = DCI.DAG; 5834 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 5835 EVT VT = N->getValueType(0); 5836 unsigned Opc = N->getOpcode(); 5837 bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; 5838 SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); 5839 SDValue RHS = isSlctCC ? 
Slct.getOperand(3) : Slct.getOperand(2); 5840 ISD::CondCode CC = ISD::SETCC_INVALID; 5841 5842 if (isSlctCC) { 5843 CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); 5844 } else { 5845 SDValue CCOp = Slct.getOperand(0); 5846 if (CCOp.getOpcode() == ISD::SETCC) 5847 CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); 5848 } 5849 5850 bool DoXform = false; 5851 bool InvCC = false; 5852 assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && 5853 "Bad input!"); 5854 5855 if (LHS.getOpcode() == ISD::Constant && 5856 cast<ConstantSDNode>(LHS)->isNullValue()) { 5857 DoXform = true; 5858 } else if (CC != ISD::SETCC_INVALID && 5859 RHS.getOpcode() == ISD::Constant && 5860 cast<ConstantSDNode>(RHS)->isNullValue()) { 5861 std::swap(LHS, RHS); 5862 SDValue Op0 = Slct.getOperand(0); 5863 EVT OpVT = isSlctCC ? Op0.getValueType() : 5864 Op0.getOperand(0).getValueType(); 5865 bool isInt = OpVT.isInteger(); 5866 CC = ISD::getSetCCInverse(CC, isInt); 5867 5868 if (!TLI.isCondCodeLegal(CC, OpVT)) 5869 return SDValue(); // Inverse operator isn't legal. 5870 5871 DoXform = true; 5872 InvCC = true; 5873 } 5874 5875 if (DoXform) { 5876 SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); 5877 if (isSlctCC) 5878 return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, 5879 Slct.getOperand(0), Slct.getOperand(1), CC); 5880 SDValue CCOp = Slct.getOperand(0); 5881 if (InvCC) 5882 CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), 5883 CCOp.getOperand(0), CCOp.getOperand(1), CC); 5884 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, 5885 CCOp, OtherOp, Result); 5886 } 5887 return SDValue(); 5888 } 5889 5890 // AddCombineToVPADDL - For pairwise add on NEON, use the vpaddl instruction 5891 // (only after legalization). 5892 static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, 5893 TargetLowering::DAGCombinerInfo &DCI, 5894 const ARMSubtarget *Subtarget) { 5895 5896 // Only perform this optimization after legalization, and only if NEON is 5897 // available. We also expect both operands to be BUILD_VECTORs. 5898 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON() 5899 || N0.getOpcode() != ISD::BUILD_VECTOR 5900 || N1.getOpcode() != ISD::BUILD_VECTOR) 5901 return SDValue(); 5902 5903 // Check output type since VPADDL operand elements can only be 8, 16, or 32. 5904 EVT VT = N->getValueType(0); 5905 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64) 5906 return SDValue(); 5907 5908 // Check that the vector operands are of the right form. 5909 // N0 and N1 are BUILD_VECTOR nodes with N EXTRACT_VECTOR_ELT 5910 // operands, where N is the number of elements in the formed vector. 5911 // Each EXTRACT_VECTOR_ELT should have the same input vector and an odd or even 5912 // index such that together they form a pairwise add pattern. 5913 5914 // Grab the vector that all EXTRACT_VECTOR_ELT nodes should be referencing. 5915 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 5916 return SDValue(); 5917 SDValue Vec = N0->getOperand(0)->getOperand(0); 5918 SDNode *V = Vec.getNode(); 5919 unsigned nextIndex = 0; 5920 5921 // For each operand of the ADD that is a BUILD_VECTOR, 5922 // check to see if each of its operands is an EXTRACT_VECTOR_ELT with 5923 // the same vector and an appropriate index.
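// In DAG terms, the pattern being matched is roughly:
//   (add (build_vector (extractelt v, 0), (extractelt v, 2), ...),
//        (build_vector (extractelt v, 1), (extractelt v, 3), ...))
// i.e. a pairwise add of v, which is exactly what vpaddl computes.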
  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
    if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
        && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

      SDValue ExtVec0 = N0->getOperand(i);
      SDValue ExtVec1 = N1->getOperand(i);

      // The first operand is the vector; verify it is the same one.
      if (V != ExtVec0->getOperand(0).getNode() ||
          V != ExtVec1->getOperand(0).getNode())
        return SDValue();

      // The second operand is the constant index; verify it is correct.
      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));

      // The constants must form the next even/odd index pair: N0 supplies
      // the even indices and N1 the odd ones.
      if (!C0 || !C1 || C0->getZExtValue() != nextIndex
          || C1->getZExtValue() != nextIndex+1)
        return SDValue();

      // Increment index.
      nextIndex += 2;
    } else
      return SDValue();
  }

  // Create VPADDL node.
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Build operand list.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
                                TLI.getPointerTy()));

  // Input is the vector.
  Ops.push_back(Vec);

  // Get widened type and narrowed type.
  MVT widenType;
  unsigned numElem = VT.getVectorNumElements();
  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
    case MVT::i8:  widenType = MVT::getVectorVT(MVT::i16, numElem); break;
    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
    default:
      assert(0 && "Invalid vector element type for padd optimization.");
  }

  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
                            widenType, &Ops[0], Ops.size());
  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
}

/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1.  This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                                          TargetLowering::DAGCombinerInfo &DCI,
                                          const ARMSubtarget *Subtarget) {

  // Attempt to create vpaddl for this add.
  SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
  if (Result.getNode())
    return Result;

  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
    if (Result.getNode()) return Result;
  }
  return SDValue();
}

/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // First try with the default operand order.
  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
  if (Result.getNode())
    return Result;

  // If that didn't work, try again with the operands commuted.
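  // (ADD is commutative, and the helper above only looks for the select /
  // pairwise pattern in one operand position, so a second pass with N0 and
  // N1 swapped catches the symmetric case.)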
6014 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); 6015 } 6016 6017 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 6018 /// 6019 static SDValue PerformSUBCombine(SDNode *N, 6020 TargetLowering::DAGCombinerInfo &DCI) { 6021 SDValue N0 = N->getOperand(0); 6022 SDValue N1 = N->getOperand(1); 6023 6024 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 6025 if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { 6026 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 6027 if (Result.getNode()) return Result; 6028 } 6029 6030 return SDValue(); 6031 } 6032 6033 /// PerformVMULCombine 6034 /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the 6035 /// special multiplier accumulator forwarding. 6036 /// vmul d3, d0, d2 6037 /// vmla d3, d1, d2 6038 /// is faster than 6039 /// vadd d3, d0, d1 6040 /// vmul d3, d3, d2 6041 static SDValue PerformVMULCombine(SDNode *N, 6042 TargetLowering::DAGCombinerInfo &DCI, 6043 const ARMSubtarget *Subtarget) { 6044 if (!Subtarget->hasVMLxForwarding()) 6045 return SDValue(); 6046 6047 SelectionDAG &DAG = DCI.DAG; 6048 SDValue N0 = N->getOperand(0); 6049 SDValue N1 = N->getOperand(1); 6050 unsigned Opcode = N0.getOpcode(); 6051 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 6052 Opcode != ISD::FADD && Opcode != ISD::FSUB) { 6053 Opcode = N1.getOpcode(); 6054 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 6055 Opcode != ISD::FADD && Opcode != ISD::FSUB) 6056 return SDValue(); 6057 std::swap(N0, N1); 6058 } 6059 6060 EVT VT = N->getValueType(0); 6061 DebugLoc DL = N->getDebugLoc(); 6062 SDValue N00 = N0->getOperand(0); 6063 SDValue N01 = N0->getOperand(1); 6064 return DAG.getNode(Opcode, DL, VT, 6065 DAG.getNode(ISD::MUL, DL, VT, N00, N1), 6066 DAG.getNode(ISD::MUL, DL, VT, N01, N1)); 6067 } 6068 6069 static SDValue PerformMULCombine(SDNode *N, 6070 TargetLowering::DAGCombinerInfo &DCI, 6071 const ARMSubtarget *Subtarget) { 6072 SelectionDAG &DAG = DCI.DAG; 6073 6074 if (Subtarget->isThumb1Only()) 6075 return SDValue(); 6076 6077 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 6078 return SDValue(); 6079 6080 EVT VT = N->getValueType(0); 6081 if (VT.is64BitVector() || VT.is128BitVector()) 6082 return PerformVMULCombine(N, DCI, Subtarget); 6083 if (VT != MVT::i32) 6084 return SDValue(); 6085 6086 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 6087 if (!C) 6088 return SDValue(); 6089 6090 uint64_t MulAmt = C->getZExtValue(); 6091 unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); 6092 ShiftAmt = ShiftAmt & (32 - 1); 6093 SDValue V = N->getOperand(0); 6094 DebugLoc DL = N->getDebugLoc(); 6095 6096 SDValue Res; 6097 MulAmt >>= ShiftAmt; 6098 if (isPowerOf2_32(MulAmt - 1)) { 6099 // (mul x, 2^N + 1) => (add (shl x, N), x) 6100 Res = DAG.getNode(ISD::ADD, DL, VT, 6101 V, DAG.getNode(ISD::SHL, DL, VT, 6102 V, DAG.getConstant(Log2_32(MulAmt-1), 6103 MVT::i32))); 6104 } else if (isPowerOf2_32(MulAmt + 1)) { 6105 // (mul x, 2^N - 1) => (sub (shl x, N), x) 6106 Res = DAG.getNode(ISD::SUB, DL, VT, 6107 DAG.getNode(ISD::SHL, DL, VT, 6108 V, DAG.getConstant(Log2_32(MulAmt+1), 6109 MVT::i32)), 6110 V); 6111 } else 6112 return SDValue(); 6113 6114 if (ShiftAmt != 0) 6115 Res = DAG.getNode(ISD::SHL, DL, VT, Res, 6116 DAG.getConstant(ShiftAmt, MVT::i32)); 6117 6118 // Do not add new nodes to DAG combiner worklist. 
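  // Worked example (illustrative): for (mul x, 24), ShiftAmt is 3 and the
  // shifted MulAmt is 3 = 2^1 + 1, so Res ends up as
  // (shl (add x, (shl x, 1)), 3).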
6119 DCI.CombineTo(N, Res, false); 6120 return SDValue(); 6121 } 6122 6123 static SDValue PerformANDCombine(SDNode *N, 6124 TargetLowering::DAGCombinerInfo &DCI) { 6125 6126 // Attempt to use immediate-form VBIC 6127 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 6128 DebugLoc dl = N->getDebugLoc(); 6129 EVT VT = N->getValueType(0); 6130 SelectionDAG &DAG = DCI.DAG; 6131 6132 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 6133 return SDValue(); 6134 6135 APInt SplatBits, SplatUndef; 6136 unsigned SplatBitSize; 6137 bool HasAnyUndefs; 6138 if (BVN && 6139 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 6140 if (SplatBitSize <= 64) { 6141 EVT VbicVT; 6142 SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), 6143 SplatUndef.getZExtValue(), SplatBitSize, 6144 DAG, VbicVT, VT.is128BitVector(), 6145 OtherModImm); 6146 if (Val.getNode()) { 6147 SDValue Input = 6148 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); 6149 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); 6150 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); 6151 } 6152 } 6153 } 6154 6155 return SDValue(); 6156 } 6157 6158 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR 6159 static SDValue PerformORCombine(SDNode *N, 6160 TargetLowering::DAGCombinerInfo &DCI, 6161 const ARMSubtarget *Subtarget) { 6162 // Attempt to use immediate-form VORR 6163 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 6164 DebugLoc dl = N->getDebugLoc(); 6165 EVT VT = N->getValueType(0); 6166 SelectionDAG &DAG = DCI.DAG; 6167 6168 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 6169 return SDValue(); 6170 6171 APInt SplatBits, SplatUndef; 6172 unsigned SplatBitSize; 6173 bool HasAnyUndefs; 6174 if (BVN && Subtarget->hasNEON() && 6175 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 6176 if (SplatBitSize <= 64) { 6177 EVT VorrVT; 6178 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 6179 SplatUndef.getZExtValue(), SplatBitSize, 6180 DAG, VorrVT, VT.is128BitVector(), 6181 OtherModImm); 6182 if (Val.getNode()) { 6183 SDValue Input = 6184 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); 6185 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); 6186 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); 6187 } 6188 } 6189 } 6190 6191 SDValue N0 = N->getOperand(0); 6192 if (N0.getOpcode() != ISD::AND) 6193 return SDValue(); 6194 SDValue N1 = N->getOperand(1); 6195 6196 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. 6197 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && 6198 DAG.getTargetLoweringInfo().isTypeLegal(VT)) { 6199 APInt SplatUndef; 6200 unsigned SplatBitSize; 6201 bool HasAnyUndefs; 6202 6203 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); 6204 APInt SplatBits0; 6205 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, 6206 HasAnyUndefs) && !HasAnyUndefs) { 6207 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); 6208 APInt SplatBits1; 6209 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, 6210 HasAnyUndefs) && !HasAnyUndefs && 6211 SplatBits0 == ~SplatBits1) { 6212 // Canonicalize the vector type to make instruction selection simpler. 6213 EVT CanonicalVT = VT.is128BitVector() ? 
MVT::v4i32 : MVT::v2i32; 6214 SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, 6215 N0->getOperand(1), N0->getOperand(0), 6216 N1->getOperand(0)); 6217 return DAG.getNode(ISD::BITCAST, dl, VT, Result); 6218 } 6219 } 6220 } 6221 6222 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when 6223 // reasonable. 6224 6225 // BFI is only available on V6T2+ 6226 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) 6227 return SDValue(); 6228 6229 DebugLoc DL = N->getDebugLoc(); 6230 // 1) or (and A, mask), val => ARMbfi A, val, mask 6231 // iff (val & mask) == val 6232 // 6233 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 6234 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) 6235 // && mask == ~mask2 6236 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) 6237 // && ~mask == mask2 6238 // (i.e., copy a bitfield value into another bitfield of the same width) 6239 6240 if (VT != MVT::i32) 6241 return SDValue(); 6242 6243 SDValue N00 = N0.getOperand(0); 6244 6245 // The value and the mask need to be constants so we can verify this is 6246 // actually a bitfield set. If the mask is 0xffff, we can do better 6247 // via a movt instruction, so don't use BFI in that case. 6248 SDValue MaskOp = N0.getOperand(1); 6249 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp); 6250 if (!MaskC) 6251 return SDValue(); 6252 unsigned Mask = MaskC->getZExtValue(); 6253 if (Mask == 0xffff) 6254 return SDValue(); 6255 SDValue Res; 6256 // Case (1): or (and A, mask), val => ARMbfi A, val, mask 6257 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 6258 if (N1C) { 6259 unsigned Val = N1C->getZExtValue(); 6260 if ((Val & ~Mask) != Val) 6261 return SDValue(); 6262 6263 if (ARM::isBitFieldInvertedMask(Mask)) { 6264 Val >>= CountTrailingZeros_32(~Mask); 6265 6266 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, 6267 DAG.getConstant(Val, MVT::i32), 6268 DAG.getConstant(Mask, MVT::i32)); 6269 6270 // Do not add new nodes to DAG combiner worklist. 6271 DCI.CombineTo(N, Res, false); 6272 return SDValue(); 6273 } 6274 } else if (N1.getOpcode() == ISD::AND) { 6275 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 6276 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 6277 if (!N11C) 6278 return SDValue(); 6279 unsigned Mask2 = N11C->getZExtValue(); 6280 6281 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern 6282 // as is to match. 6283 if (ARM::isBitFieldInvertedMask(Mask) && 6284 (Mask == ~Mask2)) { 6285 // The pack halfword instruction works better for masks that fit it, 6286 // so use that when it's available. 6287 if (Subtarget->hasT2ExtractPack() && 6288 (Mask == 0xffff || Mask == 0xffff0000)) 6289 return SDValue(); 6290 // 2a 6291 unsigned amt = CountTrailingZeros_32(Mask2); 6292 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 6293 DAG.getConstant(amt, MVT::i32)); 6294 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, 6295 DAG.getConstant(Mask, MVT::i32)); 6296 // Do not add new nodes to DAG combiner worklist. 6297 DCI.CombineTo(N, Res, false); 6298 return SDValue(); 6299 } else if (ARM::isBitFieldInvertedMask(~Mask) && 6300 (~Mask == Mask2)) { 6301 // The pack halfword instruction works better for masks that fit it, 6302 // so use that when it's available. 
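      // (Copying a whole halfword is handled better by the pack-halfword
      // instructions, e.g. PKHBT/PKHTB, so BFI is deliberately skipped for
      // those masks.)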
6303 if (Subtarget->hasT2ExtractPack() && 6304 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 6305 return SDValue(); 6306 // 2b 6307 unsigned lsb = CountTrailingZeros_32(Mask); 6308 Res = DAG.getNode(ISD::SRL, DL, VT, N00, 6309 DAG.getConstant(lsb, MVT::i32)); 6310 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 6311 DAG.getConstant(Mask2, MVT::i32)); 6312 // Do not add new nodes to DAG combiner worklist. 6313 DCI.CombineTo(N, Res, false); 6314 return SDValue(); 6315 } 6316 } 6317 6318 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && 6319 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) && 6320 ARM::isBitFieldInvertedMask(~Mask)) { 6321 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask 6322 // where lsb(mask) == #shamt and masked bits of B are known zero. 6323 SDValue ShAmt = N00.getOperand(1); 6324 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 6325 unsigned LSB = CountTrailingZeros_32(Mask); 6326 if (ShAmtC != LSB) 6327 return SDValue(); 6328 6329 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), 6330 DAG.getConstant(~Mask, MVT::i32)); 6331 6332 // Do not add new nodes to DAG combiner worklist. 6333 DCI.CombineTo(N, Res, false); 6334 } 6335 6336 return SDValue(); 6337 } 6338 6339 /// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff 6340 /// the bits being cleared by the AND are not demanded by the BFI. 6341 static SDValue PerformBFICombine(SDNode *N, 6342 TargetLowering::DAGCombinerInfo &DCI) { 6343 SDValue N1 = N->getOperand(1); 6344 if (N1.getOpcode() == ISD::AND) { 6345 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 6346 if (!N11C) 6347 return SDValue(); 6348 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 6349 unsigned LSB = CountTrailingZeros_32(~InvMask); 6350 unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB; 6351 unsigned Mask = (1 << Width)-1; 6352 unsigned Mask2 = N11C->getZExtValue(); 6353 if ((Mask & (~Mask2)) == 0) 6354 return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0), 6355 N->getOperand(0), N1.getOperand(0), 6356 N->getOperand(2)); 6357 } 6358 return SDValue(); 6359 } 6360 6361 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for 6362 /// ARMISD::VMOVRRD. 6363 static SDValue PerformVMOVRRDCombine(SDNode *N, 6364 TargetLowering::DAGCombinerInfo &DCI) { 6365 // vmovrrd(vmovdrr x, y) -> x,y 6366 SDValue InDouble = N->getOperand(0); 6367 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 6368 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 6369 6370 // vmovrrd(load f64) -> (load i32), (load i32) 6371 SDNode *InNode = InDouble.getNode(); 6372 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && 6373 InNode->getValueType(0) == MVT::f64 && 6374 InNode->getOperand(1).getOpcode() == ISD::FrameIndex && 6375 !cast<LoadSDNode>(InNode)->isVolatile()) { 6376 // TODO: Should this be done for non-FrameIndex operands? 
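    // Illustrative result: (vmovrrd (load f64 [FI])) becomes a pair of i32
    // loads from [FI] and [FI + 4], so both halves are produced directly in
    // GPRs without a round trip through a VFP register.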
6377 LoadSDNode *LD = cast<LoadSDNode>(InNode); 6378 6379 SelectionDAG &DAG = DCI.DAG; 6380 DebugLoc DL = LD->getDebugLoc(); 6381 SDValue BasePtr = LD->getBasePtr(); 6382 SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, 6383 LD->getPointerInfo(), LD->isVolatile(), 6384 LD->isNonTemporal(), LD->getAlignment()); 6385 6386 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 6387 DAG.getConstant(4, MVT::i32)); 6388 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, 6389 LD->getPointerInfo(), LD->isVolatile(), 6390 LD->isNonTemporal(), 6391 std::min(4U, LD->getAlignment() / 2)); 6392 6393 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); 6394 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); 6395 DCI.RemoveFromWorklist(LD); 6396 DAG.DeleteNode(LD); 6397 return Result; 6398 } 6399 6400 return SDValue(); 6401 } 6402 6403 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for 6404 /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. 6405 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { 6406 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) 6407 SDValue Op0 = N->getOperand(0); 6408 SDValue Op1 = N->getOperand(1); 6409 if (Op0.getOpcode() == ISD::BITCAST) 6410 Op0 = Op0.getOperand(0); 6411 if (Op1.getOpcode() == ISD::BITCAST) 6412 Op1 = Op1.getOperand(0); 6413 if (Op0.getOpcode() == ARMISD::VMOVRRD && 6414 Op0.getNode() == Op1.getNode() && 6415 Op0.getResNo() == 0 && Op1.getResNo() == 1) 6416 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), 6417 N->getValueType(0), Op0.getOperand(0)); 6418 return SDValue(); 6419 } 6420 6421 /// PerformSTORECombine - Target-specific dag combine xforms for 6422 /// ISD::STORE. 6423 static SDValue PerformSTORECombine(SDNode *N, 6424 TargetLowering::DAGCombinerInfo &DCI) { 6425 // Bitcast an i64 store extracted from a vector to f64. 6426 // Otherwise, the i64 value will be legalized to a pair of i32 values. 
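  // e.g. (store (i64 extract_elt V, 0), addr) is rewritten below as
  //   (store (i64 bitcast (f64 extract_elt (bitcast V to v2f64), 0)), addr)
  // so the element can be stored straight from a VFP/NEON register.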
6427 StoreSDNode *St = cast<StoreSDNode>(N); 6428 SDValue StVal = St->getValue(); 6429 if (!ISD::isNormalStore(St) || St->isVolatile()) 6430 return SDValue(); 6431 6432 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && 6433 StVal.getNode()->hasOneUse() && !St->isVolatile()) { 6434 SelectionDAG &DAG = DCI.DAG; 6435 DebugLoc DL = St->getDebugLoc(); 6436 SDValue BasePtr = St->getBasePtr(); 6437 SDValue NewST1 = DAG.getStore(St->getChain(), DL, 6438 StVal.getNode()->getOperand(0), BasePtr, 6439 St->getPointerInfo(), St->isVolatile(), 6440 St->isNonTemporal(), St->getAlignment()); 6441 6442 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 6443 DAG.getConstant(4, MVT::i32)); 6444 return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), 6445 OffsetPtr, St->getPointerInfo(), St->isVolatile(), 6446 St->isNonTemporal(), 6447 std::min(4U, St->getAlignment() / 2)); 6448 } 6449 6450 if (StVal.getValueType() != MVT::i64 || 6451 StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 6452 return SDValue(); 6453 6454 SelectionDAG &DAG = DCI.DAG; 6455 DebugLoc dl = StVal.getDebugLoc(); 6456 SDValue IntVec = StVal.getOperand(0); 6457 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 6458 IntVec.getValueType().getVectorNumElements()); 6459 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); 6460 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, 6461 Vec, StVal.getOperand(1)); 6462 dl = N->getDebugLoc(); 6463 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); 6464 // Make the DAGCombiner fold the bitcasts. 6465 DCI.AddToWorklist(Vec.getNode()); 6466 DCI.AddToWorklist(ExtElt.getNode()); 6467 DCI.AddToWorklist(V.getNode()); 6468 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), 6469 St->getPointerInfo(), St->isVolatile(), 6470 St->isNonTemporal(), St->getAlignment(), 6471 St->getTBAAInfo()); 6472 } 6473 6474 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node 6475 /// are normal, non-volatile loads. If so, it is profitable to bitcast an 6476 /// i64 vector to have f64 elements, since the value can then be loaded 6477 /// directly into a VFP register. 6478 static bool hasNormalLoadOperand(SDNode *N) { 6479 unsigned NumElts = N->getValueType(0).getVectorNumElements(); 6480 for (unsigned i = 0; i < NumElts; ++i) { 6481 SDNode *Elt = N->getOperand(i).getNode(); 6482 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile()) 6483 return true; 6484 } 6485 return false; 6486 } 6487 6488 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for 6489 /// ISD::BUILD_VECTOR. 6490 static SDValue PerformBUILD_VECTORCombine(SDNode *N, 6491 TargetLowering::DAGCombinerInfo &DCI){ 6492 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): 6493 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value 6494 // into a pair of GPRs, which is fine when the value is used as a scalar, 6495 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. 6496 SelectionDAG &DAG = DCI.DAG; 6497 if (N->getNumOperands() == 2) { 6498 SDValue RV = PerformVMOVDRRCombine(N, DAG); 6499 if (RV.getNode()) 6500 return RV; 6501 } 6502 6503 // Load i64 elements as f64 values so that type legalization does not split 6504 // them up into i32 values. 
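  // e.g. (build_vector (i64 load A), (i64 load B)) is rebuilt below as
  //   (v2i64 bitcast (build_vector (f64 bitcast LA), (f64 bitcast LB)))
  // and the DAGCombiner is expected to fold each bitcast into an f64 load.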
6505 EVT VT = N->getValueType(0); 6506 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) 6507 return SDValue(); 6508 DebugLoc dl = N->getDebugLoc(); 6509 SmallVector<SDValue, 8> Ops; 6510 unsigned NumElts = VT.getVectorNumElements(); 6511 for (unsigned i = 0; i < NumElts; ++i) { 6512 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i)); 6513 Ops.push_back(V); 6514 // Make the DAGCombiner fold the bitcast. 6515 DCI.AddToWorklist(V.getNode()); 6516 } 6517 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); 6518 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); 6519 return DAG.getNode(ISD::BITCAST, dl, VT, BV); 6520 } 6521 6522 /// PerformInsertEltCombine - Target-specific dag combine xforms for 6523 /// ISD::INSERT_VECTOR_ELT. 6524 static SDValue PerformInsertEltCombine(SDNode *N, 6525 TargetLowering::DAGCombinerInfo &DCI) { 6526 // Bitcast an i64 load inserted into a vector to f64. 6527 // Otherwise, the i64 value will be legalized to a pair of i32 values. 6528 EVT VT = N->getValueType(0); 6529 SDNode *Elt = N->getOperand(1).getNode(); 6530 if (VT.getVectorElementType() != MVT::i64 || 6531 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile()) 6532 return SDValue(); 6533 6534 SelectionDAG &DAG = DCI.DAG; 6535 DebugLoc dl = N->getDebugLoc(); 6536 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 6537 VT.getVectorNumElements()); 6538 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); 6539 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1)); 6540 // Make the DAGCombiner fold the bitcasts. 6541 DCI.AddToWorklist(Vec.getNode()); 6542 DCI.AddToWorklist(V.getNode()); 6543 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT, 6544 Vec, V, N->getOperand(2)); 6545 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt); 6546 } 6547 6548 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for 6549 /// ISD::VECTOR_SHUFFLE. 6550 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { 6551 // The LLVM shufflevector instruction does not require the shuffle mask 6552 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does 6553 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the 6554 // operands do not match the mask length, they are extended by concatenating 6555 // them with undef vectors. That is probably the right thing for other 6556 // targets, but for NEON it is better to concatenate two double-register 6557 // size vector operands into a single quad-register size vector. Do that 6558 // transformation here: 6559 // shuffle(concat(v1, undef), concat(v2, undef)) -> 6560 // shuffle(concat(v1, v2), undef) 6561 SDValue Op0 = N->getOperand(0); 6562 SDValue Op1 = N->getOperand(1); 6563 if (Op0.getOpcode() != ISD::CONCAT_VECTORS || 6564 Op1.getOpcode() != ISD::CONCAT_VECTORS || 6565 Op0.getNumOperands() != 2 || 6566 Op1.getNumOperands() != 2) 6567 return SDValue(); 6568 SDValue Concat0Op1 = Op0.getOperand(1); 6569 SDValue Concat1Op1 = Op1.getOperand(1); 6570 if (Concat0Op1.getOpcode() != ISD::UNDEF || 6571 Concat1Op1.getOpcode() != ISD::UNDEF) 6572 return SDValue(); 6573 // Skip the transformation if any of the types are illegal. 
6574 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6575 EVT VT = N->getValueType(0); 6576 if (!TLI.isTypeLegal(VT) || 6577 !TLI.isTypeLegal(Concat0Op1.getValueType()) || 6578 !TLI.isTypeLegal(Concat1Op1.getValueType())) 6579 return SDValue(); 6580 6581 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, 6582 Op0.getOperand(0), Op1.getOperand(0)); 6583 // Translate the shuffle mask. 6584 SmallVector<int, 16> NewMask; 6585 unsigned NumElts = VT.getVectorNumElements(); 6586 unsigned HalfElts = NumElts/2; 6587 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 6588 for (unsigned n = 0; n < NumElts; ++n) { 6589 int MaskElt = SVN->getMaskElt(n); 6590 int NewElt = -1; 6591 if (MaskElt < (int)HalfElts) 6592 NewElt = MaskElt; 6593 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts)) 6594 NewElt = HalfElts + MaskElt - NumElts; 6595 NewMask.push_back(NewElt); 6596 } 6597 return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat, 6598 DAG.getUNDEF(VT), NewMask.data()); 6599 } 6600 6601 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and 6602 /// NEON load/store intrinsics to merge base address updates. 6603 static SDValue CombineBaseUpdate(SDNode *N, 6604 TargetLowering::DAGCombinerInfo &DCI) { 6605 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 6606 return SDValue(); 6607 6608 SelectionDAG &DAG = DCI.DAG; 6609 bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || 6610 N->getOpcode() == ISD::INTRINSIC_W_CHAIN); 6611 unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); 6612 SDValue Addr = N->getOperand(AddrOpIdx); 6613 6614 // Search for a use of the address operand that is an increment. 6615 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), 6616 UE = Addr.getNode()->use_end(); UI != UE; ++UI) { 6617 SDNode *User = *UI; 6618 if (User->getOpcode() != ISD::ADD || 6619 UI.getUse().getResNo() != Addr.getResNo()) 6620 continue; 6621 6622 // Check that the add is independent of the load/store. Otherwise, folding 6623 // it would create a cycle. 6624 if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) 6625 continue; 6626 6627 // Find the new opcode for the updating load/store. 
6628 bool isLoad = true; 6629 bool isLaneOp = false; 6630 unsigned NewOpc = 0; 6631 unsigned NumVecs = 0; 6632 if (isIntrinsic) { 6633 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 6634 switch (IntNo) { 6635 default: assert(0 && "unexpected intrinsic for Neon base update"); 6636 case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; 6637 NumVecs = 1; break; 6638 case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; 6639 NumVecs = 2; break; 6640 case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; 6641 NumVecs = 3; break; 6642 case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; 6643 NumVecs = 4; break; 6644 case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; 6645 NumVecs = 2; isLaneOp = true; break; 6646 case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; 6647 NumVecs = 3; isLaneOp = true; break; 6648 case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; 6649 NumVecs = 4; isLaneOp = true; break; 6650 case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; 6651 NumVecs = 1; isLoad = false; break; 6652 case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; 6653 NumVecs = 2; isLoad = false; break; 6654 case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; 6655 NumVecs = 3; isLoad = false; break; 6656 case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; 6657 NumVecs = 4; isLoad = false; break; 6658 case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; 6659 NumVecs = 2; isLoad = false; isLaneOp = true; break; 6660 case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; 6661 NumVecs = 3; isLoad = false; isLaneOp = true; break; 6662 case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; 6663 NumVecs = 4; isLoad = false; isLaneOp = true; break; 6664 } 6665 } else { 6666 isLaneOp = true; 6667 switch (N->getOpcode()) { 6668 default: assert(0 && "unexpected opcode for Neon base update"); 6669 case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; 6670 case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; 6671 case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; 6672 } 6673 } 6674 6675 // Find the size of memory referenced by the load/store. 6676 EVT VecTy; 6677 if (isLoad) 6678 VecTy = N->getValueType(0); 6679 else 6680 VecTy = N->getOperand(AddrOpIdx+1).getValueType(); 6681 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; 6682 if (isLaneOp) 6683 NumBytes /= VecTy.getVectorNumElements(); 6684 6685 // If the increment is a constant, it must match the memory ref size. 6686 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); 6687 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { 6688 uint64_t IncVal = CInc->getZExtValue(); 6689 if (IncVal != NumBytes) 6690 continue; 6691 } else if (NumBytes >= 3 * 16) { 6692 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two 6693 // separate instructions that make it harder to use a non-constant update. 6694 continue; 6695 } 6696 6697 // Create the new updating load/store node. 6698 EVT Tys[6]; 6699 unsigned NumResultVecs = (isLoad ? 
NumVecs : 0); 6700 unsigned n; 6701 for (n = 0; n < NumResultVecs; ++n) 6702 Tys[n] = VecTy; 6703 Tys[n++] = MVT::i32; 6704 Tys[n] = MVT::Other; 6705 SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); 6706 SmallVector<SDValue, 8> Ops; 6707 Ops.push_back(N->getOperand(0)); // incoming chain 6708 Ops.push_back(N->getOperand(AddrOpIdx)); 6709 Ops.push_back(Inc); 6710 for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { 6711 Ops.push_back(N->getOperand(i)); 6712 } 6713 MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); 6714 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys, 6715 Ops.data(), Ops.size(), 6716 MemInt->getMemoryVT(), 6717 MemInt->getMemOperand()); 6718 6719 // Update the uses. 6720 std::vector<SDValue> NewResults; 6721 for (unsigned i = 0; i < NumResultVecs; ++i) { 6722 NewResults.push_back(SDValue(UpdN.getNode(), i)); 6723 } 6724 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain 6725 DCI.CombineTo(N, NewResults); 6726 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); 6727 6728 break; 6729 } 6730 return SDValue(); 6731 } 6732 6733 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a 6734 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic 6735 /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and 6736 /// return true. 6737 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 6738 SelectionDAG &DAG = DCI.DAG; 6739 EVT VT = N->getValueType(0); 6740 // vldN-dup instructions only support 64-bit vectors for N > 1. 6741 if (!VT.is64BitVector()) 6742 return false; 6743 6744 // Check if the VDUPLANE operand is a vldN-dup intrinsic. 6745 SDNode *VLD = N->getOperand(0).getNode(); 6746 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) 6747 return false; 6748 unsigned NumVecs = 0; 6749 unsigned NewOpc = 0; 6750 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); 6751 if (IntNo == Intrinsic::arm_neon_vld2lane) { 6752 NumVecs = 2; 6753 NewOpc = ARMISD::VLD2DUP; 6754 } else if (IntNo == Intrinsic::arm_neon_vld3lane) { 6755 NumVecs = 3; 6756 NewOpc = ARMISD::VLD3DUP; 6757 } else if (IntNo == Intrinsic::arm_neon_vld4lane) { 6758 NumVecs = 4; 6759 NewOpc = ARMISD::VLD4DUP; 6760 } else { 6761 return false; 6762 } 6763 6764 // First check that all the vldN-lane uses are VDUPLANEs and that the lane 6765 // numbers match the load. 6766 unsigned VLDLaneNo = 6767 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue(); 6768 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 6769 UI != UE; ++UI) { 6770 // Ignore uses of the chain result. 6771 if (UI.getUse().getResNo() == NumVecs) 6772 continue; 6773 SDNode *User = *UI; 6774 if (User->getOpcode() != ARMISD::VDUPLANE || 6775 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) 6776 return false; 6777 } 6778 6779 // Create the vldN-dup node. 6780 EVT Tys[5]; 6781 unsigned n; 6782 for (n = 0; n < NumVecs; ++n) 6783 Tys[n] = VT; 6784 Tys[n] = MVT::Other; 6785 SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); 6786 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; 6787 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); 6788 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys, 6789 Ops, 2, VLDMemInt->getMemoryVT(), 6790 VLDMemInt->getMemOperand()); 6791 6792 // Update the uses. 
6793 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 6794 UI != UE; ++UI) { 6795 unsigned ResNo = UI.getUse().getResNo(); 6796 // Ignore uses of the chain result. 6797 if (ResNo == NumVecs) 6798 continue; 6799 SDNode *User = *UI; 6800 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); 6801 } 6802 6803 // Now the vldN-lane intrinsic is dead except for its chain result. 6804 // Update uses of the chain. 6805 std::vector<SDValue> VLDDupResults; 6806 for (unsigned n = 0; n < NumVecs; ++n) 6807 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); 6808 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); 6809 DCI.CombineTo(VLD, VLDDupResults); 6810 6811 return true; 6812 } 6813 6814 /// PerformVDUPLANECombine - Target-specific dag combine xforms for 6815 /// ARMISD::VDUPLANE. 6816 static SDValue PerformVDUPLANECombine(SDNode *N, 6817 TargetLowering::DAGCombinerInfo &DCI) { 6818 SDValue Op = N->getOperand(0); 6819 6820 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses 6821 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation. 6822 if (CombineVLDDUP(N, DCI)) 6823 return SDValue(N, 0); 6824 6825 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is 6826 // redundant. Ignore bit_converts for now; element sizes are checked below. 6827 while (Op.getOpcode() == ISD::BITCAST) 6828 Op = Op.getOperand(0); 6829 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) 6830 return SDValue(); 6831 6832 // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 6833 unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); 6834 // The canonical VMOV for a zero vector uses a 32-bit element size. 6835 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 6836 unsigned EltBits; 6837 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) 6838 EltSize = 8; 6839 EVT VT = N->getValueType(0); 6840 if (EltSize > VT.getVectorElementType().getSizeInBits()) 6841 return SDValue(); 6842 6843 return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); 6844 } 6845 6846 // isConstVecPow2 - Return true if each vector element is a power of 2, all 6847 // elements are the same constant, C, and Log2(C) ranges from 1 to 32. 6848 static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) 6849 { 6850 integerPart cN; 6851 integerPart c0 = 0; 6852 for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); 6853 I != E; I++) { 6854 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I)); 6855 if (!C) 6856 return false; 6857 6858 bool isExact; 6859 APFloat APF = C->getValueAPF(); 6860 if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) 6861 != APFloat::opOK || !isExact) 6862 return false; 6863 6864 c0 = (I == 0) ? cN : c0; 6865 if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) 6866 return false; 6867 } 6868 C = c0; 6869 return true; 6870 } 6871 6872 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) 6873 /// can replace combinations of VMUL and VCVT (floating-point to integer) 6874 /// when the VMUL has a constant operand that is a power of 2. 
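/// The log2 of that constant becomes the #fbits immediate of the fixed-point
/// vcvt, as in the example below.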
6875 /// 6876 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 6877 /// vmul.f32 d16, d17, d16 6878 /// vcvt.s32.f32 d16, d16 6879 /// becomes: 6880 /// vcvt.s32.f32 d16, d16, #3 6881 static SDValue PerformVCVTCombine(SDNode *N, 6882 TargetLowering::DAGCombinerInfo &DCI, 6883 const ARMSubtarget *Subtarget) { 6884 SelectionDAG &DAG = DCI.DAG; 6885 SDValue Op = N->getOperand(0); 6886 6887 if (!Subtarget->hasNEON() || !Op.getValueType().isVector() || 6888 Op.getOpcode() != ISD::FMUL) 6889 return SDValue(); 6890 6891 uint64_t C; 6892 SDValue N0 = Op->getOperand(0); 6893 SDValue ConstVec = Op->getOperand(1); 6894 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; 6895 6896 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 6897 !isConstVecPow2(ConstVec, isSigned, C)) 6898 return SDValue(); 6899 6900 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : 6901 Intrinsic::arm_neon_vcvtfp2fxu; 6902 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), 6903 N->getValueType(0), 6904 DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, 6905 DAG.getConstant(Log2_64(C), MVT::i32)); 6906 } 6907 6908 /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) 6909 /// can replace combinations of VCVT (integer to floating-point) and VDIV 6910 /// when the VDIV has a constant operand that is a power of 2. 6911 /// 6912 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 6913 /// vcvt.f32.s32 d16, d16 6914 /// vdiv.f32 d16, d17, d16 6915 /// becomes: 6916 /// vcvt.f32.s32 d16, d16, #3 6917 static SDValue PerformVDIVCombine(SDNode *N, 6918 TargetLowering::DAGCombinerInfo &DCI, 6919 const ARMSubtarget *Subtarget) { 6920 SelectionDAG &DAG = DCI.DAG; 6921 SDValue Op = N->getOperand(0); 6922 unsigned OpOpcode = Op.getNode()->getOpcode(); 6923 6924 if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() || 6925 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) 6926 return SDValue(); 6927 6928 uint64_t C; 6929 SDValue ConstVec = N->getOperand(1); 6930 bool isSigned = OpOpcode == ISD::SINT_TO_FP; 6931 6932 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 6933 !isConstVecPow2(ConstVec, isSigned, C)) 6934 return SDValue(); 6935 6936 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : 6937 Intrinsic::arm_neon_vcvtfxu2fp; 6938 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), 6939 Op.getValueType(), 6940 DAG.getConstant(IntrinsicOpcode, MVT::i32), 6941 Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); 6942 } 6943 6944 /// Getvshiftimm - Check if this is a valid build_vector for the immediate 6945 /// operand of a vector shift operation, where all the elements of the 6946 /// build_vector must have the same constant integer value. 6947 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 6948 // Ignore bit_converts. 6949 while (Op.getOpcode() == ISD::BITCAST) 6950 Op = Op.getOperand(0); 6951 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 6952 APInt SplatBits, SplatUndef; 6953 unsigned SplatBitSize; 6954 bool HasAnyUndefs; 6955 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 6956 HasAnyUndefs, ElementBits) || 6957 SplatBitSize > ElementBits) 6958 return false; 6959 Cnt = SplatBits.getSExtValue(); 6960 return true; 6961 } 6962 6963 /// isVShiftLImm - Check if this is a valid build_vector for the immediate 6964 /// operand of a vector shift left operation. 
That value must be in the range: 6965 /// 0 <= Value < ElementBits for a left shift; or 6966 /// 0 <= Value <= ElementBits for a long left shift. 6967 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 6968 assert(VT.isVector() && "vector shift count is not a vector type"); 6969 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 6970 if (! getVShiftImm(Op, ElementBits, Cnt)) 6971 return false; 6972 return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); 6973 } 6974 6975 /// isVShiftRImm - Check if this is a valid build_vector for the immediate 6976 /// operand of a vector shift right operation. For a shift opcode, the value 6977 /// is positive, but for an intrinsic the value count must be negative. The 6978 /// absolute value must be in the range: 6979 /// 1 <= |Value| <= ElementBits for a right shift; or 6980 /// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 6981 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, 6982 int64_t &Cnt) { 6983 assert(VT.isVector() && "vector shift count is not a vector type"); 6984 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 6985 if (! getVShiftImm(Op, ElementBits, Cnt)) 6986 return false; 6987 if (isIntrinsic) 6988 Cnt = -Cnt; 6989 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); 6990 } 6991 6992 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. 6993 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { 6994 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 6995 switch (IntNo) { 6996 default: 6997 // Don't do anything for most intrinsics. 6998 break; 6999 7000 // Vector shifts: check for immediate versions and lower them. 7001 // Note: This is done during DAG combining instead of DAG legalizing because 7002 // the build_vectors for 64-bit vector element shift counts are generally 7003 // not legal, and it is hard to see their values after they get legalized to 7004 // loads from a constant pool. 7005 case Intrinsic::arm_neon_vshifts: 7006 case Intrinsic::arm_neon_vshiftu: 7007 case Intrinsic::arm_neon_vshiftls: 7008 case Intrinsic::arm_neon_vshiftlu: 7009 case Intrinsic::arm_neon_vshiftn: 7010 case Intrinsic::arm_neon_vrshifts: 7011 case Intrinsic::arm_neon_vrshiftu: 7012 case Intrinsic::arm_neon_vrshiftn: 7013 case Intrinsic::arm_neon_vqshifts: 7014 case Intrinsic::arm_neon_vqshiftu: 7015 case Intrinsic::arm_neon_vqshiftsu: 7016 case Intrinsic::arm_neon_vqshiftns: 7017 case Intrinsic::arm_neon_vqshiftnu: 7018 case Intrinsic::arm_neon_vqshiftnsu: 7019 case Intrinsic::arm_neon_vqrshiftns: 7020 case Intrinsic::arm_neon_vqrshiftnu: 7021 case Intrinsic::arm_neon_vqrshiftnsu: { 7022 EVT VT = N->getOperand(1).getValueType(); 7023 int64_t Cnt; 7024 unsigned VShiftOpc = 0; 7025 7026 switch (IntNo) { 7027 case Intrinsic::arm_neon_vshifts: 7028 case Intrinsic::arm_neon_vshiftu: 7029 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 7030 VShiftOpc = ARMISD::VSHL; 7031 break; 7032 } 7033 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 7034 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 
7035 ARMISD::VSHRs : ARMISD::VSHRu); 7036 break; 7037 } 7038 return SDValue(); 7039 7040 case Intrinsic::arm_neon_vshiftls: 7041 case Intrinsic::arm_neon_vshiftlu: 7042 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 7043 break; 7044 llvm_unreachable("invalid shift count for vshll intrinsic"); 7045 7046 case Intrinsic::arm_neon_vrshifts: 7047 case Intrinsic::arm_neon_vrshiftu: 7048 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 7049 break; 7050 return SDValue(); 7051 7052 case Intrinsic::arm_neon_vqshifts: 7053 case Intrinsic::arm_neon_vqshiftu: 7054 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 7055 break; 7056 return SDValue(); 7057 7058 case Intrinsic::arm_neon_vqshiftsu: 7059 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 7060 break; 7061 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 7062 7063 case Intrinsic::arm_neon_vshiftn: 7064 case Intrinsic::arm_neon_vrshiftn: 7065 case Intrinsic::arm_neon_vqshiftns: 7066 case Intrinsic::arm_neon_vqshiftnu: 7067 case Intrinsic::arm_neon_vqshiftnsu: 7068 case Intrinsic::arm_neon_vqrshiftns: 7069 case Intrinsic::arm_neon_vqrshiftnu: 7070 case Intrinsic::arm_neon_vqrshiftnsu: 7071 // Narrowing shifts require an immediate right shift. 7072 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 7073 break; 7074 llvm_unreachable("invalid shift count for narrowing vector shift " 7075 "intrinsic"); 7076 7077 default: 7078 llvm_unreachable("unhandled vector shift"); 7079 } 7080 7081 switch (IntNo) { 7082 case Intrinsic::arm_neon_vshifts: 7083 case Intrinsic::arm_neon_vshiftu: 7084 // Opcode already set above. 7085 break; 7086 case Intrinsic::arm_neon_vshiftls: 7087 case Intrinsic::arm_neon_vshiftlu: 7088 if (Cnt == VT.getVectorElementType().getSizeInBits()) 7089 VShiftOpc = ARMISD::VSHLLi; 7090 else 7091 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
7092 ARMISD::VSHLLs : ARMISD::VSHLLu); 7093 break; 7094 case Intrinsic::arm_neon_vshiftn: 7095 VShiftOpc = ARMISD::VSHRN; break; 7096 case Intrinsic::arm_neon_vrshifts: 7097 VShiftOpc = ARMISD::VRSHRs; break; 7098 case Intrinsic::arm_neon_vrshiftu: 7099 VShiftOpc = ARMISD::VRSHRu; break; 7100 case Intrinsic::arm_neon_vrshiftn: 7101 VShiftOpc = ARMISD::VRSHRN; break; 7102 case Intrinsic::arm_neon_vqshifts: 7103 VShiftOpc = ARMISD::VQSHLs; break; 7104 case Intrinsic::arm_neon_vqshiftu: 7105 VShiftOpc = ARMISD::VQSHLu; break; 7106 case Intrinsic::arm_neon_vqshiftsu: 7107 VShiftOpc = ARMISD::VQSHLsu; break; 7108 case Intrinsic::arm_neon_vqshiftns: 7109 VShiftOpc = ARMISD::VQSHRNs; break; 7110 case Intrinsic::arm_neon_vqshiftnu: 7111 VShiftOpc = ARMISD::VQSHRNu; break; 7112 case Intrinsic::arm_neon_vqshiftnsu: 7113 VShiftOpc = ARMISD::VQSHRNsu; break; 7114 case Intrinsic::arm_neon_vqrshiftns: 7115 VShiftOpc = ARMISD::VQRSHRNs; break; 7116 case Intrinsic::arm_neon_vqrshiftnu: 7117 VShiftOpc = ARMISD::VQRSHRNu; break; 7118 case Intrinsic::arm_neon_vqrshiftnsu: 7119 VShiftOpc = ARMISD::VQRSHRNsu; break; 7120 } 7121 7122 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 7123 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 7124 } 7125 7126 case Intrinsic::arm_neon_vshiftins: { 7127 EVT VT = N->getOperand(1).getValueType(); 7128 int64_t Cnt; 7129 unsigned VShiftOpc = 0; 7130 7131 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 7132 VShiftOpc = ARMISD::VSLI; 7133 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 7134 VShiftOpc = ARMISD::VSRI; 7135 else { 7136 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 7137 } 7138 7139 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 7140 N->getOperand(1), N->getOperand(2), 7141 DAG.getConstant(Cnt, MVT::i32)); 7142 } 7143 7144 case Intrinsic::arm_neon_vqrshifts: 7145 case Intrinsic::arm_neon_vqrshiftu: 7146 // No immediate versions of these to check for. 7147 break; 7148 } 7149 7150 return SDValue(); 7151 } 7152 7153 /// PerformShiftCombine - Checks for immediate versions of vector shifts and 7154 /// lowers them. As with the vector shift intrinsics, this is done during DAG 7155 /// combining instead of DAG legalizing because the build_vectors for 64-bit 7156 /// vector element shift counts are generally not legal, and it is hard to see 7157 /// their values after they get legalized to loads from a constant pool. 7158 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 7159 const ARMSubtarget *ST) { 7160 EVT VT = N->getValueType(0); 7161 7162 // Nothing to be done for scalar shifts. 7163 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7164 if (!VT.isVector() || !TLI.isTypeLegal(VT)) 7165 return SDValue(); 7166 7167 assert(ST->hasNEON() && "unexpected vector shift"); 7168 int64_t Cnt; 7169 7170 switch (N->getOpcode()) { 7171 default: llvm_unreachable("unexpected shift opcode"); 7172 7173 case ISD::SHL: 7174 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 7175 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), 7176 DAG.getConstant(Cnt, MVT::i32)); 7177 break; 7178 7179 case ISD::SRA: 7180 case ISD::SRL: 7181 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 7182 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 
7183 ARMISD::VSHRs : ARMISD::VSHRu); 7184 return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), 7185 DAG.getConstant(Cnt, MVT::i32)); 7186 } 7187 } 7188 return SDValue(); 7189 } 7190 7191 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 7192 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 7193 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 7194 const ARMSubtarget *ST) { 7195 SDValue N0 = N->getOperand(0); 7196 7197 // Check for sign- and zero-extensions of vector extract operations of 8- 7198 // and 16-bit vector elements. NEON supports these directly. They are 7199 // handled during DAG combining because type legalization will promote them 7200 // to 32-bit types and it is messy to recognize the operations after that. 7201 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 7202 SDValue Vec = N0.getOperand(0); 7203 SDValue Lane = N0.getOperand(1); 7204 EVT VT = N->getValueType(0); 7205 EVT EltVT = N0.getValueType(); 7206 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7207 7208 if (VT == MVT::i32 && 7209 (EltVT == MVT::i8 || EltVT == MVT::i16) && 7210 TLI.isTypeLegal(Vec.getValueType()) && 7211 isa<ConstantSDNode>(Lane)) { 7212 7213 unsigned Opc = 0; 7214 switch (N->getOpcode()) { 7215 default: llvm_unreachable("unexpected opcode"); 7216 case ISD::SIGN_EXTEND: 7217 Opc = ARMISD::VGETLANEs; 7218 break; 7219 case ISD::ZERO_EXTEND: 7220 case ISD::ANY_EXTEND: 7221 Opc = ARMISD::VGETLANEu; 7222 break; 7223 } 7224 return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); 7225 } 7226 } 7227 7228 return SDValue(); 7229 } 7230 7231 /// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 7232 /// to match f32 max/min patterns to use NEON vmax/vmin instructions. 7233 static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 7234 const ARMSubtarget *ST) { 7235 // If the target supports NEON, try to use vmax/vmin instructions for f32 7236 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 7237 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 7238 // a NaN; only do the transformation when it matches that behavior. 7239 7240 // For now only do this when using NEON for FP operations; if using VFP, it 7241 // is not obvious that the benefit outweighs the cost of switching to the 7242 // NEON pipeline. 7243 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 7244 N->getValueType(0) != MVT::f32) 7245 return SDValue(); 7246 7247 SDValue CondLHS = N->getOperand(0); 7248 SDValue CondRHS = N->getOperand(1); 7249 SDValue LHS = N->getOperand(2); 7250 SDValue RHS = N->getOperand(3); 7251 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 7252 7253 unsigned Opcode = 0; 7254 bool IsReversed; 7255 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 7256 IsReversed = false; // x CC y ? x : y 7257 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 7258 IsReversed = true ; // x CC y ? y : x 7259 } else { 7260 return SDValue(); 7261 } 7262 7263 bool IsUnordered; 7264 switch (CC) { 7265 default: break; 7266 case ISD::SETOLT: 7267 case ISD::SETOLE: 7268 case ISD::SETLT: 7269 case ISD::SETLE: 7270 case ISD::SETULT: 7271 case ISD::SETULE: 7272 // If LHS is NaN, an ordered comparison will be false and the result will 7273 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 7274 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 
7275 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 7276 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 7277 break; 7278 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 7279 // will return -0, so vmin can only be used for unsafe math or if one of 7280 // the operands is known to be nonzero. 7281 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 7282 !UnsafeFPMath && 7283 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 7284 break; 7285 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 7286 break; 7287 7288 case ISD::SETOGT: 7289 case ISD::SETOGE: 7290 case ISD::SETGT: 7291 case ISD::SETGE: 7292 case ISD::SETUGT: 7293 case ISD::SETUGE: 7294 // If LHS is NaN, an ordered comparison will be false and the result will 7295 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 7296 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 7297 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 7298 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 7299 break; 7300 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 7301 // will return +0, so vmax can only be used for unsafe math or if one of 7302 // the operands is known to be nonzero. 7303 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 7304 !UnsafeFPMath && 7305 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 7306 break; 7307 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 7308 break; 7309 } 7310 7311 if (!Opcode) 7312 return SDValue(); 7313 return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); 7314 } 7315 7316 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. 7317 SDValue 7318 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { 7319 SDValue Cmp = N->getOperand(4); 7320 if (Cmp.getOpcode() != ARMISD::CMPZ) 7321 // Only looking at EQ and NE cases. 7322 return SDValue(); 7323 7324 EVT VT = N->getValueType(0); 7325 DebugLoc dl = N->getDebugLoc(); 7326 SDValue LHS = Cmp.getOperand(0); 7327 SDValue RHS = Cmp.getOperand(1); 7328 SDValue FalseVal = N->getOperand(0); 7329 SDValue TrueVal = N->getOperand(1); 7330 SDValue ARMcc = N->getOperand(2); 7331 ARMCC::CondCodes CC = 7332 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 7333 7334 // Simplify 7335 // mov r1, r0 7336 // cmp r1, x 7337 // mov r0, y 7338 // moveq r0, x 7339 // to 7340 // cmp r0, x 7341 // movne r0, y 7342 // 7343 // mov r1, r0 7344 // cmp r1, x 7345 // mov r0, x 7346 // movne r0, y 7347 // to 7348 // cmp r0, x 7349 // movne r0, y 7350 /// FIXME: Turn this into a target neutral optimization? 7351 SDValue Res; 7352 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { 7353 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, 7354 N->getOperand(3), Cmp); 7355 } else if (CC == ARMCC::EQ && TrueVal == RHS) { 7356 SDValue ARMcc; 7357 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); 7358 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, 7359 N->getOperand(3), NewCmp); 7360 } 7361 7362 if (Res.getNode()) { 7363 APInt KnownZero, KnownOne; 7364 APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); 7365 DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne); 7366 // Capture demanded bits information that would be otherwise lost. 
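    // If the upper bits are now known zero, assert the narrower type so that
    // a later zero-extend or mask of the CMOV result can be removed.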
7367 if (KnownZero == 0xfffffffe) 7368 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 7369 DAG.getValueType(MVT::i1)); 7370 else if (KnownZero == 0xffffff00) 7371 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 7372 DAG.getValueType(MVT::i8)); 7373 else if (KnownZero == 0xffff0000) 7374 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 7375 DAG.getValueType(MVT::i16)); 7376 } 7377 7378 return Res; 7379 } 7380 7381 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 7382 DAGCombinerInfo &DCI) const { 7383 switch (N->getOpcode()) { 7384 default: break; 7385 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); 7386 case ISD::SUB: return PerformSUBCombine(N, DCI); 7387 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 7388 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 7389 case ISD::AND: return PerformANDCombine(N, DCI); 7390 case ARMISD::BFI: return PerformBFICombine(N, DCI); 7391 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 7392 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); 7393 case ISD::STORE: return PerformSTORECombine(N, DCI); 7394 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); 7395 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); 7396 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); 7397 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); 7398 case ISD::FP_TO_SINT: 7399 case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); 7400 case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); 7401 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 7402 case ISD::SHL: 7403 case ISD::SRA: 7404 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 7405 case ISD::SIGN_EXTEND: 7406 case ISD::ZERO_EXTEND: 7407 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 7408 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 7409 case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); 7410 case ARMISD::VLD2DUP: 7411 case ARMISD::VLD3DUP: 7412 case ARMISD::VLD4DUP: 7413 return CombineBaseUpdate(N, DCI); 7414 case ISD::INTRINSIC_VOID: 7415 case ISD::INTRINSIC_W_CHAIN: 7416 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 7417 case Intrinsic::arm_neon_vld1: 7418 case Intrinsic::arm_neon_vld2: 7419 case Intrinsic::arm_neon_vld3: 7420 case Intrinsic::arm_neon_vld4: 7421 case Intrinsic::arm_neon_vld2lane: 7422 case Intrinsic::arm_neon_vld3lane: 7423 case Intrinsic::arm_neon_vld4lane: 7424 case Intrinsic::arm_neon_vst1: 7425 case Intrinsic::arm_neon_vst2: 7426 case Intrinsic::arm_neon_vst3: 7427 case Intrinsic::arm_neon_vst4: 7428 case Intrinsic::arm_neon_vst2lane: 7429 case Intrinsic::arm_neon_vst3lane: 7430 case Intrinsic::arm_neon_vst4lane: 7431 return CombineBaseUpdate(N, DCI); 7432 default: break; 7433 } 7434 break; 7435 } 7436 return SDValue(); 7437 } 7438 7439 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, 7440 EVT VT) const { 7441 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); 7442 } 7443 7444 bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { 7445 if (!Subtarget->allowsUnalignedMem()) 7446 return false; 7447 7448 switch (VT.getSimpleVT().SimpleTy) { 7449 default: 7450 return false; 7451 case MVT::i8: 7452 case MVT::i16: 7453 case MVT::i32: 7454 return true; 7455 // FIXME: VLD1 etc with standard alignment is legal. 
7456 } 7457 } 7458 7459 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 7460 if (V < 0) 7461 return false; 7462 7463 unsigned Scale = 1; 7464 switch (VT.getSimpleVT().SimpleTy) { 7465 default: return false; 7466 case MVT::i1: 7467 case MVT::i8: 7468 // Scale == 1; 7469 break; 7470 case MVT::i16: 7471 // Scale == 2; 7472 Scale = 2; 7473 break; 7474 case MVT::i32: 7475 // Scale == 4; 7476 Scale = 4; 7477 break; 7478 } 7479 7480 if ((V & (Scale - 1)) != 0) 7481 return false; 7482 V /= Scale; 7483 return V == (V & ((1LL << 5) - 1)); 7484 } 7485 7486 static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 7487 const ARMSubtarget *Subtarget) { 7488 bool isNeg = false; 7489 if (V < 0) { 7490 isNeg = true; 7491 V = - V; 7492 } 7493 7494 switch (VT.getSimpleVT().SimpleTy) { 7495 default: return false; 7496 case MVT::i1: 7497 case MVT::i8: 7498 case MVT::i16: 7499 case MVT::i32: 7500 // + imm12 or - imm8 7501 if (isNeg) 7502 return V == (V & ((1LL << 8) - 1)); 7503 return V == (V & ((1LL << 12) - 1)); 7504 case MVT::f32: 7505 case MVT::f64: 7506 // Same as ARM mode. FIXME: NEON? 7507 if (!Subtarget->hasVFP2()) 7508 return false; 7509 if ((V & 3) != 0) 7510 return false; 7511 V >>= 2; 7512 return V == (V & ((1LL << 8) - 1)); 7513 } 7514 } 7515 7516 /// isLegalAddressImmediate - Return true if the integer value can be used 7517 /// as the offset of the target addressing mode for load / store of the 7518 /// given type. 7519 static bool isLegalAddressImmediate(int64_t V, EVT VT, 7520 const ARMSubtarget *Subtarget) { 7521 if (V == 0) 7522 return true; 7523 7524 if (!VT.isSimple()) 7525 return false; 7526 7527 if (Subtarget->isThumb1Only()) 7528 return isLegalT1AddressImmediate(V, VT); 7529 else if (Subtarget->isThumb2()) 7530 return isLegalT2AddressImmediate(V, VT, Subtarget); 7531 7532 // ARM mode. 7533 if (V < 0) 7534 V = - V; 7535 switch (VT.getSimpleVT().SimpleTy) { 7536 default: return false; 7537 case MVT::i1: 7538 case MVT::i8: 7539 case MVT::i32: 7540 // +- imm12 7541 return V == (V & ((1LL << 12) - 1)); 7542 case MVT::i16: 7543 // +- imm8 7544 return V == (V & ((1LL << 8) - 1)); 7545 case MVT::f32: 7546 case MVT::f64: 7547 if (!Subtarget->hasVFP2()) // FIXME: NEON? 7548 return false; 7549 if ((V & 3) != 0) 7550 return false; 7551 V >>= 2; 7552 return V == (V & ((1LL << 8) - 1)); 7553 } 7554 } 7555 7556 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 7557 EVT VT) const { 7558 int Scale = AM.Scale; 7559 if (Scale < 0) 7560 return false; 7561 7562 switch (VT.getSimpleVT().SimpleTy) { 7563 default: return false; 7564 case MVT::i1: 7565 case MVT::i8: 7566 case MVT::i16: 7567 case MVT::i32: 7568 if (Scale == 1) 7569 return true; 7570 // r + r << imm 7571 Scale = Scale & ~1; 7572 return Scale == 2 || Scale == 4 || Scale == 8; 7573 case MVT::i64: 7574 // r + r 7575 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 7576 return true; 7577 return false; 7578 case MVT::isVoid: 7579 // Note, we allow "void" uses (basically, uses that aren't loads or 7580 // stores), because arm allows folding a scale into many arithmetic 7581 // operations. This should be made more precise and revisited later. 7582 7583 // Allow r << imm, but the imm has to be a multiple of two. 7584 if (Scale & 1) return false; 7585 return isPowerOf2_32(Scale); 7586 } 7587 } 7588 7589 /// isLegalAddressingMode - Return true if the addressing mode represented 7590 /// by AM is legal for this target, for a load/store of the specified type. 
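/// For illustration: in ARM mode a base-plus-scaled-index form such as
/// [r0, r1, lsl #2] is accepted for an i32 access, but any mode that combines
/// a scaled index register with a nonzero immediate offset is rejected below.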
7591 bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 7592 Type *Ty) const { 7593 EVT VT = getValueType(Ty, true); 7594 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 7595 return false; 7596 7597 // Can never fold addr of global into load/store. 7598 if (AM.BaseGV) 7599 return false; 7600 7601 switch (AM.Scale) { 7602 case 0: // no scale reg, must be "r+i" or "r", or "i". 7603 break; 7604 case 1: 7605 if (Subtarget->isThumb1Only()) 7606 return false; 7607 // FALL THROUGH. 7608 default: 7609 // ARM doesn't support any R+R*scale+imm addr modes. 7610 if (AM.BaseOffs) 7611 return false; 7612 7613 if (!VT.isSimple()) 7614 return false; 7615 7616 if (Subtarget->isThumb2()) 7617 return isLegalT2ScaledAddressingMode(AM, VT); 7618 7619 int Scale = AM.Scale; 7620 switch (VT.getSimpleVT().SimpleTy) { 7621 default: return false; 7622 case MVT::i1: 7623 case MVT::i8: 7624 case MVT::i32: 7625 if (Scale < 0) Scale = -Scale; 7626 if (Scale == 1) 7627 return true; 7628 // r + r << imm 7629 return isPowerOf2_32(Scale & ~1); 7630 case MVT::i16: 7631 case MVT::i64: 7632 // r + r 7633 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 7634 return true; 7635 return false; 7636 7637 case MVT::isVoid: 7638 // Note, we allow "void" uses (basically, uses that aren't loads or 7639 // stores), because arm allows folding a scale into many arithmetic 7640 // operations. This should be made more precise and revisited later. 7641 7642 // Allow r << imm, but the imm has to be a multiple of two. 7643 if (Scale & 1) return false; 7644 return isPowerOf2_32(Scale); 7645 } 7646 break; 7647 } 7648 return true; 7649 } 7650 7651 /// isLegalICmpImmediate - Return true if the specified immediate is legal 7652 /// icmp immediate, that is the target has icmp instructions which can compare 7653 /// a register against the immediate without having to materialize the 7654 /// immediate into a register. 7655 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 7656 if (!Subtarget->isThumb()) 7657 return ARM_AM::getSOImmVal(Imm) != -1; 7658 if (Subtarget->isThumb2()) 7659 return ARM_AM::getT2SOImmVal(Imm) != -1; 7660 return Imm >= 0 && Imm <= 255; 7661 } 7662 7663 /// isLegalAddImmediate - Return true if the specified immediate is legal 7664 /// add immediate, that is the target has add instructions which can add 7665 /// a register with the immediate without having to materialize the 7666 /// immediate into a register. 
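/// For example, 0x00ff0000 (an 8-bit value rotated into place) is encodable
/// as an ARM modified immediate, whereas 0x00ff00ff is not and would have to
/// be materialized into a register first.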
7667 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { 7668 return ARM_AM::getSOImmVal(Imm) != -1; 7669 } 7670 7671 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 7672 bool isSEXTLoad, SDValue &Base, 7673 SDValue &Offset, bool &isInc, 7674 SelectionDAG &DAG) { 7675 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 7676 return false; 7677 7678 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 7679 // AddressingMode 3 7680 Base = Ptr->getOperand(0); 7681 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 7682 int RHSC = (int)RHS->getZExtValue(); 7683 if (RHSC < 0 && RHSC > -256) { 7684 assert(Ptr->getOpcode() == ISD::ADD); 7685 isInc = false; 7686 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 7687 return true; 7688 } 7689 } 7690 isInc = (Ptr->getOpcode() == ISD::ADD); 7691 Offset = Ptr->getOperand(1); 7692 return true; 7693 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 7694 // AddressingMode 2 7695 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 7696 int RHSC = (int)RHS->getZExtValue(); 7697 if (RHSC < 0 && RHSC > -0x1000) { 7698 assert(Ptr->getOpcode() == ISD::ADD); 7699 isInc = false; 7700 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 7701 Base = Ptr->getOperand(0); 7702 return true; 7703 } 7704 } 7705 7706 if (Ptr->getOpcode() == ISD::ADD) { 7707 isInc = true; 7708 ARM_AM::ShiftOpc ShOpcVal= 7709 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); 7710 if (ShOpcVal != ARM_AM::no_shift) { 7711 Base = Ptr->getOperand(1); 7712 Offset = Ptr->getOperand(0); 7713 } else { 7714 Base = Ptr->getOperand(0); 7715 Offset = Ptr->getOperand(1); 7716 } 7717 return true; 7718 } 7719 7720 isInc = (Ptr->getOpcode() == ISD::ADD); 7721 Base = Ptr->getOperand(0); 7722 Offset = Ptr->getOperand(1); 7723 return true; 7724 } 7725 7726 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 7727 return false; 7728 } 7729 7730 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 7731 bool isSEXTLoad, SDValue &Base, 7732 SDValue &Offset, bool &isInc, 7733 SelectionDAG &DAG) { 7734 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 7735 return false; 7736 7737 Base = Ptr->getOperand(0); 7738 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 7739 int RHSC = (int)RHS->getZExtValue(); 7740 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 7741 assert(Ptr->getOpcode() == ISD::ADD); 7742 isInc = false; 7743 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 7744 return true; 7745 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 7746 isInc = Ptr->getOpcode() == ISD::ADD; 7747 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 7748 return true; 7749 } 7750 } 7751 7752 return false; 7753 } 7754 7755 /// getPreIndexedAddressParts - returns true by value, base pointer and 7756 /// offset pointer and addressing mode by reference if the node's address 7757 /// can be legally represented as pre-indexed load / store address. 
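/// For example, a load whose address is (add r1, #4), where the incremented
/// pointer is also used later, can be selected as "ldr r0, [r1, #4]!", which
/// writes the updated address back into r1 as part of the load.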
7758 bool 7759 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 7760 SDValue &Offset, 7761 ISD::MemIndexedMode &AM, 7762 SelectionDAG &DAG) const { 7763 if (Subtarget->isThumb1Only()) 7764 return false; 7765 7766 EVT VT; 7767 SDValue Ptr; 7768 bool isSEXTLoad = false; 7769 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 7770 Ptr = LD->getBasePtr(); 7771 VT = LD->getMemoryVT(); 7772 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 7773 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 7774 Ptr = ST->getBasePtr(); 7775 VT = ST->getMemoryVT(); 7776 } else 7777 return false; 7778 7779 bool isInc; 7780 bool isLegal = false; 7781 if (Subtarget->isThumb2()) 7782 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 7783 Offset, isInc, DAG); 7784 else 7785 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 7786 Offset, isInc, DAG); 7787 if (!isLegal) 7788 return false; 7789 7790 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 7791 return true; 7792 } 7793 7794 /// getPostIndexedAddressParts - returns true by value, base pointer and 7795 /// offset pointer and addressing mode by reference if this node can be 7796 /// combined with a load / store to form a post-indexed load / store. 7797 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 7798 SDValue &Base, 7799 SDValue &Offset, 7800 ISD::MemIndexedMode &AM, 7801 SelectionDAG &DAG) const { 7802 if (Subtarget->isThumb1Only()) 7803 return false; 7804 7805 EVT VT; 7806 SDValue Ptr; 7807 bool isSEXTLoad = false; 7808 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 7809 VT = LD->getMemoryVT(); 7810 Ptr = LD->getBasePtr(); 7811 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 7812 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 7813 VT = ST->getMemoryVT(); 7814 Ptr = ST->getBasePtr(); 7815 } else 7816 return false; 7817 7818 bool isInc; 7819 bool isLegal = false; 7820 if (Subtarget->isThumb2()) 7821 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 7822 isInc, DAG); 7823 else 7824 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 7825 isInc, DAG); 7826 if (!isLegal) 7827 return false; 7828 7829 if (Ptr != Base) { 7830 // Swap base ptr and offset to catch more post-index load / store when 7831 // it's legal. In Thumb2 mode, offset must be an immediate. 7832 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 7833 !Subtarget->isThumb2()) 7834 std::swap(Base, Offset); 7835 7836 // Post-indexed load / store update the base pointer. 7837 if (Ptr != Base) 7838 return false; 7839 } 7840 7841 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 7842 return true; 7843 } 7844 7845 void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 7846 const APInt &Mask, 7847 APInt &KnownZero, 7848 APInt &KnownOne, 7849 const SelectionDAG &DAG, 7850 unsigned Depth) const { 7851 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 7852 switch (Op.getOpcode()) { 7853 default: break; 7854 case ARMISD::CMOV: { 7855 // Bits are known zero/one if known on the LHS and RHS. 
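    // A CMOV yields either operand 0 or operand 1, so a bit is known zero
    // (or one) only if it is known zero (or one) in both operands; the
    // condition and flag operands contribute nothing to the result bits.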
7856 DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 7857 if (KnownZero == 0 && KnownOne == 0) return; 7858 7859 APInt KnownZeroRHS, KnownOneRHS; 7860 DAG.ComputeMaskedBits(Op.getOperand(1), Mask, 7861 KnownZeroRHS, KnownOneRHS, Depth+1); 7862 KnownZero &= KnownZeroRHS; 7863 KnownOne &= KnownOneRHS; 7864 return; 7865 } 7866 } 7867 } 7868 7869 //===----------------------------------------------------------------------===// 7870 // ARM Inline Assembly Support 7871 //===----------------------------------------------------------------------===// 7872 7873 bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { 7874 // Looking for "rev" which is V6+. 7875 if (!Subtarget->hasV6Ops()) 7876 return false; 7877 7878 InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); 7879 std::string AsmStr = IA->getAsmString(); 7880 SmallVector<StringRef, 4> AsmPieces; 7881 SplitString(AsmStr, AsmPieces, ";\n"); 7882 7883 switch (AsmPieces.size()) { 7884 default: return false; 7885 case 1: 7886 AsmStr = AsmPieces[0]; 7887 AsmPieces.clear(); 7888 SplitString(AsmStr, AsmPieces, " \t,"); 7889 7890 // rev $0, $1 7891 if (AsmPieces.size() == 3 && 7892 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && 7893 IA->getConstraintString().compare(0, 4, "=l,l") == 0) { 7894 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); 7895 if (Ty && Ty->getBitWidth() == 32) 7896 return IntrinsicLowering::LowerToByteSwap(CI); 7897 } 7898 break; 7899 } 7900 7901 return false; 7902 } 7903 7904 /// getConstraintType - Given a constraint letter, return the type of 7905 /// constraint it is for this target. 7906 ARMTargetLowering::ConstraintType 7907 ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 7908 if (Constraint.size() == 1) { 7909 switch (Constraint[0]) { 7910 default: break; 7911 case 'l': return C_RegisterClass; 7912 case 'w': return C_RegisterClass; 7913 case 'h': return C_RegisterClass; 7914 case 'x': return C_RegisterClass; 7915 case 't': return C_RegisterClass; 7916 case 'j': return C_Other; // Constant for movw. 7917 // An address with a single base register. Due to the way we 7918 // currently handle addresses it is the same as an 'r' memory constraint. 7919 case 'Q': return C_Memory; 7920 } 7921 } else if (Constraint.size() == 2) { 7922 switch (Constraint[0]) { 7923 default: break; 7924 // All 'U+' constraints are addresses. 7925 case 'U': return C_Memory; 7926 } 7927 } 7928 return TargetLowering::getConstraintType(Constraint); 7929 } 7930 7931 /// Examine constraint type and operand type and determine a weight value. 7932 /// This object must already have been set up with the operand type 7933 /// and the current alternative constraint selected. 7934 TargetLowering::ConstraintWeight 7935 ARMTargetLowering::getSingleConstraintMatchWeight( 7936 AsmOperandInfo &info, const char *constraint) const { 7937 ConstraintWeight weight = CW_Invalid; 7938 Value *CallOperandVal = info.CallOperandVal; 7939 // If we don't have a value, we can't do a match, 7940 // but allow it at the lowest weight. 7941 if (CallOperandVal == NULL) 7942 return CW_Default; 7943 Type *type = CallOperandVal->getType(); 7944 // Look at the constraint type. 
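  // For example, 'l' on an integer operand counts as a specific-register
  // match when targeting Thumb (low registers only) and as an ordinary
  // register match otherwise, while 'w' only rewards floating-point operands.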
7945 switch (*constraint) { 7946 default: 7947 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 7948 break; 7949 case 'l': 7950 if (type->isIntegerTy()) { 7951 if (Subtarget->isThumb()) 7952 weight = CW_SpecificReg; 7953 else 7954 weight = CW_Register; 7955 } 7956 break; 7957 case 'w': 7958 if (type->isFloatingPointTy()) 7959 weight = CW_Register; 7960 break; 7961 } 7962 return weight; 7963 } 7964 7965 typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; 7966 RCPair 7967 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 7968 EVT VT) const { 7969 if (Constraint.size() == 1) { 7970 // GCC ARM Constraint Letters 7971 switch (Constraint[0]) { 7972 case 'l': // Low regs or general regs. 7973 if (Subtarget->isThumb()) 7974 return RCPair(0U, ARM::tGPRRegisterClass); 7975 else 7976 return RCPair(0U, ARM::GPRRegisterClass); 7977 case 'h': // High regs or no regs. 7978 if (Subtarget->isThumb()) 7979 return RCPair(0U, ARM::hGPRRegisterClass); 7980 break; 7981 case 'r': 7982 return RCPair(0U, ARM::GPRRegisterClass); 7983 case 'w': 7984 if (VT == MVT::f32) 7985 return RCPair(0U, ARM::SPRRegisterClass); 7986 if (VT.getSizeInBits() == 64) 7987 return RCPair(0U, ARM::DPRRegisterClass); 7988 if (VT.getSizeInBits() == 128) 7989 return RCPair(0U, ARM::QPRRegisterClass); 7990 break; 7991 case 'x': 7992 if (VT == MVT::f32) 7993 return RCPair(0U, ARM::SPR_8RegisterClass); 7994 if (VT.getSizeInBits() == 64) 7995 return RCPair(0U, ARM::DPR_8RegisterClass); 7996 if (VT.getSizeInBits() == 128) 7997 return RCPair(0U, ARM::QPR_8RegisterClass); 7998 break; 7999 case 't': 8000 if (VT == MVT::f32) 8001 return RCPair(0U, ARM::SPRRegisterClass); 8002 break; 8003 } 8004 } 8005 if (StringRef("{cc}").equals_lower(Constraint)) 8006 return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass); 8007 8008 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 8009 } 8010 8011 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 8012 /// vector. If it is invalid, don't add anything to Ops. 8013 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 8014 std::string &Constraint, 8015 std::vector<SDValue>&Ops, 8016 SelectionDAG &DAG) const { 8017 SDValue Result(0, 0); 8018 8019 // Currently only support length 1 constraints. 8020 if (Constraint.length() != 1) return; 8021 8022 char ConstraintLetter = Constraint[0]; 8023 switch (ConstraintLetter) { 8024 default: break; 8025 case 'j': 8026 case 'I': case 'J': case 'K': case 'L': 8027 case 'M': case 'N': case 'O': 8028 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 8029 if (!C) 8030 return; 8031 8032 int64_t CVal64 = C->getSExtValue(); 8033 int CVal = (int) CVal64; 8034 // None of these constraints allow values larger than 32 bits. Check 8035 // that the value fits in an int. 8036 if (CVal != CVal64) 8037 return; 8038 8039 switch (ConstraintLetter) { 8040 case 'j': 8041 // Constant suitable for movw, must be between 0 and 8042 // 65535. 8043 if (Subtarget->hasV6T2Ops()) 8044 if (CVal >= 0 && CVal <= 65535) 8045 break; 8046 return; 8047 case 'I': 8048 if (Subtarget->isThumb1Only()) { 8049 // This must be a constant between 0 and 255, for ADD 8050 // immediates. 8051 if (CVal >= 0 && CVal <= 255) 8052 break; 8053 } else if (Subtarget->isThumb2()) { 8054 // A constant that can be used as an immediate value in a 8055 // data-processing instruction. 
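        // (Roughly: Thumb2 modified immediates are an 8-bit value rotated
        // into any position, or replicated-byte patterns such as 0x00XY00XY
        // and 0xXYXYXYXY.)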
8056 if (ARM_AM::getT2SOImmVal(CVal) != -1) 8057 break; 8058 } else { 8059 // A constant that can be used as an immediate value in a 8060 // data-processing instruction. 8061 if (ARM_AM::getSOImmVal(CVal) != -1) 8062 break; 8063 } 8064 return; 8065 8066 case 'J': 8067 if (Subtarget->isThumb()) { // FIXME thumb2 8068 // This must be a constant between -255 and -1, for negated ADD 8069 // immediates. This can be used in GCC with an "n" modifier that 8070 // prints the negated value, for use with SUB instructions. It is 8071 // not useful otherwise but is implemented for compatibility. 8072 if (CVal >= -255 && CVal <= -1) 8073 break; 8074 } else { 8075 // This must be a constant between -4095 and 4095. It is not clear 8076 // what this constraint is intended for. Implemented for 8077 // compatibility with GCC. 8078 if (CVal >= -4095 && CVal <= 4095) 8079 break; 8080 } 8081 return; 8082 8083 case 'K': 8084 if (Subtarget->isThumb1Only()) { 8085 // A 32-bit value where only one byte has a nonzero value. Exclude 8086 // zero to match GCC. This constraint is used by GCC internally for 8087 // constants that can be loaded with a move/shift combination. 8088 // It is not useful otherwise but is implemented for compatibility. 8089 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 8090 break; 8091 } else if (Subtarget->isThumb2()) { 8092 // A constant whose bitwise inverse can be used as an immediate 8093 // value in a data-processing instruction. This can be used in GCC 8094 // with a "B" modifier that prints the inverted value, for use with 8095 // BIC and MVN instructions. It is not useful otherwise but is 8096 // implemented for compatibility. 8097 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 8098 break; 8099 } else { 8100 // A constant whose bitwise inverse can be used as an immediate 8101 // value in a data-processing instruction. This can be used in GCC 8102 // with a "B" modifier that prints the inverted value, for use with 8103 // BIC and MVN instructions. It is not useful otherwise but is 8104 // implemented for compatibility. 8105 if (ARM_AM::getSOImmVal(~CVal) != -1) 8106 break; 8107 } 8108 return; 8109 8110 case 'L': 8111 if (Subtarget->isThumb1Only()) { 8112 // This must be a constant between -7 and 7, 8113 // for 3-operand ADD/SUB immediate instructions. 8114 if (CVal >= -7 && CVal < 7) 8115 break; 8116 } else if (Subtarget->isThumb2()) { 8117 // A constant whose negation can be used as an immediate value in a 8118 // data-processing instruction. This can be used in GCC with an "n" 8119 // modifier that prints the negated value, for use with SUB 8120 // instructions. It is not useful otherwise but is implemented for 8121 // compatibility. 8122 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 8123 break; 8124 } else { 8125 // A constant whose negation can be used as an immediate value in a 8126 // data-processing instruction. This can be used in GCC with an "n" 8127 // modifier that prints the negated value, for use with SUB 8128 // instructions. It is not useful otherwise but is implemented for 8129 // compatibility. 8130 if (ARM_AM::getSOImmVal(-CVal) != -1) 8131 break; 8132 } 8133 return; 8134 8135 case 'M': 8136 if (Subtarget->isThumb()) { // FIXME thumb2 8137 // This must be a multiple of 4 between 0 and 1020, for 8138 // ADD sp + immediate. 8139 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 8140 break; 8141 } else { 8142 // A power of two or a constant between 0 and 32. 
This is used in 8143 // GCC for the shift amount on shifted register operands, but it is 8144 // useful in general for any shift amounts. 8145 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) 8146 break; 8147 } 8148 return; 8149 8150 case 'N': 8151 if (Subtarget->isThumb()) { // FIXME thumb2 8152 // This must be a constant between 0 and 31, for shift amounts. 8153 if (CVal >= 0 && CVal <= 31) 8154 break; 8155 } 8156 return; 8157 8158 case 'O': 8159 if (Subtarget->isThumb()) { // FIXME thumb2 8160 // This must be a multiple of 4 between -508 and 508, for 8161 // ADD/SUB sp = sp + immediate. 8162 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) 8163 break; 8164 } 8165 return; 8166 } 8167 Result = DAG.getTargetConstant(CVal, Op.getValueType()); 8168 break; 8169 } 8170 8171 if (Result.getNode()) { 8172 Ops.push_back(Result); 8173 return; 8174 } 8175 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 8176 } 8177 8178 bool 8179 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 8180 // The ARM target isn't yet aware of offsets. 8181 return false; 8182 } 8183 8184 bool ARM::isBitFieldInvertedMask(unsigned v) { 8185 if (v == 0xffffffff) 8186 return 0; 8187 // there can be 1's on either or both "outsides", all the "inside" 8188 // bits must be 0's 8189 unsigned int lsb = 0, msb = 31; 8190 while (v & (1 << msb)) --msb; 8191 while (v & (1 << lsb)) ++lsb; 8192 for (unsigned int i = lsb; i <= msb; ++i) { 8193 if (v & (1 << i)) 8194 return 0; 8195 } 8196 return 1; 8197 } 8198 8199 /// isFPImmLegal - Returns true if the target can instruction select the 8200 /// specified FP immediate natively. If false, the legalizer will 8201 /// materialize the FP immediate as a load from a constant pool. 8202 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 8203 if (!Subtarget->hasVFP3()) 8204 return false; 8205 if (VT == MVT::f32) 8206 return ARM_AM::getFP32Imm(Imm) != -1; 8207 if (VT == MVT::f64) 8208 return ARM_AM::getFP64Imm(Imm) != -1; 8209 return false; 8210 } 8211 8212 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as 8213 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment 8214 /// specified in the intrinsic calls. 8215 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 8216 const CallInst &I, 8217 unsigned Intrinsic) const { 8218 switch (Intrinsic) { 8219 case Intrinsic::arm_neon_vld1: 8220 case Intrinsic::arm_neon_vld2: 8221 case Intrinsic::arm_neon_vld3: 8222 case Intrinsic::arm_neon_vld4: 8223 case Intrinsic::arm_neon_vld2lane: 8224 case Intrinsic::arm_neon_vld3lane: 8225 case Intrinsic::arm_neon_vld4lane: { 8226 Info.opc = ISD::INTRINSIC_W_CHAIN; 8227 // Conservatively set memVT to the entire set of vectors loaded. 
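    // For example, a vld4 returning four <4 x i32> vectors covers 64 bytes,
    // so NumElts below is 8 and memVT becomes v8i64.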
8228 uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8; 8229 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 8230 Info.ptrVal = I.getArgOperand(0); 8231 Info.offset = 0; 8232 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 8233 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 8234 Info.vol = false; // volatile loads with NEON intrinsics not supported 8235 Info.readMem = true; 8236 Info.writeMem = false; 8237 return true; 8238 } 8239 case Intrinsic::arm_neon_vst1: 8240 case Intrinsic::arm_neon_vst2: 8241 case Intrinsic::arm_neon_vst3: 8242 case Intrinsic::arm_neon_vst4: 8243 case Intrinsic::arm_neon_vst2lane: 8244 case Intrinsic::arm_neon_vst3lane: 8245 case Intrinsic::arm_neon_vst4lane: { 8246 Info.opc = ISD::INTRINSIC_VOID; 8247 // Conservatively set memVT to the entire set of vectors stored. 8248 unsigned NumElts = 0; 8249 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { 8250 Type *ArgTy = I.getArgOperand(ArgI)->getType(); 8251 if (!ArgTy->isVectorTy()) 8252 break; 8253 NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8; 8254 } 8255 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 8256 Info.ptrVal = I.getArgOperand(0); 8257 Info.offset = 0; 8258 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 8259 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 8260 Info.vol = false; // volatile stores with NEON intrinsics not supported 8261 Info.readMem = false; 8262 Info.writeMem = true; 8263 return true; 8264 } 8265 case Intrinsic::arm_strexd: { 8266 Info.opc = ISD::INTRINSIC_W_CHAIN; 8267 Info.memVT = MVT::i64; 8268 Info.ptrVal = I.getArgOperand(2); 8269 Info.offset = 0; 8270 Info.align = 8; 8271 Info.vol = true; 8272 Info.readMem = false; 8273 Info.writeMem = true; 8274 return true; 8275 } 8276 case Intrinsic::arm_ldrexd: { 8277 Info.opc = ISD::INTRINSIC_W_CHAIN; 8278 Info.memVT = MVT::i64; 8279 Info.ptrVal = I.getArgOperand(0); 8280 Info.offset = 0; 8281 Info.align = 8; 8282 Info.vol = true; 8283 Info.readMem = true; 8284 Info.writeMem = false; 8285 return true; 8286 } 8287 default: 8288 break; 8289 } 8290 8291 return false; 8292 } 8293